From 5e23086531635e91c5424a5a2397ffd7f9c9aef2 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sun, 6 Apr 2025 13:21:46 +0800 Subject: [PATCH 1/4] Squashed commit of the following: feat!: update bindings feat!: update architecture headers build: update CMake files & symbols feat!: update TriCore to 5.1.0 feat!: update SPARC to 5.1.0 feat!: update S390X to 5.1.0 feat!: update RISC-V to 5.1.0 feat!: update PPC to 5.1.0 feat!: update MIPS to 5.1.0 feat!: update M68K to 5.1.0 feat!: update i386 to 5.1.0 feat!: update ARM to 5.1.0 feat!: update TCG to 5.1.0 feat!: update FPU to 5.1.0 feat!: update QEMU core to 5.1.0 --- CMakeLists.txt | 4 + bindings/dotnet/UnicornEngine/Const/Mips.fs | 6 +- bindings/dotnet/UnicornEngine/Const/Riscv.fs | 12 +- bindings/go/unicorn/mips_const.go | 6 +- bindings/go/unicorn/riscv_const.go | 12 +- .../java/src/main/java/unicorn/MipsConst.java | 6 +- .../src/main/java/unicorn/RiscvConst.java | 12 +- bindings/pascal/unicorn/MipsConst.pas | 8 +- bindings/pascal/unicorn/RiscvConst.pas | 14 +- bindings/python/unicorn/mips_const.py | 6 +- bindings/python/unicorn/riscv_const.py | 12 +- .../lib/unicorn_engine/mips_const.rb | 6 +- .../lib/unicorn_engine/riscv_const.rb | 12 +- bindings/rust/src/riscv.rs | 6 +- bindings/zig/unicorn/mips_const.zig | 6 +- bindings/zig/unicorn/riscv_const.zig | 10 +- include/unicorn/mips.h | 2 + include/unicorn/riscv.h | 6 +- include/unicorn/unicorn.h | 12 + qemu/aarch64.h | 117 +- qemu/accel/tcg/cputlb.c | 367 +- qemu/accel/tcg/tcg-runtime-gvec.c | 144 + qemu/accel/tcg/tcg-runtime.h | 15 + qemu/accel/tcg/translate-all.c | 8 +- qemu/arm.h | 192 +- qemu/exec.c | 32 +- qemu/fpu/softfloat-specialize.inc.c | 36 +- qemu/fpu/softfloat.c | 1632 ++--- qemu/include/elf.h | 4 + qemu/include/exec/cpu-all.h | 1 + qemu/include/exec/cpu-common.h | 3 - qemu/include/exec/cpu-defs.h | 7 +- qemu/include/exec/cpu_ldst.h | 176 +- qemu/include/exec/exec-all.h | 44 + qemu/include/fpu/softfloat-helpers.h | 24 +- qemu/include/fpu/softfloat-macros.h | 16 +- qemu/include/fpu/softfloat-types.h | 39 +- qemu/include/fpu/softfloat.h | 359 +- qemu/include/hw/registerfields.h | 32 +- qemu/include/qemu/bswap.h | 2 + qemu/include/qemu/compiler.h | 2 + qemu/include/qemu/host-utils.h | 4 +- qemu/include/qemu/osdep.h | 81 +- qemu/include/tcg/tcg-op-gvec.h | 296 +- qemu/include/tcg/tcg-op.h | 13 +- qemu/include/tcg/tcg-opc.h | 21 +- qemu/include/tcg/tcg.h | 5 +- qemu/m68k.h | 97 +- qemu/mips.h | 161 +- qemu/mips64.h | 161 +- qemu/mips64el.h | 161 +- qemu/mipsel.h | 161 +- qemu/ppc.h | 360 +- qemu/ppc64.h | 360 +- qemu/riscv32.h | 1099 +++- qemu/riscv64.h | 1099 +++- qemu/s390x.h | 96 +- qemu/sparc.h | 96 +- qemu/sparc64.h | 96 +- qemu/target/arm/backup.c | 5431 +++++++++++++++++ qemu/target/arm/cpu-param.h | 2 +- qemu/target/arm/cpu-qom.h | 9 +- qemu/target/arm/cpu.c | 17 +- qemu/target/arm/cpu.h | 107 +- qemu/target/arm/cpu64.c | 19 +- qemu/target/arm/crypto_helper.c | 272 +- qemu/target/arm/decode-a32.inc.c | 4 +- qemu/target/arm/decode-neon-dp.inc.c | 2806 +++++++++ qemu/target/arm/decode-neon-ls.inc.c | 149 + qemu/target/arm/decode-neon-shared.inc.c | 271 + qemu/target/arm/decode-sve.inc.c | 4 +- qemu/target/arm/helper-a64.c | 90 +- qemu/target/arm/helper-a64.h | 16 + qemu/target/arm/helper-sve.h | 542 +- qemu/target/arm/helper.c | 743 ++- qemu/target/arm/helper.h | 160 +- qemu/target/arm/internals.h | 159 +- qemu/target/arm/m_helper.c | 9 +- qemu/target/arm/mte_helper.c | 913 +++ qemu/target/arm/neon_helper.c | 41 - qemu/target/arm/op_helper.c | 17 + 
qemu/target/arm/pauth_helper.c | 6 +- qemu/target/arm/sve_helper.c | 4384 +++++++------ qemu/target/arm/tlb_helper.c | 6 +- qemu/target/arm/translate-a64.c | 3283 +++++----- qemu/target/arm/translate-a64.h | 16 +- qemu/target/arm/translate-neon.inc.c | 4276 +++++++++++++ qemu/target/arm/translate-sve.c | 1510 +++-- qemu/target/arm/translate-vfp.inc.c | 15 +- qemu/target/arm/translate.c | 4913 ++++----------- qemu/target/arm/translate.h | 131 +- qemu/target/arm/vec_helper.c | 274 +- qemu/target/arm/vec_internal.h | 33 + qemu/target/arm/vfp_helper.c | 25 +- qemu/target/i386/cpu.c | 108 +- qemu/target/i386/cpu.h | 18 + qemu/target/i386/excp_helper.c | 4 +- qemu/target/i386/fpu_helper.c | 1738 +++++- qemu/target/i386/helper.h | 1 + qemu/target/i386/ops_sse.h | 90 +- qemu/target/i386/svm.h | 1 + qemu/target/i386/svm_helper.c | 7 +- qemu/target/i386/translate.c | 34 +- qemu/target/m68k/cpu.c | 6 - qemu/target/m68k/fpu_helper.c | 11 +- qemu/target/m68k/helper.c | 16 +- qemu/target/m68k/helper.h | 1 + qemu/target/m68k/softfloat.c | 151 +- qemu/target/m68k/softfloat.h | 1 - qemu/target/m68k/translate.c | 17 + qemu/target/mips/cp0_helper.c | 11 +- qemu/target/mips/cpu-param.h | 3 +- qemu/target/mips/cpu.h | 28 + qemu/target/mips/fpu_helper.c | 659 +- qemu/target/mips/helper.c | 4 + qemu/target/mips/helper.h | 73 +- qemu/target/mips/internal.h | 3 +- qemu/target/mips/mips-defs.h | 51 +- qemu/target/mips/msa_helper.c | 1408 ++++- qemu/target/mips/op_helper.c | 4 + qemu/target/mips/translate.c | 219 +- qemu/target/mips/translate_init.inc.c | 95 +- qemu/target/ppc/cpu.h | 33 +- qemu/target/ppc/dfp_helper.c | 4 +- qemu/target/ppc/excp_helper.c | 131 +- qemu/target/ppc/helper.h | 5 +- qemu/target/ppc/int_helper.c | 23 +- qemu/target/ppc/translate.c | 67 +- qemu/target/ppc/translate/fp-impl.inc.c | 732 ++- qemu/target/ppc/translate/vmx-impl.inc.c | 41 +- qemu/target/ppc/translate/vsx-impl.inc.c | 2 +- qemu/target/ppc/translate_init.inc.c | 14 +- qemu/target/riscv/cpu.c | 159 +- qemu/target/riscv/cpu.h | 81 +- qemu/target/riscv/cpu_bits.h | 15 + qemu/target/riscv/cpu_helper.c | 125 +- qemu/target/riscv/csr.c | 222 +- qemu/target/riscv/fpu_helper.c | 33 +- qemu/target/riscv/helper.h | 1074 ++++ .../riscv/insn_trans/trans_privileged.inc.c | 50 +- qemu/target/riscv/insn_trans/trans_rvd.inc.c | 2 +- qemu/target/riscv/insn_trans/trans_rvf.inc.c | 20 +- qemu/target/riscv/insn_trans/trans_rvh.inc.c | 33 + qemu/target/riscv/insn_trans/trans_rvv.inc.c | 2954 +++++++++ qemu/target/riscv/internals.h | 41 + qemu/target/riscv/op_helper.c | 31 +- qemu/target/riscv/pmp.c | 18 +- qemu/target/riscv/riscv32/decode_insn16.inc.c | 51 +- qemu/target/riscv/riscv32/decode_insn32.inc.c | 3242 +++++++++- qemu/target/riscv/riscv64/decode_insn16.inc.c | 55 +- qemu/target/riscv/riscv64/decode_insn32.inc.c | 3385 ++++++++-- qemu/target/riscv/translate.c | 26 +- qemu/target/riscv/vector_helper.c | 4913 +++++++++++++++ qemu/target/s390x/cpu_features_def.inc.h | 3 +- qemu/target/s390x/fpu_helper.c | 22 +- qemu/target/s390x/gen-features.c | 1 + qemu/target/s390x/helper.h | 4 - qemu/target/s390x/insn-data.def | 6 +- qemu/target/s390x/internal.h | 3 +- qemu/target/s390x/translate.c | 3 +- qemu/target/s390x/translate_vx.inc.c | 109 +- qemu/target/s390x/vec_fpu_helper.c | 2 +- qemu/target/s390x/vec_int_helper.c | 31 - qemu/target/sparc/fop_helper.c | 4 +- qemu/target/tricore/translate.c | 1 - qemu/tcg/README | 7 +- qemu/tcg/aarch64/tcg-target.h | 3 + qemu/tcg/aarch64/tcg-target.inc.c | 77 +- qemu/tcg/aarch64/tcg-target.opc.h | 1 + 
qemu/tcg/arm/tcg-target.inc.c | 2 - qemu/tcg/i386/tcg-target.h | 3 + qemu/tcg/i386/tcg-target.inc.c | 120 +- qemu/tcg/mips/tcg-target.inc.c | 2 - qemu/tcg/ppc/tcg-target.h | 3 + qemu/tcg/ppc/tcg-target.inc.c | 42 +- qemu/tcg/ppc/tcg-target.opc.h | 1 - qemu/tcg/riscv/tcg-target.inc.c | 4 - qemu/tcg/s390/tcg-target.inc.c | 2 - qemu/tcg/sparc/tcg-target.inc.c | 2 - qemu/tcg/tcg-op-gvec.c | 377 +- qemu/tcg/tcg-op-vec.c | 63 +- qemu/tcg/tcg-op.c | 16 +- qemu/tcg/tcg.c | 85 +- qemu/tricore.h | 96 +- qemu/util/guest-random.c | 1 - qemu/x86_64.h | 97 +- symbols.sh | 1579 ++++- uc.c | 1 - 188 files changed, 50201 insertions(+), 13346 deletions(-) create mode 100644 qemu/target/arm/backup.c create mode 100644 qemu/target/arm/decode-neon-dp.inc.c create mode 100644 qemu/target/arm/decode-neon-ls.inc.c create mode 100644 qemu/target/arm/decode-neon-shared.inc.c create mode 100644 qemu/target/arm/mte_helper.c create mode 100644 qemu/target/arm/translate-neon.inc.c create mode 100644 qemu/target/arm/vec_internal.h create mode 100644 qemu/target/riscv/insn_trans/trans_rvh.inc.c create mode 100644 qemu/target/riscv/insn_trans/trans_rvv.inc.c create mode 100644 qemu/target/riscv/internals.h create mode 100644 qemu/target/riscv/vector_helper.c diff --git a/CMakeLists.txt b/CMakeLists.txt index e4df966aed..81c56aa5e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -568,6 +568,7 @@ add_library(arm-softmmu STATIC qemu/target/arm/helper.c qemu/target/arm/iwmmxt_helper.c qemu/target/arm/m_helper.c + qemu/target/arm/mte_helper.c qemu/target/arm/neon_helper.c qemu/target/arm/op_helper.c qemu/target/arm/psci.c @@ -612,6 +613,7 @@ add_library(aarch64-softmmu STATIC qemu/target/arm/helper.c qemu/target/arm/iwmmxt_helper.c qemu/target/arm/m_helper.c + qemu/target/arm/mte_helper.c qemu/target/arm/neon_helper.c qemu/target/arm/op_helper.c qemu/target/arm/pauth_helper.c @@ -1019,6 +1021,7 @@ add_library(riscv32-softmmu STATIC qemu/target/riscv/pmp.c qemu/target/riscv/translate.c qemu/target/riscv/unicorn.c + qemu/target/riscv/vector_helper.c ) if(MSVC) @@ -1052,6 +1055,7 @@ add_library(riscv64-softmmu STATIC qemu/target/riscv/pmp.c qemu/target/riscv/translate.c qemu/target/riscv/unicorn.c + qemu/target/riscv/vector_helper.c ) if(MSVC) diff --git a/bindings/dotnet/UnicornEngine/Const/Mips.fs b/bindings/dotnet/UnicornEngine/Const/Mips.fs index 76d0c71494..74c4946442 100644 --- a/bindings/dotnet/UnicornEngine/Const/Mips.fs +++ b/bindings/dotnet/UnicornEngine/Const/Mips.fs @@ -41,8 +41,10 @@ module Mips = let UC_CPU_MIPS64_I6500 = 9 let UC_CPU_MIPS64_LOONGSON_2E = 10 let UC_CPU_MIPS64_LOONGSON_2F = 11 - let UC_CPU_MIPS64_MIPS64DSPR2 = 12 - let UC_CPU_MIPS64_ENDING = 13 + let UC_CPU_MIPS64_LOONGSON_3A1000 = 12 + let UC_CPU_MIPS64_LOONGSON_3A4000 = 13 + let UC_CPU_MIPS64_MIPS64DSPR2 = 14 + let UC_CPU_MIPS64_ENDING = 15 // MIPS registers diff --git a/bindings/dotnet/UnicornEngine/Const/Riscv.fs b/bindings/dotnet/UnicornEngine/Const/Riscv.fs index 244e5fec45..d791cfcd5f 100644 --- a/bindings/dotnet/UnicornEngine/Const/Riscv.fs +++ b/bindings/dotnet/UnicornEngine/Const/Riscv.fs @@ -10,15 +10,17 @@ module Riscv = // RISCV32 CPU let UC_CPU_RISCV32_ANY = 0 - let UC_CPU_RISCV32_BASE32 = 1 - let UC_CPU_RISCV32_SIFIVE_E31 = 2 - let UC_CPU_RISCV32_SIFIVE_U34 = 3 - let UC_CPU_RISCV32_ENDING = 4 + let UC_CPU_RISCV32_BASE = 1 + let UC_CPU_RISCV32_IBEX = 2 + let UC_CPU_RISCV32_SIFIVE_E31 = 3 + let UC_CPU_RISCV32_SIFIVE_E34 = 4 + let UC_CPU_RISCV32_SIFIVE_U34 = 5 + let UC_CPU_RISCV32_ENDING = 6 // RISCV64 CPU let UC_CPU_RISCV64_ANY = 0 - 
let UC_CPU_RISCV64_BASE64 = 1 + let UC_CPU_RISCV64_BASE = 1 let UC_CPU_RISCV64_SIFIVE_E51 = 2 let UC_CPU_RISCV64_SIFIVE_U54 = 3 let UC_CPU_RISCV64_ENDING = 4 diff --git a/bindings/go/unicorn/mips_const.go b/bindings/go/unicorn/mips_const.go index dfb6ddb066..c78b813e2b 100644 --- a/bindings/go/unicorn/mips_const.go +++ b/bindings/go/unicorn/mips_const.go @@ -36,8 +36,10 @@ const ( CPU_MIPS64_I6500 = 9 CPU_MIPS64_LOONGSON_2E = 10 CPU_MIPS64_LOONGSON_2F = 11 - CPU_MIPS64_MIPS64DSPR2 = 12 - CPU_MIPS64_ENDING = 13 + CPU_MIPS64_LOONGSON_3A1000 = 12 + CPU_MIPS64_LOONGSON_3A4000 = 13 + CPU_MIPS64_MIPS64DSPR2 = 14 + CPU_MIPS64_ENDING = 15 // MIPS registers diff --git a/bindings/go/unicorn/riscv_const.go b/bindings/go/unicorn/riscv_const.go index 08458f77a6..d497b50f87 100644 --- a/bindings/go/unicorn/riscv_const.go +++ b/bindings/go/unicorn/riscv_const.go @@ -5,15 +5,17 @@ const ( // RISCV32 CPU CPU_RISCV32_ANY = 0 - CPU_RISCV32_BASE32 = 1 - CPU_RISCV32_SIFIVE_E31 = 2 - CPU_RISCV32_SIFIVE_U34 = 3 - CPU_RISCV32_ENDING = 4 + CPU_RISCV32_BASE = 1 + CPU_RISCV32_IBEX = 2 + CPU_RISCV32_SIFIVE_E31 = 3 + CPU_RISCV32_SIFIVE_E34 = 4 + CPU_RISCV32_SIFIVE_U34 = 5 + CPU_RISCV32_ENDING = 6 // RISCV64 CPU CPU_RISCV64_ANY = 0 - CPU_RISCV64_BASE64 = 1 + CPU_RISCV64_BASE = 1 CPU_RISCV64_SIFIVE_E51 = 2 CPU_RISCV64_SIFIVE_U54 = 3 CPU_RISCV64_ENDING = 4 diff --git a/bindings/java/src/main/java/unicorn/MipsConst.java b/bindings/java/src/main/java/unicorn/MipsConst.java index bf6d8cf2fe..567f55661d 100644 --- a/bindings/java/src/main/java/unicorn/MipsConst.java +++ b/bindings/java/src/main/java/unicorn/MipsConst.java @@ -38,8 +38,10 @@ public interface MipsConst { public static final int UC_CPU_MIPS64_I6500 = 9; public static final int UC_CPU_MIPS64_LOONGSON_2E = 10; public static final int UC_CPU_MIPS64_LOONGSON_2F = 11; - public static final int UC_CPU_MIPS64_MIPS64DSPR2 = 12; - public static final int UC_CPU_MIPS64_ENDING = 13; + public static final int UC_CPU_MIPS64_LOONGSON_3A1000 = 12; + public static final int UC_CPU_MIPS64_LOONGSON_3A4000 = 13; + public static final int UC_CPU_MIPS64_MIPS64DSPR2 = 14; + public static final int UC_CPU_MIPS64_ENDING = 15; // MIPS registers diff --git a/bindings/java/src/main/java/unicorn/RiscvConst.java b/bindings/java/src/main/java/unicorn/RiscvConst.java index 5814180974..3a298c5c7a 100644 --- a/bindings/java/src/main/java/unicorn/RiscvConst.java +++ b/bindings/java/src/main/java/unicorn/RiscvConst.java @@ -7,15 +7,17 @@ public interface RiscvConst { // RISCV32 CPU public static final int UC_CPU_RISCV32_ANY = 0; - public static final int UC_CPU_RISCV32_BASE32 = 1; - public static final int UC_CPU_RISCV32_SIFIVE_E31 = 2; - public static final int UC_CPU_RISCV32_SIFIVE_U34 = 3; - public static final int UC_CPU_RISCV32_ENDING = 4; + public static final int UC_CPU_RISCV32_BASE = 1; + public static final int UC_CPU_RISCV32_IBEX = 2; + public static final int UC_CPU_RISCV32_SIFIVE_E31 = 3; + public static final int UC_CPU_RISCV32_SIFIVE_E34 = 4; + public static final int UC_CPU_RISCV32_SIFIVE_U34 = 5; + public static final int UC_CPU_RISCV32_ENDING = 6; // RISCV64 CPU public static final int UC_CPU_RISCV64_ANY = 0; - public static final int UC_CPU_RISCV64_BASE64 = 1; + public static final int UC_CPU_RISCV64_BASE = 1; public static final int UC_CPU_RISCV64_SIFIVE_E51 = 2; public static final int UC_CPU_RISCV64_SIFIVE_U54 = 3; public static final int UC_CPU_RISCV64_ENDING = 4; diff --git a/bindings/pascal/unicorn/MipsConst.pas b/bindings/pascal/unicorn/MipsConst.pas index 
3cbce39ac0..748b1d8568 100644 --- a/bindings/pascal/unicorn/MipsConst.pas +++ b/bindings/pascal/unicorn/MipsConst.pas @@ -39,8 +39,10 @@ interface UC_CPU_MIPS64_I6500 = 9; UC_CPU_MIPS64_LOONGSON_2E = 10; UC_CPU_MIPS64_LOONGSON_2F = 11; - UC_CPU_MIPS64_MIPS64DSPR2 = 12; - UC_CPU_MIPS64_ENDING = 13; + UC_CPU_MIPS64_LOONGSON_3A1000 = 12; + UC_CPU_MIPS64_LOONGSON_3A4000 = 13; + UC_CPU_MIPS64_MIPS64DSPR2 = 14; + UC_CPU_MIPS64_ENDING = 15; // MIPS registers @@ -242,4 +244,4 @@ interface UC_MIPS_REG_LO3 = 48; implementation -end. \ No newline at end of file +end. diff --git a/bindings/pascal/unicorn/RiscvConst.pas b/bindings/pascal/unicorn/RiscvConst.pas index 075e271c65..a4629832c8 100644 --- a/bindings/pascal/unicorn/RiscvConst.pas +++ b/bindings/pascal/unicorn/RiscvConst.pas @@ -8,15 +8,17 @@ interface // RISCV32 CPU UC_CPU_RISCV32_ANY = 0; - UC_CPU_RISCV32_BASE32 = 1; - UC_CPU_RISCV32_SIFIVE_E31 = 2; - UC_CPU_RISCV32_SIFIVE_U34 = 3; - UC_CPU_RISCV32_ENDING = 4; + UC_CPU_RISCV32_BASE = 1; + UC_CPU_RISCV32_IBEX = 2; + UC_CPU_RISCV32_SIFIVE_E31 = 3; + UC_CPU_RISCV32_SIFIVE_E34 = 4; + UC_CPU_RISCV32_SIFIVE_U34 = 5; + UC_CPU_RISCV32_ENDING = 6; // RISCV64 CPU UC_CPU_RISCV64_ANY = 0; - UC_CPU_RISCV64_BASE64 = 1; + UC_CPU_RISCV64_BASE = 1; UC_CPU_RISCV64_SIFIVE_E51 = 2; UC_CPU_RISCV64_SIFIVE_U54 = 3; UC_CPU_RISCV64_ENDING = 4; @@ -291,4 +293,4 @@ interface UC_RISCV_REG_FT11 = 189; implementation -end. \ No newline at end of file +end. diff --git a/bindings/python/unicorn/mips_const.py b/bindings/python/unicorn/mips_const.py index c60b2d0f77..63bf9f3f39 100644 --- a/bindings/python/unicorn/mips_const.py +++ b/bindings/python/unicorn/mips_const.py @@ -34,8 +34,10 @@ UC_CPU_MIPS64_I6500 = 9 UC_CPU_MIPS64_LOONGSON_2E = 10 UC_CPU_MIPS64_LOONGSON_2F = 11 -UC_CPU_MIPS64_MIPS64DSPR2 = 12 -UC_CPU_MIPS64_ENDING = 13 +UC_CPU_MIPS64_LOONGSON_3A1000 = 12 +UC_CPU_MIPS64_LOONGSON_3A4000 = 13 +UC_CPU_MIPS64_MIPS64DSPR2 = 14 +UC_CPU_MIPS64_ENDING = 15 # MIPS registers diff --git a/bindings/python/unicorn/riscv_const.py b/bindings/python/unicorn/riscv_const.py index 3e63376fd5..d1f2ccd2f9 100644 --- a/bindings/python/unicorn/riscv_const.py +++ b/bindings/python/unicorn/riscv_const.py @@ -3,15 +3,17 @@ # RISCV32 CPU UC_CPU_RISCV32_ANY = 0 -UC_CPU_RISCV32_BASE32 = 1 -UC_CPU_RISCV32_SIFIVE_E31 = 2 -UC_CPU_RISCV32_SIFIVE_U34 = 3 -UC_CPU_RISCV32_ENDING = 4 +UC_CPU_RISCV32_BASE = 1 +UC_CPU_RISCV32_IBEX = 2 +UC_CPU_RISCV32_SIFIVE_E31 = 3 +UC_CPU_RISCV32_SIFIVE_E34 = 4 +UC_CPU_RISCV32_SIFIVE_U34 = 5 +UC_CPU_RISCV32_ENDING = 6 # RISCV64 CPU UC_CPU_RISCV64_ANY = 0 -UC_CPU_RISCV64_BASE64 = 1 +UC_CPU_RISCV64_BASE = 1 UC_CPU_RISCV64_SIFIVE_E51 = 2 UC_CPU_RISCV64_SIFIVE_U54 = 3 UC_CPU_RISCV64_ENDING = 4 diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb index 374912a870..ed9520b592 100644 --- a/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb @@ -36,8 +36,10 @@ module UnicornEngine UC_CPU_MIPS64_I6500 = 9 UC_CPU_MIPS64_LOONGSON_2E = 10 UC_CPU_MIPS64_LOONGSON_2F = 11 - UC_CPU_MIPS64_MIPS64DSPR2 = 12 - UC_CPU_MIPS64_ENDING = 13 + UC_CPU_MIPS64_LOONGSON_3A1000 = 12 + UC_CPU_MIPS64_LOONGSON_3A4000 = 13 + UC_CPU_MIPS64_MIPS64DSPR2 = 14 + UC_CPU_MIPS64_ENDING = 15 # MIPS registers diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb index 33203d0a4d..99eba71355 100644 --- 
a/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb @@ -5,15 +5,17 @@ module UnicornEngine # RISCV32 CPU UC_CPU_RISCV32_ANY = 0 - UC_CPU_RISCV32_BASE32 = 1 - UC_CPU_RISCV32_SIFIVE_E31 = 2 - UC_CPU_RISCV32_SIFIVE_U34 = 3 - UC_CPU_RISCV32_ENDING = 4 + UC_CPU_RISCV32_BASE = 1 + UC_CPU_RISCV32_IBEX = 2 + UC_CPU_RISCV32_SIFIVE_E31 = 3 + UC_CPU_RISCV32_SIFIVE_E34 = 4 + UC_CPU_RISCV32_SIFIVE_U34 = 5 + UC_CPU_RISCV32_ENDING = 6 # RISCV64 CPU UC_CPU_RISCV64_ANY = 0 - UC_CPU_RISCV64_BASE64 = 1 + UC_CPU_RISCV64_BASE = 1 UC_CPU_RISCV64_SIFIVE_E51 = 2 UC_CPU_RISCV64_SIFIVE_U54 = 3 UC_CPU_RISCV64_ENDING = 4 diff --git a/bindings/rust/src/riscv.rs b/bindings/rust/src/riscv.rs index 53c5990bc3..0862e91e7b 100644 --- a/bindings/rust/src/riscv.rs +++ b/bindings/rust/src/riscv.rs @@ -349,8 +349,10 @@ impl From for i32 { #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Riscv32CpuModel { UC_CPU_RISCV32_ANY = 0, - UC_CPU_RISCV32_BASE32, + UC_CPU_RISCV32_BASE, + UC_CPU_RISCV32_IBEX, UC_CPU_RISCV32_SIFIVE_E31, + UC_CPU_RISCV32_SIFIVE_E34, UC_CPU_RISCV32_SIFIVE_U34, } @@ -370,7 +372,7 @@ impl From<&Riscv32CpuModel> for i32 { #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Riscv64CpuModel { UC_CPU_RISCV64_ANY = 0, - UC_CPU_RISCV64_BASE64, + UC_CPU_RISCV64_BASE, UC_CPU_RISCV64_SIFIVE_E51, UC_CPU_RISCV64_SIFIVE_U54, } diff --git a/bindings/zig/unicorn/mips_const.zig b/bindings/zig/unicorn/mips_const.zig index 0987cb2f7b..de66623340 100644 --- a/bindings/zig/unicorn/mips_const.zig +++ b/bindings/zig/unicorn/mips_const.zig @@ -36,8 +36,10 @@ pub const mipsConst = enum(c_int) { CPU_MIPS64_I6500 = 9, CPU_MIPS64_LOONGSON_2E = 10, CPU_MIPS64_LOONGSON_2F = 11, - CPU_MIPS64_MIPS64DSPR2 = 12, - CPU_MIPS64_ENDING = 13, + CPU_MIPS64_LOONGSON_3A1000 = 12, + CPU_MIPS64_LOONGSON_3A4000 = 13, + CPU_MIPS64_MIPS64DSPR2 = 14, + CPU_MIPS64_ENDING = 15, // MIPS registers diff --git a/bindings/zig/unicorn/riscv_const.zig b/bindings/zig/unicorn/riscv_const.zig index 00a34001f7..340d1988c6 100644 --- a/bindings/zig/unicorn/riscv_const.zig +++ b/bindings/zig/unicorn/riscv_const.zig @@ -5,10 +5,12 @@ pub const riscvConst = enum(c_int) { // RISCV32 CPU CPU_RISCV32_ANY = 0, - CPU_RISCV32_BASE32 = 1, - CPU_RISCV32_SIFIVE_E31 = 2, - CPU_RISCV32_SIFIVE_U34 = 3, - CPU_RISCV32_ENDING = 4, + CPU_RISCV32_BASE = 1, + CPU_RISCV32_IBEX = 2, + CPU_RISCV32_SIFIVE_E31 = 3, + CPU_RISCV32_SIFIVE_E34 = 4, + CPU_RISCV32_SIFIVE_U34 = 5, + CPU_RISCV32_ENDING = 6, // RISCV64 CPU diff --git a/include/unicorn/mips.h b/include/unicorn/mips.h index 7a4c9c1cb8..4eeb9241d7 100644 --- a/include/unicorn/mips.h +++ b/include/unicorn/mips.h @@ -55,6 +55,8 @@ typedef enum uc_cpu_mips64 { UC_CPU_MIPS64_I6500, UC_CPU_MIPS64_LOONGSON_2E, UC_CPU_MIPS64_LOONGSON_2F, + UC_CPU_MIPS64_LOONGSON_3A1000, + UC_CPU_MIPS64_LOONGSON_3A4000, UC_CPU_MIPS64_MIPS64DSPR2, UC_CPU_MIPS64_ENDING diff --git a/include/unicorn/riscv.h b/include/unicorn/riscv.h index cf1595ae4f..e72a1aa39c 100644 --- a/include/unicorn/riscv.h +++ b/include/unicorn/riscv.h @@ -18,8 +18,10 @@ extern "C" { //> RISCV32 CPU typedef enum uc_cpu_riscv32 { UC_CPU_RISCV32_ANY = 0, - UC_CPU_RISCV32_BASE32, + UC_CPU_RISCV32_BASE, + UC_CPU_RISCV32_IBEX, UC_CPU_RISCV32_SIFIVE_E31, + UC_CPU_RISCV32_SIFIVE_E34, UC_CPU_RISCV32_SIFIVE_U34, UC_CPU_RISCV32_ENDING @@ -28,7 +30,7 @@ typedef enum uc_cpu_riscv32 { //> RISCV64 CPU typedef enum uc_cpu_riscv64 { UC_CPU_RISCV64_ANY = 0, - UC_CPU_RISCV64_BASE64, + UC_CPU_RISCV64_BASE, 
UC_CPU_RISCV64_SIFIVE_E51, UC_CPU_RISCV64_SIFIVE_U54, diff --git a/include/unicorn/unicorn.h b/include/unicorn/unicorn.h index 5521262e8e..faac1378c5 100644 --- a/include/unicorn/unicorn.h +++ b/include/unicorn/unicorn.h @@ -69,6 +69,18 @@ typedef size_t uc_hook; #define UNICORN_DEPRECATED #endif +#ifdef _MSC_VER +#define UNICORN_UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNICORN_UNUSED __attribute__((unused)) +#endif + +#ifdef _MSC_VER +#define UNICORN_NONNULL +#else +#define UNICORN_NONNULL __attribute__((nonnull)) +#endif + // Unicorn API version #define UC_API_MAJOR 2 #define UC_API_MINOR 1 diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 6f1315028d..55c3afb894 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_aarch64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_aarch64 #define tcg_gen_st_i64 tcg_gen_st_i64_aarch64 +#define tcg_gen_add_i64 tcg_gen_add_i64_aarch64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_aarch64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_aarch64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_aarch64 #define cpu_icount_to_ns cpu_icount_to_ns_aarch64 #define cpu_is_stopped cpu_is_stopped_aarch64 #define cpu_get_ticks cpu_get_ticks_aarch64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_aarch64 #define floatx80_mul floatx80_mul_aarch64 #define floatx80_div floatx80_div_aarch64 +#define floatx80_modrem floatx80_modrem_aarch64 +#define floatx80_mod floatx80_mod_aarch64 #define floatx80_rem floatx80_rem_aarch64 #define floatx80_sqrt floatx80_sqrt_aarch64 #define floatx80_eq floatx80_eq_aarch64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_aarch64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_aarch64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_aarch64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_aarch64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_aarch64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_aarch64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_aarch64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_aarch64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_aarch64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_aarch64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_aarch64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_aarch64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_aarch64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_aarch64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_aarch64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_aarch64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_aarch64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_aarch64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_aarch64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_aarch64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_aarch64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_aarch64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_aarch64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_aarch64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_aarch64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_aarch64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_aarch64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_aarch64 #define tcg_gen_shri_vec tcg_gen_shri_vec_aarch64 #define tcg_gen_sari_vec tcg_gen_sari_vec_aarch64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_aarch64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_aarch64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_aarch64 #define tcg_gen_add_vec tcg_gen_add_vec_aarch64 #define tcg_gen_sub_vec 
tcg_gen_sub_vec_aarch64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_aarch64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_aarch64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_aarch64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_aarch64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_aarch64 #define tcg_gen_shls_vec tcg_gen_shls_vec_aarch64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_aarch64 #define tcg_gen_sars_vec tcg_gen_sars_vec_aarch64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_aarch64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_aarch64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_aarch64 #define tb_htable_lookup tb_htable_lookup_aarch64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_aarch64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_aarch64 #define tlb_init tlb_init_aarch64 +#define tlb_destroy tlb_destroy_aarch64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_aarch64 #define tlb_flush tlb_flush_aarch64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_aarch64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_aarch64 #define get_page_addr_code_hostp get_page_addr_code_hostp_aarch64 #define get_page_addr_code get_page_addr_code_aarch64 +#define probe_access_flags probe_access_flags_aarch64 #define probe_access probe_access_aarch64 #define tlb_vaddr_to_host tlb_vaddr_to_host_aarch64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_aarch64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_aarch64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_aarch64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_aarch64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_aarch64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_aarch64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_aarch64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_aarch64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_aarch64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_aarch64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_aarch64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_aarch64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_aarch64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_aarch64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_aarch64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_aarch64 #define cpu_ldub_data_ra cpu_ldub_data_ra_aarch64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_aarch64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_aarch64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_aarch64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_aarch64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_aarch64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_aarch64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_aarch64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_aarch64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_aarch64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_aarch64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_aarch64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_aarch64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_aarch64 #define cpu_ldub_data cpu_ldub_data_aarch64 #define cpu_ldsb_data cpu_ldsb_data_aarch64 -#define cpu_lduw_data cpu_lduw_data_aarch64 -#define cpu_ldsw_data cpu_ldsw_data_aarch64 -#define cpu_ldl_data cpu_ldl_data_aarch64 -#define cpu_ldq_data cpu_ldq_data_aarch64 +#define cpu_lduw_be_data cpu_lduw_be_data_aarch64 +#define cpu_lduw_le_data cpu_lduw_le_data_aarch64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_aarch64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_aarch64 +#define cpu_ldl_be_data 
cpu_ldl_be_data_aarch64 +#define cpu_ldl_le_data cpu_ldl_le_data_aarch64 +#define cpu_ldq_le_data cpu_ldq_le_data_aarch64 +#define cpu_ldq_be_data cpu_ldq_be_data_aarch64 #define helper_ret_stb_mmu helper_ret_stb_mmu_aarch64 #define helper_le_stw_mmu helper_le_stw_mmu_aarch64 #define helper_be_stw_mmu helper_be_stw_mmu_aarch64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_aarch64 #define helper_be_stq_mmu helper_be_stq_mmu_aarch64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_aarch64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_aarch64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_aarch64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_aarch64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_aarch64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_aarch64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_aarch64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_aarch64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_aarch64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_aarch64 #define cpu_stb_data_ra cpu_stb_data_ra_aarch64 -#define cpu_stw_data_ra cpu_stw_data_ra_aarch64 -#define cpu_stl_data_ra cpu_stl_data_ra_aarch64 -#define cpu_stq_data_ra cpu_stq_data_ra_aarch64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_aarch64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_aarch64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_aarch64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_aarch64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_aarch64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_aarch64 #define cpu_stb_data cpu_stb_data_aarch64 -#define cpu_stw_data cpu_stw_data_aarch64 -#define cpu_stl_data cpu_stl_data_aarch64 -#define cpu_stq_data cpu_stq_data_aarch64 +#define cpu_stw_be_data cpu_stw_be_data_aarch64 +#define cpu_stw_le_data cpu_stw_le_data_aarch64 +#define cpu_stl_be_data cpu_stl_be_data_aarch64 +#define cpu_stl_le_data cpu_stl_le_data_aarch64 +#define cpu_stq_be_data cpu_stq_be_data_aarch64 +#define cpu_stq_le_data cpu_stq_le_data_aarch64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_aarch64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_aarch64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_aarch64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_aarch64 #define cpu_ldl_code cpu_ldl_code_aarch64 #define cpu_ldq_code cpu_ldq_code_aarch64 +#define cpu_interrupt_handler cpu_interrupt_handler_aarch64 #define helper_div_i32 helper_div_i32_aarch64 #define helper_rem_i32 helper_rem_i32_aarch64 #define helper_divu_i32 helper_divu_i32_aarch64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_aarch64 #define helper_gvec_sar32i helper_gvec_sar32i_aarch64 #define helper_gvec_sar64i helper_gvec_sar64i_aarch64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_aarch64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_aarch64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_aarch64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_aarch64 #define helper_gvec_shl8v helper_gvec_shl8v_aarch64 #define helper_gvec_shl16v helper_gvec_shl16v_aarch64 #define helper_gvec_shl32v helper_gvec_shl32v_aarch64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_aarch64 #define helper_gvec_sar32v helper_gvec_sar32v_aarch64 #define helper_gvec_sar64v helper_gvec_sar64v_aarch64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_aarch64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_aarch64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_aarch64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_aarch64 
+#define helper_gvec_rotr8v helper_gvec_rotr8v_aarch64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_aarch64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_aarch64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_aarch64 #define helper_gvec_eq8 helper_gvec_eq8_aarch64 #define helper_gvec_ne8 helper_gvec_ne8_aarch64 #define helper_gvec_lt8 helper_gvec_lt8_aarch64 @@ -1615,6 +1669,11 @@ #define arm_v7m_mmu_idx_all arm_v7m_mmu_idx_all_aarch64 #define arm_v7m_mmu_idx_for_secstate_and_priv arm_v7m_mmu_idx_for_secstate_and_priv_aarch64 #define arm_v7m_mmu_idx_for_secstate arm_v7m_mmu_idx_for_secstate_aarch64 +#define mte_probe1 mte_probe1_aarch64 +#define mte_check1 mte_check1_aarch64 +#define mte_checkN mte_checkN_aarch64 +#define gen_helper_mte_check1 gen_helper_mte_check1_aarch64 +#define gen_helper_mte_checkN gen_helper_mte_checkN_aarch64 #define helper_neon_qadd_u8 helper_neon_qadd_u8_aarch64 #define helper_neon_qadd_u16 helper_neon_qadd_u16_aarch64 #define helper_neon_qadd_u32 helper_neon_qadd_u32_aarch64 @@ -1854,6 +1913,21 @@ #define helper_autdb helper_autdb_aarch64 #define helper_xpaci helper_xpaci_aarch64 #define helper_xpacd helper_xpacd_aarch64 +#define helper_mte_check1 helper_mte_check1_aarch64 +#define helper_mte_checkN helper_mte_checkN_aarch64 +#define helper_mte_check_zva helper_mte_check_zva_aarch64 +#define helper_irg helper_irg_aarch64 +#define helper_addsubg helper_addsubg_aarch64 +#define helper_ldg helper_ldg_aarch64 +#define helper_stg helper_stg_aarch64 +#define helper_stg_parallel helper_stg_parallel_aarch64 +#define helper_stg_stub helper_stg_stub_aarch64 +#define helper_st2g helper_st2g_aarch64 +#define helper_st2g_parallel helper_st2g_parallel_aarch64 +#define helper_st2g_stub helper_st2g_stub_aarch64 +#define helper_ldgm helper_ldgm_aarch64 +#define helper_stgm helper_stgm_aarch64 +#define helper_stzgm_tags helper_stzgm_tags_aarch64 #define arm_is_psci_call arm_is_psci_call_aarch64 #define arm_handle_psci_call arm_handle_psci_call_aarch64 #define helper_sve_predtest1 helper_sve_predtest1_aarch64 @@ -2746,6 +2820,7 @@ #define gen_a64_set_pc_im gen_a64_set_pc_im_aarch64 #define unallocated_encoding unallocated_encoding_aarch64 #define new_tmp_a64 new_tmp_a64_aarch64 +#define new_tmp_a64_local new_tmp_a64_local_aarch64 #define new_tmp_a64_zero new_tmp_a64_zero_aarch64 #define cpu_reg cpu_reg_aarch64 #define cpu_reg_sp cpu_reg_sp_aarch64 diff --git a/qemu/accel/tcg/cputlb.c b/qemu/accel/tcg/cputlb.c index f7ffee48f1..3efbf5562c 100644 --- a/qemu/accel/tcg/cputlb.c +++ b/qemu/accel/tcg/cputlb.c @@ -261,6 +261,21 @@ void tlb_init(CPUState *cpu) } } +void tlb_destroy(CPUState *cpu) +{ + CPUArchState *env = cpu->env_ptr; + int i; + + // qemu_spin_destroy(&env_tlb(env)->c.lock); + for (i = 0; i < NB_MMU_MODES; i++) { + CPUTLBDesc *desc = &env_tlb(env)->d[i]; + CPUTLBDescFast *fast = &env_tlb(env)->f[i]; + + g_free(fast->table); + g_free(desc->iotlb); + } +} + /* flush_all_helper: run fn across all cpus * * If the wait flag is set then the src cpu's helper will be queued as @@ -450,9 +465,7 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, run_on_cpu_data data) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; @@ -486,9 +499,7 @@ static void 
tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -523,9 +534,7 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, uint16_t idxmap) { -#ifdef TARGET_ARM - struct uc_struct *uc = src_cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = src_cpu->uc; /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -567,9 +576,7 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, target_ulong addr, uint16_t idxmap) { -#ifdef TARGET_ARM - struct uc_struct *uc = src_cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = src_cpu->uc; /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -755,9 +762,7 @@ static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, so that it is no longer dirty */ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; CPUArchState *env = cpu->env_ptr; int mmu_idx; @@ -811,9 +816,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, hwaddr paddr, MemTxAttrs attrs, int prot, int mmu_idx, target_ulong size) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; CPUArchState *env = cpu->env_ptr; CPUTLB *tlb = env_tlb(env); CPUTLBDesc *desc = &tlb->d[mmu_idx]; @@ -1190,9 +1193,7 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, CPUIOTLBEntry *iotlbentry, uintptr_t retaddr, CPUTLBEntry *tlbe) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; MemoryRegion *mr = cpu->uc->memory_mapping(cpu->uc, tlbe->paddr | (mem_vaddr & ~TARGET_PAGE_MASK)); @@ -1215,6 +1216,86 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, } } +static int probe_access_internal(CPUArchState *env, target_ulong addr, + int fault_size, MMUAccessType access_type, + int mmu_idx, bool nonfault, + void **phost, uintptr_t retaddr) +{ + struct uc_struct *uc = env->uc; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr, page_addr; + size_t elt_ofs; + int flags; + + switch (access_type) { + case MMU_DATA_LOAD: + elt_ofs = offsetof(CPUTLBEntry, addr_read); + break; + case MMU_DATA_STORE: + elt_ofs = offsetof(CPUTLBEntry, addr_write); + break; + case MMU_INST_FETCH: + elt_ofs = offsetof(CPUTLBEntry, addr_code); + break; + default: + g_assert_not_reached(); + } + tlb_addr = tlb_read_ofs(entry, elt_ofs); + + page_addr = addr & TARGET_PAGE_MASK; + if (!tlb_hit_page(uc, tlb_addr, page_addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { + CPUState *cs = env_cpu(env); + CPUClass *cc = CPU_GET_CLASS(cs); + + if (!cc->tlb_fill(cs, addr, fault_size, access_type, + mmu_idx, nonfault, retaddr)) { + /* Non-faulting page table read failed. */ + *phost = NULL; + return TLB_INVALID_MASK; + } + + /* TLB resize via tlb_fill may have moved the entry. 
*/ + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_read_ofs(entry, elt_ofs); + } + flags = tlb_addr & TLB_FLAGS_MASK; + + /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ + if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { + *phost = NULL; + return TLB_MMIO; + } + + /* Everything else is RAM. */ + *phost = (void *)((uintptr_t)addr + entry->addend); + return flags; +} + +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr) +{ + int flags; + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + + flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, + nonfault, phost, retaddr); + + /* Handle clean RAM pages. */ + if (unlikely(flags & TLB_NOTDIRTY)) { + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + + notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr, entry); + flags &= ~TLB_NOTDIRTY; + } + + return flags; +} + /* * Probe for whether the specified guest access is permitted. If it is not * permitted then an exception will be taken in the same way as if this @@ -1225,9 +1306,7 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, void *probe_access(CPUArchState *env, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { -#ifdef TARGET_ARM - struct uc_struct *uc = env->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = env->uc; uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); target_ulong tlb_addr; @@ -1352,9 +1431,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { -#ifdef TARGET_ARM - struct uc_struct *uc = env->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = env->uc; size_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); @@ -1951,36 +2028,54 @@ int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, full_ldub_mmu); } -uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW, - MO_TE == MO_LE - ? full_le_lduw_mmu : full_be_lduw_mmu); + return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu); } -int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW, + full_be_lduw_mmu); +} + +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu); +} + +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW, - MO_TE == MO_LE - ? 
full_le_lduw_mmu : full_be_lduw_mmu); + return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu); } -uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL, - MO_TE == MO_LE - ? full_le_ldul_mmu : full_be_ldul_mmu); + return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu); } -uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ, - MO_TE == MO_LE - ? helper_le_ldq_mmu : helper_be_ldq_mmu); + return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW, + full_le_lduw_mmu); +} + +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu); +} + +uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu); } uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr, @@ -1994,25 +2089,50 @@ int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) +{ + return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +{ + return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) +{ + return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) +{ + return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) { - return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) { - return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) { - return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) { - return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr) @@ -2025,24 +2145,44 @@ int 
cpu_ldsb_data(CPUArchState *env, target_ulong ptr) return cpu_ldsb_data_ra(env, ptr, 0); } -uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr) +uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_lduw_be_data_ra(env, ptr, 0); +} + +int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_ldsw_be_data_ra(env, ptr, 0); +} + +uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_ldl_be_data_ra(env, ptr, 0); +} + +uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr) { - return cpu_lduw_data_ra(env, ptr, 0); + return cpu_ldq_be_data_ra(env, ptr, 0); } -int cpu_ldsw_data(CPUArchState *env, target_ulong ptr) +uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr) { - return cpu_ldsw_data_ra(env, ptr, 0); + return cpu_lduw_le_data_ra(env, ptr, 0); } -uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr) +int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr) { - return cpu_ldl_data_ra(env, ptr, 0); + return cpu_ldsw_le_data_ra(env, ptr, 0); } -uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr) +uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr) { - return cpu_ldq_data_ra(env, ptr, 0); + return cpu_ldl_le_data_ra(env, ptr, 0); +} + +uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_ldq_le_data_ra(env, ptr, 0); } /* @@ -2428,22 +2568,40 @@ void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB); } -void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW); + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW); } -void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL); + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL); } -void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, + int mmu_idx, uintptr_t retaddr) +{ + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ); +} + +void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ); + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW); +} + +void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) +{ + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL); +} + +void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, + int mmu_idx, uintptr_t retaddr) +{ + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ); } void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr, @@ -2452,22 +2610,40 @@ void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr, cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } -void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) { - 
cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } -void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) { - cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } -void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr, - uint64_t val, uintptr_t retaddr) +void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr, + uint64_t val, uintptr_t retaddr) { - cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); +} + +void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) +{ + cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); +} + +void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) +{ + cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); +} + +void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr, + uint64_t val, uintptr_t retaddr) +{ + cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) @@ -2475,19 +2651,34 @@ void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) cpu_stb_data_ra(env, ptr, val, 0); } -void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val) +void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) +{ + cpu_stw_be_data_ra(env, ptr, val, 0); +} + +void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) +{ + cpu_stl_be_data_ra(env, ptr, val, 0); +} + +void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val) +{ + cpu_stq_be_data_ra(env, ptr, val, 0); +} + +void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) { - cpu_stw_data_ra(env, ptr, val, 0); + cpu_stw_le_data_ra(env, ptr, val, 0); } -void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val) +void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) { - cpu_stl_data_ra(env, ptr, val, 0); + cpu_stl_le_data_ra(env, ptr, val, 0); } -void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val) +void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) { - cpu_stq_data_ra(env, ptr, val, 0); + cpu_stq_le_data_ra(env, ptr, val, 0); } /* First set of helpers allows passing in of OI and RETADDR. 
This makes diff --git a/qemu/accel/tcg/tcg-runtime-gvec.c b/qemu/accel/tcg/tcg-runtime-gvec.c index ea997c257f..41ab422366 100644 --- a/qemu/accel/tcg/tcg-runtime-gvec.c +++ b/qemu/accel/tcg/tcg-runtime-gvec.c @@ -724,6 +724,54 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) clear_high(d, oprsz, desc); } +void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + *(uint8_t *)((char *)d + i) = rol8(*(uint8_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + *(uint16_t *)((char *)d + i) = rol16(*(uint16_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + *(uint32_t *)((char *)d + i) = rol32(*(uint32_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + *(uint64_t *)((char *)d + i) = rol64(*(uint64_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) { intptr_t oprsz = simd_oprsz(desc); @@ -868,6 +916,102 @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) clear_high(d, oprsz, desc); } +void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)((char *)b + i) & 7; + *(uint8_t *)((char *)d + i) = rol8(*(uint8_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)((char *)b + i) & 15; + *(uint16_t *)((char *)d + i) = rol16(*(uint16_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)((char *)b + i) & 31; + *(uint32_t *)((char *)d + i) = rol32(*(uint32_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)((char *)b + i) & 63; + *(uint64_t *)((char *)d + i) = rol64(*(uint64_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)((char *)b + i) & 7; + *(uint8_t *)((char *)d + i) = ror8(*(uint8_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = 
simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)((char *)b + i) & 15; + *(uint16_t *)((char *)d + i) = ror16(*(uint16_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)((char *)b + i) & 31; + *(uint32_t *)((char *)d + i) = ror32(*(uint32_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)((char *)b + i) & 63; + *(uint64_t *)((char *)d + i) = ror64(*(uint64_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + #define DO_CMP1(NAME, TYPE, OP) \ void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ { \ diff --git a/qemu/accel/tcg/tcg-runtime.h b/qemu/accel/tcg/tcg-runtime.h index ab7369e8e3..b694d30e22 100644 --- a/qemu/accel/tcg/tcg-runtime.h +++ b/qemu/accel/tcg/tcg-runtime.h @@ -213,6 +213,11 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -228,6 +233,16 @@ DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_rotr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c index 3f6d2630f7..d240f35c87 100644 --- a/qemu/accel/tcg/translate-all.c +++ b/qemu/accel/tcg/translate-all.c @@ -1694,9 +1694,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags, int cflags) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; TCGContext *tcg_ctx = cpu->uc->tcg_ctx; CPUArchState *env = cpu->env_ptr; TranslationBlock 
*tb, *existing_tb; @@ -2155,9 +2153,7 @@ static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; /* Discard jump cache entries for any tb which might potentially overlap the flushed page. */ diff --git a/qemu/arm.h b/qemu/arm.h index 27592db350..061cd1d444 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_arm #define tcg_gen_shr_i64 tcg_gen_shr_i64_arm #define tcg_gen_st_i64 tcg_gen_st_i64_arm +#define tcg_gen_add_i64 tcg_gen_add_i64_arm +#define tcg_gen_sub_i64 tcg_gen_sub_i64_arm #define tcg_gen_xor_i64 tcg_gen_xor_i64_arm +#define tcg_gen_neg_i64 tcg_gen_neg_i64_arm #define cpu_icount_to_ns cpu_icount_to_ns_arm #define cpu_is_stopped cpu_is_stopped_arm #define cpu_get_ticks cpu_get_ticks_arm @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_arm #define floatx80_mul floatx80_mul_arm #define floatx80_div floatx80_div_arm +#define floatx80_modrem floatx80_modrem_arm +#define floatx80_mod floatx80_mod_arm #define floatx80_rem floatx80_rem_arm #define floatx80_sqrt floatx80_sqrt_arm #define floatx80_eq floatx80_eq_arm @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_arm #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_arm #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_arm +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_arm #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_arm #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_arm #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_arm @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_arm #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_arm #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_arm +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_arm +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_arm #define tcg_gen_gvec_sari tcg_gen_gvec_sari_arm +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_arm +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_arm #define tcg_gen_gvec_shls tcg_gen_gvec_shls_arm #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_arm #define tcg_gen_gvec_sars tcg_gen_gvec_sars_arm +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_arm #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_arm #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_arm #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_arm +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_arm +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_arm #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_arm #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_arm #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_arm @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_arm #define tcg_gen_shri_vec tcg_gen_shri_vec_arm #define tcg_gen_sari_vec tcg_gen_sari_vec_arm +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_arm +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_arm #define tcg_gen_cmp_vec tcg_gen_cmp_vec_arm #define tcg_gen_add_vec tcg_gen_add_vec_arm #define tcg_gen_sub_vec tcg_gen_sub_vec_arm @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_arm #define tcg_gen_shrv_vec tcg_gen_shrv_vec_arm #define tcg_gen_sarv_vec tcg_gen_sarv_vec_arm +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_arm +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_arm #define tcg_gen_shls_vec tcg_gen_shls_vec_arm #define tcg_gen_shrs_vec tcg_gen_shrs_vec_arm #define tcg_gen_sars_vec tcg_gen_sars_vec_arm +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_arm 
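
The tcg_gen_gvec_rotli/rotri/rotls/rotlv/rotrv entries above correspond to the element-wise rotate helpers added in tcg-runtime-gvec.c earlier in this patch. As a standalone sketch (local names only, not QEMU's rol32/clear_high infrastructure), the variable-count 32-bit form reduces to a per-lane loop with the count masked to the lane width, which is what keeps every shift amount well defined:

    #include <stddef.h>
    #include <stdint.h>

    /* Local rotate-left of a 32-bit value; sh has already been masked to 0..31. */
    static inline uint32_t rotl32_local(uint32_t x, unsigned sh)
    {
        return sh ? (x << sh) | (x >> (32 - sh)) : x;
    }

    /* Per-lane rotate in the spirit of gvec_rotl32v: lane i of b supplies the
     * rotate count for lane i of a, masked to the lane width. */
    static void rotl32_lanes(uint32_t *d, const uint32_t *a,
                             const uint32_t *b, size_t lanes)
    {
        for (size_t i = 0; i < lanes; i++) {
            d[i] = rotl32_local(a[i], b[i] & 31);
        }
    }

The immediate forms (gvec_rotl8i through gvec_rotl64i) are the same loop with a single count taken from simd_data(desc) instead of a second vector operand.
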
#define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_arm #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_arm #define tb_htable_lookup tb_htable_lookup_arm @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_arm #define cpu_loop_exit_atomic cpu_loop_exit_atomic_arm #define tlb_init tlb_init_arm +#define tlb_destroy tlb_destroy_arm #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_arm #define tlb_flush tlb_flush_arm #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_arm @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_arm #define get_page_addr_code_hostp get_page_addr_code_hostp_arm #define get_page_addr_code get_page_addr_code_arm +#define probe_access_flags probe_access_flags_arm #define probe_access probe_access_arm #define tlb_vaddr_to_host tlb_vaddr_to_host_arm #define helper_ret_ldub_mmu helper_ret_ldub_mmu_arm @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_arm #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_arm #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_arm -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_arm -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_arm -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_arm -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_arm +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_arm +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_arm +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_arm +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_arm +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_arm +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_arm +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_arm +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_arm #define cpu_ldub_data_ra cpu_ldub_data_ra_arm #define cpu_ldsb_data_ra cpu_ldsb_data_ra_arm -#define cpu_lduw_data_ra cpu_lduw_data_ra_arm -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_arm -#define cpu_ldl_data_ra cpu_ldl_data_ra_arm -#define cpu_ldq_data_ra cpu_ldq_data_ra_arm +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_arm +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_arm +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_arm +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_arm +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_arm +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_arm +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_arm +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_arm #define cpu_ldub_data cpu_ldub_data_arm #define cpu_ldsb_data cpu_ldsb_data_arm -#define cpu_lduw_data cpu_lduw_data_arm -#define cpu_ldsw_data cpu_ldsw_data_arm -#define cpu_ldl_data cpu_ldl_data_arm -#define cpu_ldq_data cpu_ldq_data_arm +#define cpu_lduw_be_data cpu_lduw_be_data_arm +#define cpu_lduw_le_data cpu_lduw_le_data_arm +#define cpu_ldsw_be_data cpu_ldsw_be_data_arm +#define cpu_ldsw_le_data cpu_ldsw_le_data_arm +#define cpu_ldl_be_data cpu_ldl_be_data_arm +#define cpu_ldl_le_data cpu_ldl_le_data_arm +#define cpu_ldq_le_data cpu_ldq_le_data_arm +#define cpu_ldq_be_data cpu_ldq_be_data_arm #define helper_ret_stb_mmu helper_ret_stb_mmu_arm #define helper_le_stw_mmu helper_le_stw_mmu_arm #define helper_be_stw_mmu helper_be_stw_mmu_arm @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_arm #define helper_be_stq_mmu helper_be_stq_mmu_arm #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_arm -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_arm -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_arm -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_arm +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_arm +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_arm +#define 
cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_arm +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_arm +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_arm +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_arm #define cpu_stb_data_ra cpu_stb_data_ra_arm -#define cpu_stw_data_ra cpu_stw_data_ra_arm -#define cpu_stl_data_ra cpu_stl_data_ra_arm -#define cpu_stq_data_ra cpu_stq_data_ra_arm +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_arm +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_arm +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_arm +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_arm +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_arm +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_arm #define cpu_stb_data cpu_stb_data_arm -#define cpu_stw_data cpu_stw_data_arm -#define cpu_stl_data cpu_stl_data_arm -#define cpu_stq_data cpu_stq_data_arm +#define cpu_stw_be_data cpu_stw_be_data_arm +#define cpu_stw_le_data cpu_stw_le_data_arm +#define cpu_stl_be_data cpu_stl_be_data_arm +#define cpu_stl_le_data cpu_stl_le_data_arm +#define cpu_stq_be_data cpu_stq_be_data_arm +#define cpu_stq_le_data cpu_stq_le_data_arm #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_arm #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_arm #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_arm @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_arm #define cpu_ldl_code cpu_ldl_code_arm #define cpu_ldq_code cpu_ldq_code_arm +#define cpu_interrupt_handler cpu_interrupt_handler_arm #define helper_div_i32 helper_div_i32_arm #define helper_rem_i32 helper_rem_i32_arm #define helper_divu_i32 helper_divu_i32_arm @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_arm #define helper_gvec_sar32i helper_gvec_sar32i_arm #define helper_gvec_sar64i helper_gvec_sar64i_arm +#define helper_gvec_rotl8i helper_gvec_rotl8i_arm +#define helper_gvec_rotl16i helper_gvec_rotl16i_arm +#define helper_gvec_rotl32i helper_gvec_rotl32i_arm +#define helper_gvec_rotl64i helper_gvec_rotl64i_arm #define helper_gvec_shl8v helper_gvec_shl8v_arm #define helper_gvec_shl16v helper_gvec_shl16v_arm #define helper_gvec_shl32v helper_gvec_shl32v_arm @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_arm #define helper_gvec_sar32v helper_gvec_sar32v_arm #define helper_gvec_sar64v helper_gvec_sar64v_arm +#define helper_gvec_rotl8v helper_gvec_rotl8v_arm +#define helper_gvec_rotl16v helper_gvec_rotl16v_arm +#define helper_gvec_rotl32v helper_gvec_rotl32v_arm +#define helper_gvec_rotl64v helper_gvec_rotl64v_arm +#define helper_gvec_rotr8v helper_gvec_rotr8v_arm +#define helper_gvec_rotr16v helper_gvec_rotr16v_arm +#define helper_gvec_rotr32v helper_gvec_rotr32v_arm +#define helper_gvec_rotr64v helper_gvec_rotr64v_arm #define helper_gvec_eq8 helper_gvec_eq8_arm #define helper_gvec_ne8 helper_gvec_ne8_arm #define helper_gvec_lt8 helper_gvec_lt8_arm @@ -1997,4 +2051,100 @@ #define sri_op sri_op_arm #define usra_op usra_op_arm #define ssra_op ssra_op_arm +#define gen_gvec_ceq0 gen_gvec_ceq0_arm +#define gen_gvec_cge0 gen_gvec_cge0_arm +#define gen_gvec_cgt0 gen_gvec_cgt0_arm +#define gen_gvec_cle0 gen_gvec_cle0_arm +#define gen_gvec_clt0 gen_gvec_clt0_arm +#define gen_gvec_cmtst gen_gvec_cmtst_arm +#define gen_gvec_mla gen_gvec_mla_arm +#define gen_gvec_mls gen_gvec_mls_arm +#define gen_gvec_saba gen_gvec_saba_arm +#define gen_gvec_sabd gen_gvec_sabd_arm +#define gen_gvec_sli gen_gvec_sli_arm +#define gen_gvec_sqadd_qc gen_gvec_sqadd_qc_arm +#define gen_gvec_sqrdmlah_qc gen_gvec_sqrdmlah_qc_arm 
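
The rename block above also grows because QEMU 5.1 splits the endianness-implicit cpu_ld*/cpu_st* accessors into explicit _be_ and _le_ variants, so each variant now needs its own _arm-suffixed alias. The point of the split is that byte order travels with the accessor name rather than with a single target-wide default; a minimal standalone sketch of that distinction (not the cpu_ldst.h implementation) is:

    #include <stdint.h>

    /* Standalone sketch of what the _be_ / _le_ accessor split encodes: the
     * byte order is part of the function name, not a property of the build. */
    static uint16_t load16_be(const uint8_t *p)
    {
        return (uint16_t)((p[0] << 8) | p[1]);
    }

    static uint16_t load16_le(const uint8_t *p)
    {
        return (uint16_t)(p[0] | (p[1] << 8));
    }

Code that needs a specific byte order can now ask for it per access instead of relying on the target's compile-time default.
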
+#define gen_gvec_sqrdmlsh_qc gen_gvec_sqrdmlsh_qc_arm +#define gen_gvec_sqsub_qc gen_gvec_sqsub_qc_arm +#define gen_gvec_sri gen_gvec_sri_arm +#define gen_gvec_srshr gen_gvec_srshr_arm +#define gen_gvec_srsra gen_gvec_srsra_arm +#define gen_gvec_sshl gen_gvec_sshl_arm +#define gen_gvec_ssra gen_gvec_ssra_arm +#define gen_gvec_uaba gen_gvec_uaba_arm +#define gen_gvec_uabd gen_gvec_uabd_arm +#define gen_gvec_uqadd_qc gen_gvec_uqadd_qc_arm +#define gen_gvec_uqsub_qc gen_gvec_uqsub_qc_arm +#define gen_gvec_urshr gen_gvec_urshr_arm +#define gen_gvec_ursra gen_gvec_ursra_arm +#define gen_gvec_ushl gen_gvec_ushl_arm +#define gen_gvec_usra gen_gvec_usra_arm +#define helper_crypto_rax1 helper_crypto_rax1_arm +#define helper_crypto_sha1c helper_crypto_sha1c_arm +#define helper_crypto_sha1m helper_crypto_sha1m_arm +#define helper_crypto_sha1p helper_crypto_sha1p_arm +#define helper_crypto_sha1su0 helper_crypto_sha1su0_arm +#define helper_crypto_sm3tt1a helper_crypto_sm3tt1a_arm +#define helper_crypto_sm3tt1b helper_crypto_sm3tt1b_arm +#define helper_crypto_sm3tt2a helper_crypto_sm3tt2a_arm +#define helper_crypto_sm3tt2b helper_crypto_sm3tt2b_arm +#define helper_gvec_ceq0_b helper_gvec_ceq0_b_arm +#define helper_gvec_ceq0_h helper_gvec_ceq0_h_arm +#define helper_gvec_cge0_b helper_gvec_cge0_b_arm +#define helper_gvec_cge0_h helper_gvec_cge0_h_arm +#define helper_gvec_cgt0_b helper_gvec_cgt0_b_arm +#define helper_gvec_cgt0_h helper_gvec_cgt0_h_arm +#define helper_gvec_cle0_b helper_gvec_cle0_b_arm +#define helper_gvec_cle0_h helper_gvec_cle0_h_arm +#define helper_gvec_clt0_b helper_gvec_clt0_b_arm +#define helper_gvec_clt0_h helper_gvec_clt0_h_arm +#define helper_gvec_fabd_s helper_gvec_fabd_s_arm +#define helper_gvec_saba_b helper_gvec_saba_b_arm +#define helper_gvec_saba_d helper_gvec_saba_d_arm +#define helper_gvec_saba_h helper_gvec_saba_h_arm +#define helper_gvec_saba_s helper_gvec_saba_s_arm +#define helper_gvec_sabd_b helper_gvec_sabd_b_arm +#define helper_gvec_sabd_d helper_gvec_sabd_d_arm +#define helper_gvec_sabd_h helper_gvec_sabd_h_arm +#define helper_gvec_sabd_s helper_gvec_sabd_s_arm +#define helper_gvec_sli_b helper_gvec_sli_b_arm +#define helper_gvec_sli_d helper_gvec_sli_d_arm +#define helper_gvec_sli_h helper_gvec_sli_h_arm +#define helper_gvec_sli_s helper_gvec_sli_s_arm +#define helper_gvec_sri_b helper_gvec_sri_b_arm +#define helper_gvec_sri_d helper_gvec_sri_d_arm +#define helper_gvec_sri_h helper_gvec_sri_h_arm +#define helper_gvec_sri_s helper_gvec_sri_s_arm +#define helper_gvec_srshr_b helper_gvec_srshr_b_arm +#define helper_gvec_srshr_d helper_gvec_srshr_d_arm +#define helper_gvec_srshr_h helper_gvec_srshr_h_arm +#define helper_gvec_srshr_s helper_gvec_srshr_s_arm +#define helper_gvec_srsra_b helper_gvec_srsra_b_arm +#define helper_gvec_srsra_d helper_gvec_srsra_d_arm +#define helper_gvec_srsra_h helper_gvec_srsra_h_arm +#define helper_gvec_srsra_s helper_gvec_srsra_s_arm +#define helper_gvec_ssra_b helper_gvec_ssra_b_arm +#define helper_gvec_ssra_d helper_gvec_ssra_d_arm +#define helper_gvec_ssra_h helper_gvec_ssra_h_arm +#define helper_gvec_ssra_s helper_gvec_ssra_s_arm +#define helper_gvec_uaba_b helper_gvec_uaba_b_arm +#define helper_gvec_uaba_d helper_gvec_uaba_d_arm +#define helper_gvec_uaba_h helper_gvec_uaba_h_arm +#define helper_gvec_uaba_s helper_gvec_uaba_s_arm +#define helper_gvec_uabd_b helper_gvec_uabd_b_arm +#define helper_gvec_uabd_d helper_gvec_uabd_d_arm +#define helper_gvec_uabd_h helper_gvec_uabd_h_arm +#define helper_gvec_uabd_s helper_gvec_uabd_s_arm 
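
The gen_gvec_saba/uaba and helper_gvec_sabd/uabd entries above back Arm's vector absolute-difference instructions (SABD/UABD) and their accumulating forms (SABA/UABA). Per lane they reduce to the following; this is a standalone sketch of the unsigned 32-bit case, not the generated helpers themselves:

    #include <stddef.h>
    #include <stdint.h>

    /* UABD: d[i] = |a[i] - b[i]| on unsigned lanes. */
    static void uabd_u32(uint32_t *d, const uint32_t *a,
                         const uint32_t *b, size_t lanes)
    {
        for (size_t i = 0; i < lanes; i++) {
            d[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
        }
    }

    /* UABA: same per-lane difference, accumulated into the destination. */
    static void uaba_u32(uint32_t *d, const uint32_t *a,
                         const uint32_t *b, size_t lanes)
    {
        for (size_t i = 0; i < lanes; i++) {
            d[i] += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
        }
    }

The signed variants only change the comparison that decides which operand is larger; the 2's-complement subtraction yields the same absolute difference either way.
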
+#define helper_gvec_urshr_b helper_gvec_urshr_b_arm +#define helper_gvec_urshr_d helper_gvec_urshr_d_arm +#define helper_gvec_urshr_h helper_gvec_urshr_h_arm +#define helper_gvec_urshr_s helper_gvec_urshr_s_arm +#define helper_gvec_ursra_b helper_gvec_ursra_b_arm +#define helper_gvec_ursra_d helper_gvec_ursra_d_arm +#define helper_gvec_ursra_h helper_gvec_ursra_h_arm +#define helper_gvec_ursra_s helper_gvec_ursra_s_arm +#define helper_gvec_usra_b helper_gvec_usra_b_arm +#define helper_gvec_usra_d helper_gvec_usra_d_arm +#define helper_gvec_usra_h helper_gvec_usra_h_arm +#define helper_gvec_usra_s helper_gvec_usra_s_arm #endif diff --git a/qemu/exec.c b/qemu/exec.c index 9786b19557..e9070d3448 100644 --- a/qemu/exec.c +++ b/qemu/exec.c @@ -171,9 +171,7 @@ static void phys_page_set(AddressSpaceDispatch *d, hwaddr index, uint64_t nb, uint16_t leaf) { -#ifdef TARGET_ARM - struct uc_struct *uc = d->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = d->uc; /* Wildly overreserve - it doesn't matter much. */ phys_map_node_reserve(d, &d->map, 3 * P_L2_LEVELS); @@ -254,9 +252,7 @@ static inline bool section_covers_addr(const MemoryRegionSection *section, static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr) { -#ifdef TARGET_ARM - struct uc_struct *uc = d->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = d->uc; PhysPageEntry lp = d->phys_map, *p; Node *nodes = d->map.nodes; MemoryRegionSection *sections = d->map.sections; @@ -283,9 +279,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, hwaddr addr, bool resolve_subpage) { -#ifdef TARGET_ARM - struct uc_struct *uc = d->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = d->uc; MemoryRegionSection *section = d->mru_section; subpage_t *subpage; @@ -1421,9 +1415,7 @@ static uint16_t dummy_section(struct uc_struct *uc, PhysPageMap *map, FlatView * MemoryRegionSection *iotlb_to_section(CPUState *cpu, hwaddr index, MemTxAttrs attrs) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; int asidx = cpu_asidx_from_attrs(cpu, attrs); CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx]; AddressSpaceDispatch *d = cpuas->memory_dispatch; @@ -1459,7 +1451,7 @@ AddressSpaceDispatch *address_space_dispatch_new(struct uc_struct *uc, FlatView void address_space_dispatch_clear(AddressSpaceDispatch *d) { MemoryRegionSection *section; - struct uc_struct *uc = d->uc; + UNICORN_UNUSED struct uc_struct *uc = d->uc; while (d->map.sections_nb > 0) { d->map.sections_nb--; section = &d->map.sections[d->map.sections_nb]; @@ -1891,7 +1883,7 @@ void *address_space_map(AddressSpace *as, MemoryRegion *mr; void *ptr; FlatView *fv; - struct uc_struct *uc = as->uc; + UNICORN_UNUSED struct uc_struct *uc = as->uc; if (len == 0) { return NULL; @@ -2020,9 +2012,7 @@ static inline MemoryRegion *address_space_translate_cached( int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, void *ptr, target_ulong len, bool is_write) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; hwaddr phys_addr; target_ulong l, page; uint8_t *buf = ptr; @@ -2030,6 +2020,7 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, while (len > 0) { int asidx; MemTxAttrs attrs; + MemTxResult res; page = addr & TARGET_PAGE_MASK; phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs); @@ -2042,12 +2033,15 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, l = len; phys_addr += (addr & ~TARGET_PAGE_MASK); if 
(is_write) { - address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, + res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, l); } else { - address_space_read(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, + res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, l); } + if (res != MEMTX_OK) { + return -1; + } len -= l; buf += l; addr += l; diff --git a/qemu/fpu/softfloat-specialize.inc.c b/qemu/fpu/softfloat-specialize.inc.c index 5ab2fa1941..034d18199c 100644 --- a/qemu/fpu/softfloat-specialize.inc.c +++ b/qemu/fpu/softfloat-specialize.inc.c @@ -93,7 +93,7 @@ this code that are retained. * 2008 revision and backward compatibility with their original choice. * Thus for MIPS we must make the choice at runtime. */ -static inline flag snan_bit_is_one(float_status *status) +static inline bool snan_bit_is_one(float_status *status) { #if defined(TARGET_MIPS) return status->snan_bit_is_one; @@ -114,7 +114,7 @@ static bool parts_is_snan_frac(uint64_t frac, float_status *status) #ifdef NO_SIGNALING_NANS return false; #else - flag msb = extract64(frac, DECOMPOSED_BINARY_POINT - 1, 1); + bool msb = extract64(frac, DECOMPOSED_BINARY_POINT - 1, 1); return msb == snan_bit_is_one(status); #endif } @@ -236,7 +236,7 @@ void float_raise(uint8_t flags, float_status *status) | Internal canonical NaN format. *----------------------------------------------------------------------------*/ typedef struct { - flag sign; + bool sign; uint64_t high, low; } commonNaNT; @@ -245,7 +245,7 @@ typedef struct { | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float16_is_quiet_nan(float16 a_, float_status *status) +bool float16_is_quiet_nan(float16 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return float16_is_any_nan(a_); @@ -254,7 +254,7 @@ int float16_is_quiet_nan(float16 a_, float_status *status) if (snan_bit_is_one(status)) { return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); } else { - return ((a & ~0x8000) >= 0x7C80); + return ((a >> 9) & 0x3F) == 0x3F; } #endif } @@ -264,14 +264,14 @@ int float16_is_quiet_nan(float16 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float16_is_signaling_nan(float16 a_, float_status *status) +bool float16_is_signaling_nan(float16 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; #else uint16_t a = float16_val(a_); if (snan_bit_is_one(status)) { - return ((a & ~0x8000) >= 0x7C80); + return ((a >> 9) & 0x3F) == 0x3F; } else { return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); } @@ -283,7 +283,7 @@ int float16_is_signaling_nan(float16 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float32_is_quiet_nan(float32 a_, float_status *status) +bool float32_is_quiet_nan(float32 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return float32_is_any_nan(a_); @@ -302,7 +302,7 @@ int float32_is_quiet_nan(float32 a_, float_status *status) | NaN; otherwise returns 0. 
*----------------------------------------------------------------------------*/ -int float32_is_signaling_nan(float32 a_, float_status *status) +bool float32_is_signaling_nan(float32 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; @@ -374,7 +374,7 @@ static float32 commonNaNToFloat32(commonNaNT a, float_status *status) *----------------------------------------------------------------------------*/ static int pickNaN(FloatClass a_cls, FloatClass b_cls, - flag aIsLargerSignificand) + bool aIsLargerSignificand) { #if defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) /* ARM mandated NaN propagation rules (see FPProcessNaNs()), take @@ -584,7 +584,7 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; uint32_t av, bv; FloatClass a_cls, b_cls; @@ -637,7 +637,7 @@ static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float64_is_quiet_nan(float64 a_, float_status *status) +bool float64_is_quiet_nan(float64 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return float64_is_any_nan(a_); @@ -657,7 +657,7 @@ int float64_is_quiet_nan(float64 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float64_is_signaling_nan(float64 a_, float_status *status) +bool float64_is_signaling_nan(float64 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; @@ -722,7 +722,7 @@ static float64 commonNaNToFloat64(commonNaNT a, float_status *status) static float64 propagateFloat64NaN(float64 a, float64 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; uint64_t av, bv; FloatClass a_cls, b_cls; @@ -890,7 +890,7 @@ static floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; FloatClass a_cls, b_cls; /* This is not complete, but is good enough for pickNaN. */ @@ -939,7 +939,7 @@ floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float128_is_quiet_nan(float128 a, float_status *status) +bool float128_is_quiet_nan(float128 a, float_status *status) { #ifdef NO_SIGNALING_NANS return float128_is_any_nan(a); @@ -959,7 +959,7 @@ int float128_is_quiet_nan(float128 a, float_status *status) | signaling NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float128_is_signaling_nan(float128 a, float_status *status) +bool float128_is_signaling_nan(float128 a, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; @@ -1038,7 +1038,7 @@ static float128 commonNaNToFloat128(commonNaNT a, float_status *status) static float128 propagateFloat128NaN(float128 a, float128 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; FloatClass a_cls, b_cls; /* This is not complete, but is good enough for pickNaN. */ diff --git a/qemu/fpu/softfloat.c b/qemu/fpu/softfloat.c index 0e7938dc1c..930a2e352a 100644 --- a/qemu/fpu/softfloat.c +++ b/qemu/fpu/softfloat.c @@ -114,7 +114,7 @@ this code that are retained. 
* * The idea is thus to leverage the host FPU to (1) compute FP operations * and (2) identify whether FP exceptions occurred while avoiding - * expensive exception flag register accesses. + * expensive exception bool register accesses. * * An important optimization shown in the paper is that given that exception * flags are rarely cleared by the guest, we can avoid recomputing some flags. @@ -217,7 +217,7 @@ GEN_INPUT_FLUSH3(float64_input_flush3, float64) /* * Some targets clear the FP flags before most FP operations. This prevents - * the use of hardfloat, since hardfloat relies on the inexact flag being + * the use of hardfloat, since hardfloat relies on the inexact bool being * already set. */ #if defined(TARGET_PPC) || defined(__FAST_MATH__) @@ -342,12 +342,10 @@ static inline bool f64_is_inf(union_float64 a) return float64_is_infinity(a.s); } -/* Note: @fast_test and @post can be NULL */ static inline float32 float32_gen2(float32 xa, float32 xb, float_status *s, hard_f32_op2_fn hard, soft_f32_op2_fn soft, - f32_check_fn pre, f32_check_fn post, - f32_check_fn fast_test, soft_f32_op2_fn fast_op) + f32_check_fn pre, f32_check_fn post) { union_float32 ua, ub, ur; @@ -362,17 +360,12 @@ float32_gen2(float32 xa, float32 xb, float_status *s, if (unlikely(!pre(ua, ub))) { goto soft; } - if (fast_test && fast_test(ua, ub)) { - return fast_op(ua.s, ub.s, s); - } ur.h = hard(ua.h, ub.h); if (unlikely(f32_is_inf(ur))) { s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { - if (post == NULL || post(ua, ub)) { - goto soft; - } + } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) { + goto soft; } return ur.s; @@ -383,8 +376,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s, static inline float64 float64_gen2(float64 xa, float64 xb, float_status *s, hard_f64_op2_fn hard, soft_f64_op2_fn soft, - f64_check_fn pre, f64_check_fn post, - f64_check_fn fast_test, soft_f64_op2_fn fast_op) + f64_check_fn pre, f64_check_fn post) { union_float64 ua, ub, ur; @@ -399,17 +391,12 @@ float64_gen2(float64 xa, float64 xb, float_status *s, if (unlikely(!pre(ua, ub))) { goto soft; } - if (fast_test && fast_test(ua, ub)) { - return fast_op(ua.s, ub.s, s); - } ur.h = hard(ua.h, ub.h); if (unlikely(f64_is_inf(ur))) { s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabs(ur.h) <= DBL_MIN)) { - if (post == NULL || post(ua, ub)) { - goto soft; - } + } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) { + goto soft; } return ur.s; @@ -439,7 +426,7 @@ static inline int extractFloat32Exp(float32 a) | Returns the sign bit of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloat32Sign(float32 a) +static inline bool extractFloat32Sign(float32 a) { return float32_val(a) >> 31; } @@ -466,7 +453,7 @@ static inline int extractFloat64Exp(float64 a) | Returns the sign bit of the double-precision floating-point value `a'. 
*----------------------------------------------------------------------------*/ -static inline flag extractFloat64Sign(float64 a) +static inline bool extractFloat64Sign(float64 a) { return float64_val(a) >> 63; } @@ -786,8 +773,7 @@ static FloatParts round_canonical(FloatParts p, float_status *s, p.cls = float_class_zero; goto do_zero; } else { - bool is_tiny = (s->float_detect_tininess - == float_tininess_before_rounding) + bool is_tiny = s->tininess_before_rounding || (exp < 0) || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT); @@ -802,6 +788,8 @@ static FloatParts round_canonical(FloatParts p, float_status *s, case float_round_to_odd: inc = frac & frac_lsb ? 0 : round_mask; break; + default: + break; } flags |= float_flag_inexact; frac += inc; @@ -1149,7 +1137,7 @@ static double hard_f64_sub(double a, double b) return a - b; } -static bool f32_addsub_post(union_float32 a, union_float32 b) +static bool f32_addsubmul_post(union_float32 a, union_float32 b) { if (QEMU_HARDFLOAT_2F32_USE_FP) { return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); @@ -1157,7 +1145,7 @@ static bool f32_addsub_post(union_float32 a, union_float32 b) return !(float32_is_zero(a.s) && float32_is_zero(b.s)); } -static bool f64_addsub_post(union_float64 a, union_float64 b) +static bool f64_addsubmul_post(union_float64 a, union_float64 b) { if (QEMU_HARDFLOAT_2F64_USE_FP) { return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); @@ -1170,14 +1158,14 @@ static float32 float32_addsub(float32 a, float32 b, float_status *s, hard_f32_op2_fn hard, soft_f32_op2_fn soft) { return float32_gen2(a, b, s, hard, soft, - f32_is_zon2, f32_addsub_post, NULL, NULL); + f32_is_zon2, f32_addsubmul_post); } static float64 float64_addsub(float64 a, float64 b, float_status *s, hard_f64_op2_fn hard, soft_f64_op2_fn soft) { return float64_gen2(a, b, s, hard, soft, - f64_is_zon2, f64_addsub_post, NULL, NULL); + f64_is_zon2, f64_addsubmul_post); } float32 QEMU_FLATTEN @@ -1294,42 +1282,18 @@ static double hard_f64_mul(double a, double b) return a * b; } -static bool f32_mul_fast_test(union_float32 a, union_float32 b) -{ - return float32_is_zero(a.s) || float32_is_zero(b.s); -} - -static bool f64_mul_fast_test(union_float64 a, union_float64 b) -{ - return float64_is_zero(a.s) || float64_is_zero(b.s); -} - -static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s) -{ - bool signbit = float32_is_neg(a) ^ float32_is_neg(b); - - return float32_set_sign(float32_zero, signbit); -} - -static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s) -{ - bool signbit = float64_is_neg(a) ^ float64_is_neg(b); - - return float64_set_sign(float64_zero, signbit); -} - float32 QEMU_FLATTEN float32_mul(float32 a, float32 b, float_status *s) { return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul, - f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op); + f32_is_zon2, f32_addsubmul_post); } float64 QEMU_FLATTEN float64_mul(float64 a, float64 b, float_status *s) { return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul, - f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op); + f64_is_zon2, f64_addsubmul_post); } /* @@ -1872,14 +1836,14 @@ float32 QEMU_FLATTEN float32_div(float32 a, float32 b, float_status *s) { return float32_gen2(a, b, s, hard_f32_div, soft_f32_div, - f32_div_pre, f32_div_post, NULL, NULL); + f32_div_pre, f32_div_post); } float64 QEMU_FLATTEN float64_div(float64 a, float64 b, float_status *s) { return float64_gen2(a, b, s, hard_f64_div, soft_f64_div, - f64_div_pre, f64_div_post, NULL, NULL); + 
f64_div_pre, f64_div_post); } /* @@ -2004,7 +1968,7 @@ float32 float64_to_float32(float64 a, float_status *s) * Arithmetic. */ -static FloatParts round_to_int(FloatParts a, int rmode, +static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode, int scale, float_status *s) { switch (a.cls) { @@ -2139,7 +2103,7 @@ float64 float64_round_to_int(float64 a, float_status *s) * is returned. */ -static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale, +static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode, int scale, int64_t min, int64_t max, float_status *s) { @@ -2191,63 +2155,63 @@ static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale, } } -int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale, +int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float16_unpack_canonical(a, s), rmode, scale, INT16_MIN, INT16_MAX, s); } -int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale, +int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float16_unpack_canonical(a, s), rmode, scale, INT32_MIN, INT32_MAX, s); } -int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale, +int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float16_unpack_canonical(a, s), rmode, scale, INT64_MIN, INT64_MAX, s); } -int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale, +int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float32_unpack_canonical(a, s), rmode, scale, INT16_MIN, INT16_MAX, s); } -int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale, +int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float32_unpack_canonical(a, s), rmode, scale, INT32_MIN, INT32_MAX, s); } -int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale, +int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float32_unpack_canonical(a, s), rmode, scale, INT64_MIN, INT64_MAX, s); } -int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale, +int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float64_unpack_canonical(a, s), rmode, scale, INT16_MIN, INT16_MAX, s); } -int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale, +int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float64_unpack_canonical(a, s), rmode, scale, INT32_MIN, INT32_MAX, s); } -int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale, +int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float64_unpack_canonical(a, s), @@ -2357,7 +2321,7 @@ int64_t float64_to_int64_round_to_zero(float64 a, float_status *s) * flag. 
*/ -static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale, +static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode, int scale, uint64_t max, float_status *s) { int orig_flags = get_float_exception_flags(s); @@ -2404,63 +2368,63 @@ static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale, } } -uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale, +uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float16_unpack_canonical(a, s), rmode, scale, UINT16_MAX, s); } -uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale, +uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float16_unpack_canonical(a, s), rmode, scale, UINT32_MAX, s); } -uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale, +uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float16_unpack_canonical(a, s), rmode, scale, UINT64_MAX, s); } -uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale, +uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float32_unpack_canonical(a, s), rmode, scale, UINT16_MAX, s); } -uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale, +uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float32_unpack_canonical(a, s), rmode, scale, UINT32_MAX, s); } -uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale, +uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float32_unpack_canonical(a, s), rmode, scale, UINT64_MAX, s); } -uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale, +uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float64_unpack_canonical(a, s), rmode, scale, UINT16_MAX, s); } -uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale, +uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float64_unpack_canonical(a, s), rmode, scale, UINT32_MAX, s); } -uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale, +uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float64_unpack_canonical(a, s), @@ -2934,8 +2898,8 @@ MINMAX(64, maxnummag, false, true, true) #undef MINMAX /* Floating point compare */ -static int compare_floats(FloatParts a, FloatParts b, bool is_quiet, - float_status *s) +static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet, + float_status *s) { if (is_nan(a.cls) || is_nan(b.cls)) { if (!is_quiet || @@ -3006,17 +2970,17 @@ COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64) #undef COMPARE -int float16_compare(float16 a, float16 b, float_status *s) +FloatRelation float16_compare(float16 a, float16 b, float_status *s) { return soft_f16_compare(a, b, false, s); } -int float16_compare_quiet(float16 a, float16 b, float_status *s) +FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s) { return soft_f16_compare(a, b, true, s); } -static int QEMU_FLATTEN +static FloatRelation QEMU_FLATTEN f32_compare(float32 xa, float32 
xb, bool is_quiet, float_status *s) { union_float32 ua, ub; @@ -3045,17 +3009,17 @@ f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s) return soft_f32_compare(ua.s, ub.s, is_quiet, s); } -int float32_compare(float32 a, float32 b, float_status *s) +FloatRelation float32_compare(float32 a, float32 b, float_status *s) { return f32_compare(a, b, false, s); } -int float32_compare_quiet(float32 a, float32 b, float_status *s) +FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s) { return f32_compare(a, b, true, s); } -static int QEMU_FLATTEN +static FloatRelation QEMU_FLATTEN f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s) { union_float64 ua, ub; @@ -3084,12 +3048,12 @@ f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s) return soft_f64_compare(ua.s, ub.s, is_quiet, s); } -int float64_compare(float64 a, float64 b, float_status *s) +FloatRelation float64_compare(float64 a, float64 b, float_status *s) { return f64_compare(a, b, false, s); } -int float64_compare_quiet(float64 a, float64 b, float_status *s) +FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s) { return f64_compare(a, b, true, s); } @@ -3420,10 +3384,10 @@ float64 float64_squash_input_denormal(float64 a, float_status *status) | positive or negative integer is returned. *----------------------------------------------------------------------------*/ -static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status) +static int32_t roundAndPackInt32(bool zSign, uint64_t absZ, float_status *status) { int8_t roundingMode; - flag roundNearestEven; + bool roundNearestEven; int8_t roundIncrement, roundBits; int32_t z; @@ -3451,7 +3415,9 @@ static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status } roundBits = absZ & 0x7F; absZ = ( absZ + roundIncrement )>>7; - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x40) && roundNearestEven) { + absZ &= ~1; + } z = absZ; if ( zSign ) z = - z; if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { @@ -3477,11 +3443,11 @@ static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status | returned. *----------------------------------------------------------------------------*/ -static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, +static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment; + bool roundNearestEven, increment; int64_t z; roundingMode = status->float_rounding_mode; @@ -3509,7 +3475,9 @@ static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, if ( increment ) { ++absZ0; if ( absZ0 == 0 ) goto overflow; - absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven ); + if (!(absZ1 << 1) && roundNearestEven) { + absZ0 &= ~1; + } } z = absZ0; if ( zSign ) z = - z; @@ -3535,11 +3503,11 @@ static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, | exception is raised and the largest unsigned integer is returned. 
*----------------------------------------------------------------------------*/ -static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0, +static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0, uint64_t absZ1, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment; + bool roundNearestEven, increment; roundingMode = status->float_rounding_mode; roundNearestEven = (roundingMode == float_round_nearest_even); @@ -3569,7 +3537,9 @@ static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0, float_raise(float_flag_invalid, status); return UINT64_MAX; } - absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven); + if (!(absZ1 << 1) && roundNearestEven) { + absZ0 &= ~1; + } } if (zSign && absZ0) { @@ -3623,13 +3593,13 @@ static void | Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, +static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, float_status *status) { int8_t roundingMode; - flag roundNearestEven; + bool roundNearestEven; int8_t roundIncrement, roundBits; - flag isTiny; + bool isTiny; roundingMode = status->float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -3670,11 +3640,9 @@ static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, float_raise(float_flag_output_denormal, status); return packFloat32(zSign, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ( zSig + roundIncrement < 0x80000000 ); + isTiny = status->tininess_before_rounding + || ( zExp < -1 ) + || ( zSig + roundIncrement < 0x80000000 ); shift32RightJamming( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x7F; @@ -3694,7 +3662,9 @@ static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, status->float_exception_flags |= float_flag_inexact; } zSig = ( zSig + roundIncrement )>>7; - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x40) && roundNearestEven) { + zSig &= ~1; + } if ( zSig == 0 ) zExp = 0; return packFloat32( zSign, zExp, zSig ); @@ -3710,7 +3680,7 @@ static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, *----------------------------------------------------------------------------*/ static float32 - normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, + normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, float_status *status) { int8_t shiftCount; @@ -3750,7 +3720,7 @@ static void | significand. *----------------------------------------------------------------------------*/ -static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig) +static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig) { return make_float64( @@ -3780,13 +3750,13 @@ static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig) | Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, +static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, float_status *status) { int8_t roundingMode; - flag roundNearestEven; + bool roundNearestEven; int roundIncrement, roundBits; - flag isTiny; + bool isTiny; roundingMode = status->float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -3826,11 +3796,9 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, float_raise(float_flag_output_denormal, status); return packFloat64(zSign, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ( zSig + roundIncrement < UINT64_C(0x8000000000000000) ); + isTiny = status->tininess_before_rounding + || ( zExp < -1 ) + || ( zSig + roundIncrement < UINT64_C(0x8000000000000000) ); shift64RightJamming( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x3FF; @@ -3850,7 +3818,9 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, status->float_exception_flags |= float_flag_inexact; } zSig = ( zSig + roundIncrement )>>10; - zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x200) && roundNearestEven) { + zSig &= ~1; + } if ( zSig == 0 ) zExp = 0; return packFloat64( zSign, zExp, zSig ); @@ -3866,7 +3836,7 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, *----------------------------------------------------------------------------*/ static float64 - normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, + normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, float_status *status) { int8_t shiftCount; @@ -3918,12 +3888,12 @@ void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment, isTiny; + bool roundNearestEven, increment, isTiny; int64_t roundIncrement, roundMask, roundBits; roundingMode = status->float_rounding_mode; @@ -3969,11 +3939,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, float_raise(float_flag_output_denormal, status); return packFloatx80(zSign, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < 0 ) - || ( zSig0 <= zSig0 + roundIncrement ); + isTiny = status->tininess_before_rounding + || ( zExp < 0 ) + || ( zSig0 <= zSig0 + roundIncrement ); shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); zExp = 0; roundBits = zSig0 & roundMask; @@ -4047,12 +4015,10 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, floatx80_infinity_low); } if ( zExp <= 0 ) { - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < 0 ) - || ! increment - || ( zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF) ); + isTiny = status->tininess_before_rounding + || ( zExp < 0 ) + || ! 
increment + || ( zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF) ); shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); zExp = 0; if (isTiny && zSig1) { @@ -4080,8 +4046,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, } if ( increment ) { ++zSig0; - zSig0 &= - ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } if ( (int64_t) zSig0 < 0 ) zExp = 1; } return packFloatx80( zSign, zExp, zSig0 ); @@ -4097,7 +4064,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, zSig0 = UINT64_C(0x8000000000000000); } else { - zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } } } else { @@ -4117,7 +4086,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, *----------------------------------------------------------------------------*/ floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, - flag zSign, int32_t zExp, + bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { @@ -4176,10 +4145,10 @@ static inline int32_t extractFloat128Exp( float128 a ) | Returns the sign bit of the quadruple-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloat128Sign( float128 a ) +static inline bool extractFloat128Sign( float128 a ) { - return a.high>>63; + return a.high >> 63; } @@ -4238,7 +4207,7 @@ static void *----------------------------------------------------------------------------*/ static inline float128 - packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 ) + packFloat128( bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 ) { float128 z; @@ -4269,12 +4238,12 @@ static inline float128 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 roundAndPackFloat128(flag zSign, int32_t zExp, +static float128 roundAndPackFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment, isTiny; + bool roundNearestEven, increment, isTiny; roundingMode = status->float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -4331,17 +4300,12 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, float_raise(float_flag_output_denormal, status); return packFloat128(zSign, 0, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ! increment - || lt128( - zSig0, - zSig1, - UINT64_C(0x0001FFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFF) - ); + isTiny = status->tininess_before_rounding + || ( zExp < -1 ) + || ! increment + || lt128(zSig0, zSig1, + UINT64_C(0x0001FFFFFFFFFFFF), + UINT64_C(0xFFFFFFFFFFFFFFFF)); shift128ExtraRightJamming( zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); zExp = 0; @@ -4375,7 +4339,9 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, } if ( increment ) { add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); - zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); + if ((zSig2 + zSig2 == 0) && roundNearestEven) { + zSig1 &= ~1; + } } else { if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; @@ -4394,7 +4360,7 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, | point exponent. 
*----------------------------------------------------------------------------*/ -static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, +static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { @@ -4430,7 +4396,7 @@ static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, floatx80 int32_to_floatx80(int32_t a, float_status *status) { - flag zSign; + bool zSign; uint32_t absA; int8_t shiftCount; uint64_t zSig; @@ -4452,7 +4418,7 @@ floatx80 int32_to_floatx80(int32_t a, float_status *status) float128 int32_to_float128(int32_t a, float_status *status) { - flag zSign; + bool zSign; uint32_t absA; int8_t shiftCount; uint64_t zSig0; @@ -4475,7 +4441,7 @@ float128 int32_to_float128(int32_t a, float_status *status) floatx80 int64_to_floatx80(int64_t a, float_status *status) { - flag zSign; + bool zSign; uint64_t absA; int8_t shiftCount; @@ -4495,7 +4461,7 @@ floatx80 int64_to_floatx80(int64_t a, float_status *status) float128 int64_to_float128(int64_t a, float_status *status) { - flag zSign; + bool zSign; uint64_t absA; int8_t shiftCount; int32_t zExp; @@ -4543,7 +4509,7 @@ float128 uint64_to_float128(uint64_t a, float_status *status) floatx80 float32_to_floatx80(float32 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint32_t aSig; @@ -4553,7 +4519,8 @@ floatx80 float32_to_floatx80(float32 a, float_status *status) aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { if (aSig) { - return commonNaNToFloatx80(float32ToCommonNaN(a, status), status); + floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status), status); + return floatx80_silence_nan(res, status); } return packFloatx80(aSign, floatx80_infinity_high, @@ -4577,7 +4544,7 @@ floatx80 float32_to_floatx80(float32 a, float_status *status) float128 float32_to_float128(float32 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint32_t aSig; @@ -4608,7 +4575,7 @@ float128 float32_to_float128(float32 a, float_status *status) float32 float32_rem(float32 a, float32 b, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp, bExp, expDiff; uint32_t aSig, bSig; uint32_t q; @@ -4751,7 +4718,7 @@ static const float64 float32_exp2_coefficients[15] = float32 float32_exp2(float32 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint32_t aSig; float64 r, x, xn; @@ -4801,7 +4768,7 @@ float32 float32_exp2(float32 a, float_status *status) *----------------------------------------------------------------------------*/ float32 float32_log2(float32 a, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp; uint32_t aSig, zSig, i; @@ -4848,222 +4815,6 @@ float32 float32_log2(float32 a, float_status *status) return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status); } -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float32_eq(float32 a, float32 b, float_status *status) -{ - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - av = float32_val(a); - bv = float32_val(b); - return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_le(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_lt(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float32_unordered(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_eq_quiet(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return ( float32_val(a) == float32_val(b) ) || - ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_le_quiet(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float32_lt_quiet(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_unordered_quiet(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - /*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point value | `a' to the extended double-precision floating-point format. 
The conversion @@ -5073,7 +4824,7 @@ int float32_unordered_quiet(float32 a, float32 b, float_status *status) floatx80 float64_to_floatx80(float64 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint64_t aSig; @@ -5083,7 +4834,8 @@ floatx80 float64_to_floatx80(float64 a, float_status *status) aSign = extractFloat64Sign( a ); if ( aExp == 0x7FF ) { if (aSig) { - return commonNaNToFloatx80(float64ToCommonNaN(a, status), status); + floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status), status); + return floatx80_silence_nan(res, status); } return packFloatx80(aSign, floatx80_infinity_high, @@ -5108,7 +4860,7 @@ floatx80 float64_to_floatx80(float64 a, float_status *status) float128 float64_to_float128(float64 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint64_t aSig, zSig0, zSig1; @@ -5141,7 +4893,7 @@ float128 float64_to_float128(float64 a, float_status *status) float64 float64_rem(float64 a, float64 b, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp, bExp, expDiff; uint64_t aSig, bSig; uint64_t q, alternateASig; @@ -5236,7 +4988,7 @@ float64 float64_rem(float64 a, float64 b, float_status *status) *----------------------------------------------------------------------------*/ float64 float64_log2(float64 a, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp; uint64_t aSig, aSig0, aSig1, zSig, i; a = float64_squash_input_denormal(a, status); @@ -5283,361 +5035,141 @@ float64 float64_log2(float64 a, float_status *status) } /*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. The invalid exception is raised -| if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic---which means in particular that the conversion +| is rounded according to the current rounding mode. If `a' is a NaN, the +| largest positive integer is returned. Otherwise, if the conversion +| overflows, the largest integer with the same sign as `a' is returned. 
*----------------------------------------------------------------------------*/ -int float64_eq(float64 a, float64 b, float_status *status) +int32_t floatx80_to_int32(floatx80 a, float_status *status) { - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); + bool aSign; + int32_t aExp, shiftCount; + uint64_t aSig; - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { + if (floatx80_invalid_encoding(a)) { float_raise(float_flag_invalid, status); - return 0; + return 1 << 31; } - av = float64_val(a); - bv = float64_val(b); - return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32(aSign, aSig, status); } /*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic, except that the conversion is always rounded +| toward zero. If `a' is a NaN, the largest positive integer is returned. +| Otherwise, if the conversion overflows, the largest integer with the same +| sign as `a' is returned. *----------------------------------------------------------------------------*/ -int float64_le(float64 a, float64 b, float_status *status) +int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) { - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); + bool aSign; + int32_t aExp, shiftCount; + uint64_t aSig, savedASig; + int32_t z; - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { + if (floatx80_invalid_encoding(a)) { float_raise(float_flag_invalid, status); + return 1 << 31; + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( 0x401E < aExp ) { + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FFF ) { + if (aExp || aSig) { + status->float_exception_flags |= float_flag_inexact; + } return 0; } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); + shiftCount = 0x403E - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise(float_flag_invalid, status); + return aSign ? 
(int32_t) 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {
+        status->float_exception_flags |= float_flag_inexact;
+    }
+    return z;
 }
 
 /*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. The comparison is performed according
-| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN,
+| the largest positive integer is returned. Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
 *----------------------------------------------------------------------------*/
 
-int float64_lt(float64 a, float64 b, float_status *status)
+int64_t floatx80_to_int64(floatx80 a, float_status *status)
 {
-    flag aSign, bSign;
-    uint64_t av, bv;
+    bool aSign;
+    int32_t aExp, shiftCount;
+    uint64_t aSig, aSigExtra;
 
-    a = float64_squash_input_denormal(a, status);
-    b = float64_squash_input_denormal(b, status);
-    if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
-         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
-       ) {
+    if (floatx80_invalid_encoding(a)) {
         float_raise(float_flag_invalid, status);
-        return 0;
+        return 1ULL << 63;
     }
-    aSign = extractFloat64Sign( a );
-    bSign = extractFloat64Sign( b );
-    av = float64_val(a);
-    bv = float64_val(b);
-    if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
-    return ( av != bv ) && ( aSign ^ ( av < bv ) );
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    shiftCount = 0x403E - aExp;
+    if ( shiftCount <= 0 ) {
+        if ( shiftCount ) {
+            float_raise(float_flag_invalid, status);
+            if (!aSign || floatx80_is_any_nan(a)) {
+                return INT64_MAX;
+            }
+            return INT64_MIN;
+        }
+        aSigExtra = 0;
+    }
+    else {
+        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+    }
+    return roundAndPackInt64(aSign, aSig, aSigExtra, status);
 }
 
 /*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point values `a' and `b' cannot
-| be compared, and 0 otherwise. The invalid exception is raised if either
-| operand is a NaN. The comparison is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero. If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
*----------------------------------------------------------------------------*/ -int float64_unordered(float64 a, float64 b, float_status *status) +int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) { - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception.The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_eq_quiet(float64 a, float64 b, float_status *status) -{ - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - av = float64_val(a); - bv = float64_val(b); - return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_le_quiet(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float64_lt_quiet(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_unordered_quiet(float64 a, float64 b, float_status *status) -{ - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, the -| largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32_t floatx80_to_int32(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32(aSign, aSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. 
-| Otherwise, if the conversion overflows, the largest integer with the same
-| sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
-{
-    flag aSign;
-    int32_t aExp, shiftCount;
-    uint64_t aSig, savedASig;
-    int32_t z;
-
-    if (floatx80_invalid_encoding(a)) {
-        float_raise(float_flag_invalid, status);
-        return 1 << 31;
-    }
-    aSig = extractFloatx80Frac( a );
-    aExp = extractFloatx80Exp( a );
-    aSign = extractFloatx80Sign( a );
-    if ( 0x401E < aExp ) {
-        if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
-        goto invalid;
-    }
-    else if ( aExp < 0x3FFF ) {
-        if (aExp || aSig) {
-            status->float_exception_flags |= float_flag_inexact;
-        }
-        return 0;
-    }
-    shiftCount = 0x403E - aExp;
-    savedASig = aSig;
-    aSig >>= shiftCount;
-    z = aSig;
-    if ( aSign ) z = - z;
-    if ( ( z < 0 ) ^ aSign ) {
- invalid:
-        float_raise(float_flag_invalid, status);
-        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
-    }
-    if ( ( aSig<<shiftCount ) != savedASig ) {
-        status->float_exception_flags |= float_flag_inexact;
-    }
-    return z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the extended double-precision floating-
-| point value `a' to the 64-bit two's complement integer format. The
-| conversion is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic---which means in particular that the conversion
-| is rounded according to the current rounding mode. If `a' is a NaN,
-| the largest positive integer is returned. Otherwise, if the conversion
-| overflows, the largest integer with the same sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int64_t floatx80_to_int64(floatx80 a, float_status *status)
-{
-    flag aSign;
-    int32_t aExp, shiftCount;
-    uint64_t aSig, aSigExtra;
-
-    if (floatx80_invalid_encoding(a)) {
-        float_raise(float_flag_invalid, status);
-        return 1ULL << 63;
-    }
-    aSig = extractFloatx80Frac( a );
-    aExp = extractFloatx80Exp( a );
-    aSign = extractFloatx80Sign( a );
-    shiftCount = 0x403E - aExp;
-    if ( shiftCount <= 0 ) {
-        if ( shiftCount ) {
-            float_raise(float_flag_invalid, status);
-            if (!aSign || floatx80_is_any_nan(a)) {
-                return INT64_MAX;
-            }
-            return INT64_MIN;
-        }
-        aSigExtra = 0;
-    }
-    else {
-        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
-    }
-    return roundAndPackInt64(aSign, aSig, aSigExtra, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the extended double-precision floating-
-| point value `a' to the 64-bit two's complement integer format. The
-| conversion is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic, except that the conversion is always rounded
-| toward zero. If `a' is a NaN, the largest positive integer is returned.
-| Otherwise, if the conversion overflows, the largest integer with the same
-| sign as `a' is returned.
-*----------------------------------------------------------------------------*/ - -int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) -{ - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig; int64_t z; @@ -5684,7 +5216,7 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) float32 floatx80_to_float32(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -5697,7 +5229,8 @@ float32 floatx80_to_float32(floatx80 a, float_status *status) aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status); + float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status), status); + return float32_silence_nan(res, status); } return packFloat32( aSign, 0xFF, 0 ); } @@ -5716,7 +5249,7 @@ float32 floatx80_to_float32(floatx80 a, float_status *status) float64 floatx80_to_float64(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig, zSig; @@ -5729,7 +5262,8 @@ float64 floatx80_to_float64(floatx80 a, float_status *status) aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status); + float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status), status); + return float64_silence_nan(res, status); } return packFloat64( aSign, 0x7FF, 0 ); } @@ -5748,7 +5282,7 @@ float64 floatx80_to_float64(floatx80 a, float_status *status) float128 floatx80_to_float128(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint64_t aSig, zSig0, zSig1; @@ -5760,7 +5294,8 @@ float128 floatx80_to_float128(floatx80 a, float_status *status) aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status); + float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status), status); + return float128_silence_nan(res, status); } shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); return packFloat128( aSign, aExp, zSig0, zSig1 ); @@ -5792,7 +5327,7 @@ floatx80 floatx80_round(floatx80 a, float_status *status) floatx80 floatx80_round_to_int(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t lastBitMask, roundBitsMask; floatx80 z; @@ -5810,7 +5345,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) } if ( aExp < 0x3FFF ) { if ( ( aExp == 0 ) - && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { + && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) { return a; } status->float_exception_flags |= float_flag_inexact; @@ -5837,6 +5372,10 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) return aSign ? packFloatx80( 1, 0, 0 ) : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000)); + case float_round_to_zero: + break; + default: + g_assert_not_reached(); } return packFloatx80( aSign, 0, 0 ); } @@ -5889,7 +5428,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, +static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -5935,6 +5474,12 @@ static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, zSig1 = 0; zSig0 = aSig + bSig; if ( aExp == 0 ) { + if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) { + /* At least one of the values is a pseudo-denormal, + * and there is a carry out of the result. */ + zExp = 1; + goto shiftRight1; + } if (zSig0 == 0) { return packFloatx80(zSign, 0, 0); } @@ -5963,7 +5508,7 @@ static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, +static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -6032,7 +5577,7 @@ static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { float_raise(float_flag_invalid, status); @@ -6057,7 +5602,7 @@ floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { float_raise(float_flag_invalid, status); @@ -6082,7 +5627,7 @@ floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig, bSig, zSig0, zSig1; @@ -6144,7 +5689,7 @@ floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig, bSig, zSig0, zSig1; uint64_t rem0, rem1, rem2, term0, term1, term2; @@ -6226,13 +5771,16 @@ floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) /*---------------------------------------------------------------------------- | Returns the remainder of the extended double-precision floating-point value | `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic, +| if 'mod' is false; if 'mod' is true, return the remainder based on truncating +| the quotient toward zero instead. '*quotient' is set to the low 64 bits of +| the absolute value of the integer quotient. 
*----------------------------------------------------------------------------*/ -floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) +floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient, float_status *status) { - flag aSign, zSign; - int32_t aExp, bExp, expDiff; + bool aSign, zSign; + int32_t aExp, bExp, expDiff, aExpOrig; uint64_t aSig0, aSig1, bSig; uint64_t q, term0, term1, alternateASig0, alternateASig1; @@ -6241,7 +5789,7 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) return floatx80_default_nan(status); } aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); + aExpOrig = aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); bSig = extractFloatx80Frac( b ); bExp = extractFloatx80Exp( b ); @@ -6256,6 +5804,13 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) if ((uint64_t)(bSig << 1)) { return propagateFloatx80NaN(a, b, status); } + if (aExp == 0 && aSig0 >> 63) { + /* + * Pseudo-denormal argument must be returned in normalized + * form. + */ + return packFloatx80(aSign, 1, aSig0); + } return a; } if ( bExp == 0 ) { @@ -6267,19 +5822,26 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; + if ( aSig0 == 0 ) return a; normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } - bSig |= UINT64_C(0x8000000000000000); zSign = aSign; expDiff = aExp - bExp; aSig1 = 0; if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; + if ( mod || expDiff < -1 ) { + if (aExp == 1 && aExpOrig == 0) { + /* + * Pseudo-denormal argument must be returned in + * normalized form. + */ + return packFloatx80(aSign, aExp, aSig0); + } + } shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); expDiff = 0; } - q = ( bSig <= aSig0 ); + *quotient = q = ( bSig <= aSig0 ); if ( q ) aSig0 -= bSig; expDiff -= 64; while ( 0 < expDiff ) { @@ -6289,6 +5851,8 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); expDiff -= 62; + *quotient <<= 62; + *quotient += q; } expDiff += 64; if ( 0 < expDiff ) { @@ -6302,19 +5866,28 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) ++q; sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); } + if (expDiff < 64) { + *quotient <<= expDiff; + } else { + *quotient = 0; + } + *quotient += q; } else { term1 = 0; term0 = bSig; } - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( q & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; + if (!mod) { + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( q & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; + ++*quotient; + } } return normalizeRoundAndPackFloatx80( @@ -6322,6 +5895,30 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) } +/*---------------------------------------------------------------------------- +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b'. 
The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
+{
+    uint64_t quotient;
+    return floatx80_modrem(a, b, false, &quotient, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b', with the quotient truncated
+| toward zero.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
+{
+    uint64_t quotient;
+    return floatx80_modrem(a, b, true, &quotient, status);
+}
+
 /*----------------------------------------------------------------------------
 | Returns the square root of the extended double-precision floating-point
 | value `a'. The operation is performed according to the IEC/IEEE Standard
@@ -6330,7 +5927,7 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
 
 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
 {
-    flag aSign;
+    bool aSign;
     int32_t aExp, zExp;
     uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
@@ -6393,263 +5990,6 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *status)
                                 0, zExp, zSig0, zSig1, status);
 }
 
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is equal
-| to the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
-{
-
-    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
-        || (extractFloatx80Exp(a) == 0x7FFF
-            && (uint64_t) (extractFloatx80Frac(a) << 1))
-        || (extractFloatx80Exp(b) == 0x7FFF
-            && (uint64_t) (extractFloatx80Frac(b) << 1))
-       ) {
-        float_raise(float_flag_invalid, status);
-        return 0;
-    }
-    return
-        ( a.low == b.low )
-        && (    ( a.high == b.high )
-             || (    ( a.low == 0 )
-                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
-           );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is
-| less than or equal to the corresponding value `b', and 0 otherwise. The
-| invalid exception is raised if either operand is a NaN. The comparison is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_le(floatx80 a, floatx80 b, float_status *status)
-{
-    flag aSign, bSign;
-
-    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
-        || (extractFloatx80Exp(a) == 0x7FFF
-            && (uint64_t) (extractFloatx80Frac(a) << 1))
-        || (extractFloatx80Exp(b) == 0x7FFF
-            && (uint64_t) (extractFloatx80Frac(b) << 1))
-       ) {
-        float_raise(float_flag_invalid, status);
-        return 0;
-    }
-    aSign = extractFloatx80Sign( a );
-    bSign = extractFloatx80Sign( b );
-    if ( aSign != bSign ) {
-        return
-               aSign
-            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
-                 == 0 );
-    }
-    return
-          aSign ? 
le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_lt(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point values `a' and `b' -| cannot be compared, and 0 otherwise. The invalid exception is raised if -| either operand is a NaN. The comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -int floatx80_unordered(floatx80 a, floatx80 b, float_status *status) -{ - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status) -{ - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs -| do not cause an exception. 
Otherwise, the comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause -| an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point values `a' and `b' -| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception. -| The comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ -int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status) -{ - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 1; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - /*---------------------------------------------------------------------------- | Returns the result of converting the quadruple-precision floating-point | value `a' to the 32-bit two's complement integer format. The conversion @@ -6662,7 +6002,7 @@ int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status) int32_t float128_to_int32(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1; @@ -6691,7 +6031,7 @@ int32_t float128_to_int32(float128 a, float_status *status) int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1, savedASig; int32_t z; @@ -6741,7 +6081,7 @@ int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) int64_t float128_to_int64(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1; @@ -6784,7 +6124,7 @@ int64_t float128_to_int64(float128 a, float_status *status) int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1; int64_t z; @@ -6849,7 +6189,7 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) uint64_t float128_to_uint64(float128 a, float_status *status) { - flag aSign; + bool aSign; int aExp; int shiftCount; uint64_t aSig0, aSig1; @@ -6960,7 +6300,7 @@ uint32_t float128_to_uint32(float128 a, float_status *status) float32 float128_to_float32(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; uint32_t zSig; @@ -6995,7 +6335,7 @@ float32 float128_to_float32(float128 a, float_status *status) float64 float128_to_float64(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; @@ -7028,7 +6368,7 @@ float64 float128_to_float64(float128 a, float_status *status) floatx80 float128_to_floatx80(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; @@ -7038,7 +6378,8 @@ floatx80 float128_to_floatx80(float128 a, float_status *status) aSign = extractFloat128Sign( a ); if ( aExp == 0x7FFF ) { if ( aSig0 | aSig1 ) { - return commonNaNToFloatx80(float128ToCommonNaN(a, status), status); + floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status), status); + return floatx80_silence_nan(res, status); } return packFloatx80(aSign, floatx80_infinity_high, floatx80_infinity_low); @@ -7064,7 +6405,7 @@ floatx80 float128_to_floatx80(float128 a, float_status *status) float128 float128_round_to_int(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t lastBitMask, roundBitsMask; float128 z; @@ -7161,6 +6502,8 @@ float128 float128_round_to_int(float128 a, float_status *status) case float_round_to_odd: return 
packFloat128(aSign, 0x3FFF, 0, 0); + case float_round_to_zero: + break; } return packFloat128( aSign, 0, 0, 0 ); } @@ -7219,7 +6562,7 @@ float128 float128_round_to_int(float128 a, float_status *status) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 addFloat128Sigs(float128 a, float128 b, flag zSign, +static float128 addFloat128Sigs(float128 a, float128 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -7310,7 +6653,7 @@ static float128 addFloat128Sigs(float128 a, float128 b, flag zSign, | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 subFloat128Sigs(float128 a, float128 b, flag zSign, +static float128 subFloat128Sigs(float128 a, float128 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -7398,7 +6741,7 @@ static float128 subFloat128Sigs(float128 a, float128 b, flag zSign, float128 float128_add(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; aSign = extractFloat128Sign( a ); bSign = extractFloat128Sign( b ); @@ -7419,7 +6762,7 @@ float128 float128_add(float128 a, float128 b, float_status *status) float128 float128_sub(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; aSign = extractFloat128Sign( a ); bSign = extractFloat128Sign( b ); @@ -7440,7 +6783,7 @@ float128 float128_sub(float128 a, float128 b, float_status *status) float128 float128_mul(float128 a, float128 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; @@ -7503,7 +6846,7 @@ float128 float128_mul(float128 a, float128 b, float_status *status) float128 float128_div(float128 a, float128 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; @@ -7590,7 +6933,7 @@ float128 float128_div(float128 a, float128 b, float_status *status) float128 float128_rem(float128 a, float128 b, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int32_t aExp, bExp, expDiff; uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; uint64_t allZero, alternateASig0, alternateASig1, sigMean1; @@ -7697,7 +7040,7 @@ float128 float128_rem(float128 a, float128 b, float_status *status) float128 float128_sqrt(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, zExp; uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; @@ -7757,248 +7100,10 @@ float128 float128_sqrt(float128 a, float_status *status) } -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float128_eq(float128 a, float128 b, float_status *status) -{ - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_le(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_lt(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float128_unordered(float128 a, float128 b, float_status *status) -{ - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_eq_quiet(float128 a, float128 b, float_status *status) -{ - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_le_quiet(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float128_lt_quiet(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_unordered_quiet(float128 a, float128 b, float_status *status) -{ - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - static inline int floatx80_compare_internal(floatx80 a, floatx80 b, int is_quiet, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { float_raise(float_flag_invalid, status); @@ -8027,6 +7132,13 @@ static inline int floatx80_compare_internal(floatx80 a, floatx80 b, return 1 - (2 * aSign); } } else { + /* Normalize pseudo-denormals before comparison */ + if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) { + ++a.high; + } + if ((b.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) { + ++b.high; + } if (a.low == b.low && a.high == b.high) { return float_relation_equal; } else { @@ -8035,20 +7147,20 @@ static inline int floatx80_compare_internal(floatx80 a, floatx80 b, } } -int floatx80_compare(floatx80 a, floatx80 b, float_status *status) +FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status) { return floatx80_compare_internal(a, b, 0, status); } -int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status) +FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status) { return floatx80_compare_internal(a, b, 1, status); } -static inline int float128_compare_internal(float128 a, float128 b, - int is_quiet, float_status *status) +static inline FloatRelation float128_compare_internal(float128 a, float128 b, + bool is_quiet, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (( ( extractFloat128Exp( a ) == 0x7fff ) && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) || @@ -8079,19 +7191,19 @@ static inline int float128_compare_internal(float128 a, float128 b, } } -int float128_compare(float128 a, float128 b, float_status *status) +FloatRelation float128_compare(float128 a, 
float128 b, float_status *status) { return float128_compare_internal(a, b, 0, status); } -int float128_compare_quiet(float128 a, float128 b, float_status *status) +FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *status) { return float128_compare_internal(a, b, 1, status); } floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -8130,7 +7242,7 @@ floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) float128 float128_scalbn(float128 a, int n, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; diff --git a/qemu/include/elf.h b/qemu/include/elf.h index 8fbfe60e09..5b06b55f28 100644 --- a/qemu/include/elf.h +++ b/qemu/include/elf.h @@ -160,6 +160,8 @@ typedef struct mips_elf_abiflags_v0 { #define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ +#define EM_AVR 83 /* AVR 8-bit microcontroller */ + #define EM_V850 87 /* NEC v850 */ #define EM_H8_300H 47 /* Hitachi H8/300H */ @@ -202,6 +204,8 @@ typedef struct mips_elf_abiflags_v0 { #define EM_MOXIE 223 /* Moxie processor family */ #define EM_MOXIE_OLD 0xFEED +#define EF_AVR_MACH 0x7F /* Mask for AVR e_flags to get core type */ + /* This is the info that is needed to parse the dynamic section of the file */ #define DT_NULL 0 #define DT_NEEDED 1 diff --git a/qemu/include/exec/cpu-all.h b/qemu/include/exec/cpu-all.h index ddac720740..48c7635daf 100644 --- a/qemu/include/exec/cpu-all.h +++ b/qemu/include/exec/cpu-all.h @@ -368,6 +368,7 @@ static inline bool tlb_hit(struct uc_struct *uc, target_ulong tlb_addr, target_u return tlb_hit_page(uc, tlb_addr, addr & TARGET_PAGE_MASK); } +/* Returns: 0 on success, -1 on error */ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, void *ptr, target_ulong len, bool is_write); diff --git a/qemu/include/exec/cpu-common.h b/qemu/include/exec/cpu-common.h index 28ba0e0e22..a532215518 100644 --- a/qemu/include/exec/cpu-common.h +++ b/qemu/include/exec/cpu-common.h @@ -31,9 +31,6 @@ typedef uintptr_t ram_addr_t; /* memory API */ -typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value); -typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr); - /* This should not be used by devices. */ ram_addr_t qemu_ram_addr_from_host(struct uc_struct *uc, void *ptr); RAMBlock *qemu_ram_block_from_host(struct uc_struct *uc, void *ptr, diff --git a/qemu/include/exec/cpu-defs.h b/qemu/include/exec/cpu-defs.h index 5c11015565..a7da99f7d0 100644 --- a/qemu/include/exec/cpu-defs.h +++ b/qemu/include/exec/cpu-defs.h @@ -96,8 +96,13 @@ typedef uint64_t target_ulong; * Skylake's Level-2 STLB has 16 1G entries. * Also, make sure we do not size the TLB past the guest's address space. 
*/ -# define CPU_TLB_DYN_MAX_BITS \ +# ifdef TARGET_PAGE_BITS_VARY +# define CPU_TLB_DYN_MAX_BITS \ MIN(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS) +# else +# define CPU_TLB_DYN_MAX_BITS \ + MIN_CONST(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS) +# endif # endif typedef struct CPUTLBEntry { diff --git a/qemu/include/exec/cpu_ldst.h b/qemu/include/exec/cpu_ldst.h index b8482bced1..29c21c6678 100644 --- a/qemu/include/exec/cpu_ldst.h +++ b/qemu/include/exec/cpu_ldst.h @@ -25,13 +25,13 @@ * * The syntax for the accessors is: * - * load: cpu_ld{sign}{size}_{mmusuffix}(env, ptr) - * cpu_ld{sign}{size}_{mmusuffix}_ra(env, ptr, retaddr) - * cpu_ld{sign}{size}_mmuidx_ra(env, ptr, mmu_idx, retaddr) + * load: cpu_ld{sign}{size}{end}_{mmusuffix}(env, ptr) + * cpu_ld{sign}{size}{end}_{mmusuffix}_ra(env, ptr, retaddr) + * cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmu_idx, retaddr) * - * store: cpu_st{size}_{mmusuffix}(env, ptr, val) - * cpu_st{size}_{mmusuffix}_ra(env, ptr, val, retaddr) - * cpu_st{size}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr) + * store: cpu_st{size}{end}_{mmusuffix}(env, ptr, val) + * cpu_st{size}{end}_{mmusuffix}_ra(env, ptr, val, retaddr) + * cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr) * * sign is: * (empty): for 32 and 64 bit sizes @@ -44,6 +44,11 @@ * l: 32 bits * q: 64 bits * + * end is: + * (empty): for target native endian, or for 8 bit access + * _be: for forced big endian + * _le: for forced little endian + * * mmusuffix is one of the generic suffixes "data" or "code", or "mmuidx". * The "mmuidx" suffix carries an extra mmu_idx argument that specifies * the index to use; the "data" and "code" suffixes take the index from @@ -59,32 +64,58 @@ typedef target_ulong abi_ptr; #define TARGET_ABI_FMT_ptr TARGET_ABI_FMT_lx uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr); -uint32_t cpu_lduw_data(CPUArchState *env, abi_ptr ptr); -uint32_t cpu_ldl_data(CPUArchState *env, abi_ptr ptr); -uint64_t cpu_ldq_data(CPUArchState *env, abi_ptr ptr); int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr); -int cpu_ldsw_data(CPUArchState *env, abi_ptr ptr); -uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -uint32_t cpu_lduw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -uint32_t cpu_ldl_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -uint64_t cpu_ldq_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -int cpu_ldsw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); +uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr); +int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr); +uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr); +uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr); + +uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr); +int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr); +uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr); +uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr); + +uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); + +uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); + +uint32_t cpu_lduw_le_data_ra(CPUArchState *env, 
abi_ptr ptr, uintptr_t ra); +int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val); -void cpu_stw_data(CPUArchState *env, abi_ptr ptr, uint32_t val); -void cpu_stl_data(CPUArchState *env, abi_ptr ptr, uint32_t val); -void cpu_stq_data(CPUArchState *env, abi_ptr ptr, uint64_t val); + +void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val); + +void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val); void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t retaddr); -void cpu_stw_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr); -void cpu_stl_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr); -void cpu_stq_data_ra(CPUArchState *env, abi_ptr ptr, - uint64_t val, uintptr_t retaddr); + +void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint64_t val, uintptr_t ra); + +void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint64_t val, uintptr_t ra); + /* Needed for TCG_OVERSIZED_GUEST */ #include "tcg/tcg.h" @@ -98,9 +129,7 @@ static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, target_ulong addr) { -#ifdef TARGET_ARM - struct uc_struct *uc = env->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = env->uc; uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS; return (addr >> TARGET_PAGE_BITS) & size_mask; @@ -115,27 +144,90 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, int mmu_idx, uintptr_t ra); -uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, int mmu_idx, uintptr_t ra); -int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); + +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); + +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + 
int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, int mmu_idx, uintptr_t retaddr); -void cpu_stw_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stl_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stq_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, - int mmu_idx, uintptr_t retaddr); +void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); + +void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); + +#ifdef TARGET_WORDS_BIGENDIAN +# define cpu_lduw_data cpu_lduw_be_data +# define cpu_ldsw_data cpu_ldsw_be_data +# define cpu_ldl_data cpu_ldl_be_data +# define cpu_ldq_data cpu_ldq_be_data +# define cpu_lduw_data_ra cpu_lduw_be_data_ra +# define cpu_ldsw_data_ra cpu_ldsw_be_data_ra +# define cpu_ldl_data_ra cpu_ldl_be_data_ra +# define cpu_ldq_data_ra cpu_ldq_be_data_ra +# define cpu_lduw_mmuidx_ra cpu_lduw_be_mmuidx_ra +# define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra +# define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra +# define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra +# define cpu_stw_data cpu_stw_be_data +# define cpu_stl_data cpu_stl_be_data +# define cpu_stq_data cpu_stq_be_data +# define cpu_stw_data_ra cpu_stw_be_data_ra +# define cpu_stl_data_ra cpu_stl_be_data_ra +# define cpu_stq_data_ra cpu_stq_be_data_ra +# define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra +# define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra +# define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra +#else +# define cpu_lduw_data cpu_lduw_le_data +# define cpu_ldsw_data cpu_ldsw_le_data +# define cpu_ldl_data cpu_ldl_le_data +# define cpu_ldq_data cpu_ldq_le_data +# define cpu_lduw_data_ra cpu_lduw_le_data_ra +# define cpu_ldsw_data_ra cpu_ldsw_le_data_ra +# define cpu_ldl_data_ra cpu_ldl_le_data_ra +# define cpu_ldq_data_ra cpu_ldq_le_data_ra +# define cpu_lduw_mmuidx_ra cpu_lduw_le_mmuidx_ra +# define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra +# define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra +# define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra +# define cpu_stw_data cpu_stw_le_data +# define cpu_stl_data cpu_stl_le_data +# define cpu_stq_data cpu_stq_le_data +# define cpu_stw_data_ra cpu_stw_le_data_ra +# define cpu_stl_data_ra cpu_stl_le_data_ra +# define cpu_stq_data_ra cpu_stq_le_data_ra +# define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra +# define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra +# define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra +#endif uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr); uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr); diff --git a/qemu/include/exec/exec-all.h b/qemu/include/exec/exec-all.h index 68c656787f..a717c75adc 100644 --- a/qemu/include/exec/exec-all.h +++ b/qemu/include/exec/exec-all.h @@ -108,6 +108,11 @@ void cpu_address_space_init(CPUState *cpu, int asidx, MemoryRegion *mr); * @cpu: CPU whose TLB should be 
initialized */ void tlb_init(CPUState *cpu); +/** + * tlb_destroy - destroy a CPU's TLB + * @cpu: CPU whose TLB should be destroyed + */ +void tlb_destroy(CPUState *cpu); /** * tlb_flush_page: * @cpu: CPU whose TLB should be flushed @@ -264,6 +269,23 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size); +/** + * probe_access: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @size: size of the access + * @access_type: read, write or execute permission + * @mmu_idx: MMU index to use for lookup + * @retaddr: return address for unwinding + * + * Look up the guest virtual address @addr. Raise an exception if the + * page does not satisfy @access_type. Raise an exception if the + * access (@addr, @size) hits a watchpoint. For writes, mark a clean + * page as dirty. + * + * Finally, return the host address for a page that is backed by RAM, + * or NULL if the page requires I/O. + */ void *probe_access(CPUArchState *env, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr); @@ -279,6 +301,28 @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size, return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); } +/** + * probe_access_flags: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @access_type: read, write or execute permission + * @mmu_idx: MMU index to use for lookup + * @nonfault: suppress the fault + * @phost: return value for host address + * @retaddr: return address for unwinding + * + * Similar to probe_access, loosely returning the TLB_FLAGS_MASK for + * the page, and storing the host address for RAM in @phost. + * + * If @nonfault is set, do not raise an exception but return TLB_INVALID_MASK. + * Do not handle watchpoints, but include TLB_WATCHPOINT in the returned flags. + * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags. + * For simplicity, all "mmio-like" flags are folded to TLB_MMIO. + */ +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr); + #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */ /* Estimated block size for TB allocation. */ diff --git a/qemu/include/fpu/softfloat-helpers.h b/qemu/include/fpu/softfloat-helpers.h index e0baf24c8f..9ddecba70f 100644 --- a/qemu/include/fpu/softfloat-helpers.h +++ b/qemu/include/fpu/softfloat-helpers.h @@ -53,12 +53,12 @@ this code that are retained. 
#include "fpu/softfloat-types.h" -static inline void set_float_detect_tininess(int val, float_status *status) +static inline void set_float_detect_tininess(bool val, float_status *status) { - status->float_detect_tininess = val; + status->tininess_before_rounding = val; } -static inline void set_float_rounding_mode(int val, float_status *status) +static inline void set_float_rounding_mode(FloatRoundMode val, float_status *status) { status->float_rounding_mode = val; } @@ -74,32 +74,32 @@ static inline void set_floatx80_rounding_precision(int val, status->floatx80_rounding_precision = val; } -static inline void set_flush_to_zero(flag val, float_status *status) +static inline void set_flush_to_zero(bool val, float_status *status) { status->flush_to_zero = val; } -static inline void set_flush_inputs_to_zero(flag val, float_status *status) +static inline void set_flush_inputs_to_zero(bool val, float_status *status) { status->flush_inputs_to_zero = val; } -static inline void set_default_nan_mode(flag val, float_status *status) +static inline void set_default_nan_mode(bool val, float_status *status) { status->default_nan_mode = val; } -static inline void set_snan_bit_is_one(flag val, float_status *status) +static inline void set_snan_bit_is_one(bool val, float_status *status) { status->snan_bit_is_one = val; } static inline int get_float_detect_tininess(float_status *status) { - return status->float_detect_tininess; + return status->tininess_before_rounding; } -static inline int get_float_rounding_mode(float_status *status) +static inline FloatRoundMode get_float_rounding_mode(float_status *status) { return status->float_rounding_mode; } @@ -114,17 +114,17 @@ static inline int get_floatx80_rounding_precision(float_status *status) return status->floatx80_rounding_precision; } -static inline flag get_flush_to_zero(float_status *status) +static inline bool get_flush_to_zero(float_status *status) { return status->flush_to_zero; } -static inline flag get_flush_inputs_to_zero(float_status *status) +static inline bool get_flush_inputs_to_zero(float_status *status) { return status->flush_inputs_to_zero; } -static inline flag get_default_nan_mode(float_status *status) +static inline bool get_default_nan_mode(float_status *status) { return status->default_nan_mode; } diff --git a/qemu/include/fpu/softfloat-macros.h b/qemu/include/fpu/softfloat-macros.h index afae4f7404..38d8c97dce 100644 --- a/qemu/include/fpu/softfloat-macros.h +++ b/qemu/include/fpu/softfloat-macros.h @@ -756,10 +756,10 @@ static inline uint32_t estimateSqrt32(int aExp, uint32_t a) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 == b0 ) && ( a1 == b1 ); + return a0 == b0 && a1 == b1; } @@ -769,10 +769,10 @@ static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + return a0 < b0 || (a0 == b0 && a1 <= b1); } @@ -782,10 +782,10 @@ static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) | returns 0. 
*----------------------------------------------------------------------------*/ -static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + return a0 < b0 || (a0 == b0 && a1 < b1); } @@ -795,10 +795,10 @@ static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 != b0 ) || ( a1 != b1 ); + return a0 != b0 || a1 != b1; } diff --git a/qemu/include/fpu/softfloat-types.h b/qemu/include/fpu/softfloat-types.h index 565dced559..00bc527d4a 100644 --- a/qemu/include/fpu/softfloat-types.h +++ b/qemu/include/fpu/softfloat-types.h @@ -82,12 +82,6 @@ this code that are retained. #include -/* This 'flag' type must be able to hold at least 0 and 1. It should - * probably be replaced with 'bool' but the uses would need to be audited - * to check that they weren't accidentally relying on it being a larger type. - */ -typedef uint8_t flag; - /* * Software IEC/IEEE floating-point types. */ @@ -124,16 +118,25 @@ typedef struct { * Software IEC/IEEE floating-point underflow tininess-detection mode. */ -enum { - float_tininess_after_rounding = 0, - float_tininess_before_rounding = 1 -}; +#define float_tininess_after_rounding false +#define float_tininess_before_rounding true /* *Software IEC/IEEE floating-point rounding mode. */ -enum { +#ifdef _MSC_VER +#define ENUM_PACKED \ + __pragma(pack(push, 1)) \ + enum +#define ENUM_PACKED_END \ + __pragma(pack(pop)) +#else +#define ENUM_PACKED enum __attribute__((packed)) +#define ENUM_PACKED_END +#endif + +typedef ENUM_PACKED { float_round_nearest_even = 0, float_round_down = 1, float_round_up = 2, @@ -141,7 +144,7 @@ enum { float_round_ties_away = 4, /* Not an IEEE rounding mode: round to the closest odd mantissa value */ float_round_to_odd = 5, -}; +} ENUM_PACKED_END FloatRoundMode; /* * Software IEC/IEEE floating-point exception flags. @@ -166,17 +169,17 @@ enum { */ typedef struct float_status { - signed char float_detect_tininess; - signed char float_rounding_mode; + FloatRoundMode float_rounding_mode; uint8_t float_exception_flags; signed char floatx80_rounding_precision; + bool tininess_before_rounding; /* should denormalised results go to zero and set the inexact flag? */ - flag flush_to_zero; + bool flush_to_zero; /* should denormalised inputs go to zero and set the input_denormal flag? */ - flag flush_inputs_to_zero; - flag default_nan_mode; + bool flush_inputs_to_zero; + bool default_nan_mode; /* not always used -- see snan_bit_is_one() in softfloat-specialize.h */ - flag snan_bit_is_one; + bool snan_bit_is_one; } float_status; #endif /* SOFTFLOAT_TYPES_H */ diff --git a/qemu/include/fpu/softfloat.h b/qemu/include/fpu/softfloat.h index ecb8ba0114..76d023725c 100644 --- a/qemu/include/fpu/softfloat.h +++ b/qemu/include/fpu/softfloat.h @@ -85,12 +85,12 @@ this code that are retained. 
/*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point ordering relations *----------------------------------------------------------------------------*/ -enum { +typedef enum { float_relation_less = -1, float_relation_equal = 0, float_relation_greater = 1, float_relation_unordered = 2 -}; +} FloatRelation; #include "fpu/softfloat-types.h" #include "fpu/softfloat-helpers.h" @@ -186,9 +186,9 @@ float32 float16_to_float32(float16, bool ieee, float_status *status); float16 float64_to_float16(float64 a, bool ieee, float_status *status); float64 float16_to_float64(float16 a, bool ieee, float_status *status); -int16_t float16_to_int16_scalbn(float16, int, int, float_status *status); -int32_t float16_to_int32_scalbn(float16, int, int, float_status *status); -int64_t float16_to_int64_scalbn(float16, int, int, float_status *status); +int16_t float16_to_int16_scalbn(float16, FloatRoundMode, int, float_status *status); +int32_t float16_to_int32_scalbn(float16, FloatRoundMode, int, float_status *status); +int64_t float16_to_int64_scalbn(float16, FloatRoundMode, int, float_status *status); int16_t float16_to_int16(float16, float_status *status); int32_t float16_to_int32(float16, float_status *status); @@ -198,9 +198,9 @@ int16_t float16_to_int16_round_to_zero(float16, float_status *status); int32_t float16_to_int32_round_to_zero(float16, float_status *status); int64_t float16_to_int64_round_to_zero(float16, float_status *status); -uint16_t float16_to_uint16_scalbn(float16 a, int, int, float_status *status); -uint32_t float16_to_uint32_scalbn(float16 a, int, int, float_status *status); -uint64_t float16_to_uint64_scalbn(float16 a, int, int, float_status *status); +uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode, int, float_status *status); +uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode, int, float_status *status); +uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode, int, float_status *status); uint16_t float16_to_uint16(float16 a, float_status *status); uint32_t float16_to_uint32(float16 a, float_status *status); @@ -228,34 +228,34 @@ float16 float16_maxnum(float16, float16, float_status *status); float16 float16_minnummag(float16, float16, float_status *status); float16 float16_maxnummag(float16, float16, float_status *status); float16 float16_sqrt(float16, float_status *status); -int float16_compare(float16, float16, float_status *status); -int float16_compare_quiet(float16, float16, float_status *status); +FloatRelation float16_compare(float16, float16, float_status *status); +FloatRelation float16_compare_quiet(float16, float16, float_status *status); -int float16_is_quiet_nan(float16, float_status *status); -int float16_is_signaling_nan(float16, float_status *status); +bool float16_is_quiet_nan(float16, float_status *status); +bool float16_is_signaling_nan(float16, float_status *status); float16 float16_silence_nan(float16, float_status *status); -static inline int float16_is_any_nan(float16 a) +static inline bool float16_is_any_nan(float16 a) { return ((float16_val(a) & ~0x8000) > 0x7c00); } -static inline int float16_is_neg(float16 a) +static inline bool float16_is_neg(float16 a) { return float16_val(a) >> 15; } -static inline int float16_is_infinity(float16 a) +static inline bool float16_is_infinity(float16 a) { return (float16_val(a) & 0x7fff) == 0x7c00; } -static inline int float16_is_zero(float16 a) +static inline bool float16_is_zero(float16 a) { return (float16_val(a) & 0x7fff) == 0; } -static 
inline int float16_is_zero_or_denormal(float16 a) +static inline bool float16_is_zero_or_denormal(float16 a) { return (float16_val(a) & 0x7c00) == 0; } @@ -298,9 +298,9 @@ float16 float16_default_nan(float_status *status); | Software IEC/IEEE single-precision conversion routines. *----------------------------------------------------------------------------*/ -int16_t float32_to_int16_scalbn(float32, int, int, float_status *status); -int32_t float32_to_int32_scalbn(float32, int, int, float_status *status); -int64_t float32_to_int64_scalbn(float32, int, int, float_status *status); +int16_t float32_to_int16_scalbn(float32, FloatRoundMode, int, float_status *status); +int32_t float32_to_int32_scalbn(float32, FloatRoundMode, int, float_status *status); +int64_t float32_to_int64_scalbn(float32, FloatRoundMode, int, float_status *status); int16_t float32_to_int16(float32, float_status *status); int32_t float32_to_int32(float32, float_status *status); @@ -310,9 +310,9 @@ int16_t float32_to_int16_round_to_zero(float32, float_status *status); int32_t float32_to_int32_round_to_zero(float32, float_status *status); int64_t float32_to_int64_round_to_zero(float32, float_status *status); -uint16_t float32_to_uint16_scalbn(float32, int, int, float_status *status); -uint32_t float32_to_uint32_scalbn(float32, int, int, float_status *status); -uint64_t float32_to_uint64_scalbn(float32, int, int, float_status *status); +uint16_t float32_to_uint16_scalbn(float32, FloatRoundMode, int, float_status *status); +uint32_t float32_to_uint32_scalbn(float32, FloatRoundMode, int, float_status *status); +uint64_t float32_to_uint64_scalbn(float32, FloatRoundMode, int, float_status *status); uint16_t float32_to_uint16(float32, float_status *status); uint32_t float32_to_uint32(float32, float_status *status); @@ -339,24 +339,16 @@ float32 float32_muladd(float32, float32, float32, int, float_status *status); float32 float32_sqrt(float32, float_status *status); float32 float32_exp2(float32, float_status *status); float32 float32_log2(float32, float_status *status); -int float32_eq(float32, float32, float_status *status); -int float32_le(float32, float32, float_status *status); -int float32_lt(float32, float32, float_status *status); -int float32_unordered(float32, float32, float_status *status); -int float32_eq_quiet(float32, float32, float_status *status); -int float32_le_quiet(float32, float32, float_status *status); -int float32_lt_quiet(float32, float32, float_status *status); -int float32_unordered_quiet(float32, float32, float_status *status); -int float32_compare(float32, float32, float_status *status); -int float32_compare_quiet(float32, float32, float_status *status); +FloatRelation float32_compare(float32, float32, float_status *status); +FloatRelation float32_compare_quiet(float32, float32, float_status *status); float32 float32_min(float32, float32, float_status *status); float32 float32_max(float32, float32, float_status *status); float32 float32_minnum(float32, float32, float_status *status); float32 float32_maxnum(float32, float32, float_status *status); float32 float32_minnummag(float32, float32, float_status *status); float32 float32_maxnummag(float32, float32, float_status *status); -int float32_is_quiet_nan(float32, float_status *status); -int float32_is_signaling_nan(float32, float_status *status); +bool float32_is_quiet_nan(float32, float_status *status); +bool float32_is_signaling_nan(float32, float_status *status); float32 float32_silence_nan(float32, float_status *status); float32 float32_scalbn(float32, 
int, float_status *status); @@ -376,27 +368,27 @@ static inline float32 float32_chs(float32 a) return make_float32(float32_val(a) ^ 0x80000000); } -static inline int float32_is_infinity(float32 a) +static inline bool float32_is_infinity(float32 a) { return (float32_val(a) & 0x7fffffff) == 0x7f800000; } -static inline int float32_is_neg(float32 a) +static inline bool float32_is_neg(float32 a) { return float32_val(a) >> 31; } -static inline int float32_is_zero(float32 a) +static inline bool float32_is_zero(float32 a) { return (float32_val(a) & 0x7fffffff) == 0; } -static inline int float32_is_any_nan(float32 a) +static inline bool float32_is_any_nan(float32 a) { return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL); } -static inline int float32_is_zero_or_denormal(float32 a) +static inline bool float32_is_zero_or_denormal(float32 a) { return (float32_val(a) & 0x7f800000) == 0; } @@ -421,6 +413,47 @@ static inline float32 float32_set_sign(float32 a, int sign) return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31)); } +static inline bool float32_eq(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) == float_relation_equal; +} + +static inline bool float32_le(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float32_lt(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) < float_relation_equal; +} + +static inline bool float32_unordered(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float32_eq_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float32_le_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float32_lt_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float32_unordered_quiet(float32 a, float32 b, + float_status *s) +{ + return float32_compare_quiet(a, b, s) == float_relation_unordered; +} + #define float32_zero make_float32(0) #define float32_half make_float32(0x3f000000) #define float32_one make_float32(0x3f800000) @@ -440,7 +473,7 @@ static inline float32 float32_set_sign(float32 a, int sign) | significand. *----------------------------------------------------------------------------*/ -static inline float32 packFloat32(flag zSign, int zExp, uint32_t zSig) +static inline float32 packFloat32(bool zSign, int zExp, uint32_t zSig) { return make_float32( (((uint32_t)zSign) << 31) + (((uint32_t)zExp) << 23) + zSig); @@ -455,9 +488,9 @@ float32 float32_default_nan(float_status *status); | Software IEC/IEEE double-precision conversion routines. 
*----------------------------------------------------------------------------*/ -int16_t float64_to_int16_scalbn(float64, int, int, float_status *status); -int32_t float64_to_int32_scalbn(float64, int, int, float_status *status); -int64_t float64_to_int64_scalbn(float64, int, int, float_status *status); +int16_t float64_to_int16_scalbn(float64, FloatRoundMode, int, float_status *status); +int32_t float64_to_int32_scalbn(float64, FloatRoundMode, int, float_status *status); +int64_t float64_to_int64_scalbn(float64, FloatRoundMode, int, float_status *status); int16_t float64_to_int16(float64, float_status *status); int32_t float64_to_int32(float64, float_status *status); @@ -467,9 +500,9 @@ int16_t float64_to_int16_round_to_zero(float64, float_status *status); int32_t float64_to_int32_round_to_zero(float64, float_status *status); int64_t float64_to_int64_round_to_zero(float64, float_status *status); -uint16_t float64_to_uint16_scalbn(float64, int, int, float_status *status); -uint32_t float64_to_uint32_scalbn(float64, int, int, float_status *status); -uint64_t float64_to_uint64_scalbn(float64, int, int, float_status *status); +uint16_t float64_to_uint16_scalbn(float64, FloatRoundMode, int, float_status *status); +uint32_t float64_to_uint32_scalbn(float64, FloatRoundMode, int, float_status *status); +uint64_t float64_to_uint64_scalbn(float64, FloatRoundMode, int, float_status *status); uint16_t float64_to_uint16(float64, float_status *status); uint32_t float64_to_uint32(float64, float_status *status); @@ -495,24 +528,16 @@ float64 float64_rem(float64, float64, float_status *status); float64 float64_muladd(float64, float64, float64, int, float_status *status); float64 float64_sqrt(float64, float_status *status); float64 float64_log2(float64, float_status *status); -int float64_eq(float64, float64, float_status *status); -int float64_le(float64, float64, float_status *status); -int float64_lt(float64, float64, float_status *status); -int float64_unordered(float64, float64, float_status *status); -int float64_eq_quiet(float64, float64, float_status *status); -int float64_le_quiet(float64, float64, float_status *status); -int float64_lt_quiet(float64, float64, float_status *status); -int float64_unordered_quiet(float64, float64, float_status *status); -int float64_compare(float64, float64, float_status *status); -int float64_compare_quiet(float64, float64, float_status *status); +FloatRelation float64_compare(float64, float64, float_status *status); +FloatRelation float64_compare_quiet(float64, float64, float_status *status); float64 float64_min(float64, float64, float_status *status); float64 float64_max(float64, float64, float_status *status); float64 float64_minnum(float64, float64, float_status *status); float64 float64_maxnum(float64, float64, float_status *status); float64 float64_minnummag(float64, float64, float_status *status); float64 float64_maxnummag(float64, float64, float_status *status); -int float64_is_quiet_nan(float64 a, float_status *status); -int float64_is_signaling_nan(float64, float_status *status); +bool float64_is_quiet_nan(float64 a, float_status *status); +bool float64_is_signaling_nan(float64, float_status *status); float64 float64_silence_nan(float64, float_status *status); float64 float64_scalbn(float64, int, float_status *status); @@ -532,27 +557,27 @@ static inline float64 float64_chs(float64 a) return make_float64(float64_val(a) ^ 0x8000000000000000LL); } -static inline int float64_is_infinity(float64 a) +static inline bool float64_is_infinity(float64 a) { return 
(float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL; } -static inline int float64_is_neg(float64 a) +static inline bool float64_is_neg(float64 a) { return float64_val(a) >> 63; } -static inline int float64_is_zero(float64 a) +static inline bool float64_is_zero(float64 a) { return (float64_val(a) & 0x7fffffffffffffffLL) == 0; } -static inline int float64_is_any_nan(float64 a) +static inline bool float64_is_any_nan(float64 a) { return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL); } -static inline int float64_is_zero_or_denormal(float64 a) +static inline bool float64_is_zero_or_denormal(float64 a) { return (float64_val(a) & 0x7ff0000000000000LL) == 0; } @@ -578,6 +603,47 @@ static inline float64 float64_set_sign(float64 a, int sign) | ((int64_t)sign << 63)); } +static inline bool float64_eq(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) == float_relation_equal; +} + +static inline bool float64_le(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float64_lt(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) < float_relation_equal; +} + +static inline bool float64_unordered(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float64_eq_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float64_le_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float64_lt_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float64_unordered_quiet(float64 a, float64 b, + float_status *s) +{ + return float64_compare_quiet(a, b, s) == float_relation_unordered; +} + #define float64_zero make_float64(0) #define float64_half make_float64(0x3fe0000000000000LL) #define float64_one make_float64(0x3ff0000000000000LL) @@ -617,18 +683,12 @@ floatx80 floatx80_add(floatx80, floatx80, float_status *status); floatx80 floatx80_sub(floatx80, floatx80, float_status *status); floatx80 floatx80_mul(floatx80, floatx80, float_status *status); floatx80 floatx80_div(floatx80, floatx80, float_status *status); +floatx80 floatx80_modrem(floatx80, floatx80, bool, uint64_t *, float_status *status); +floatx80 floatx80_mod(floatx80, floatx80, float_status *status); floatx80 floatx80_rem(floatx80, floatx80, float_status *status); floatx80 floatx80_sqrt(floatx80, float_status *status); -int floatx80_eq(floatx80, floatx80, float_status *status); -int floatx80_le(floatx80, floatx80, float_status *status); -int floatx80_lt(floatx80, floatx80, float_status *status); -int floatx80_unordered(floatx80, floatx80, float_status *status); -int floatx80_eq_quiet(floatx80, floatx80, float_status *status); -int floatx80_le_quiet(floatx80, floatx80, float_status *status); -int floatx80_lt_quiet(floatx80, floatx80, float_status *status); -int floatx80_unordered_quiet(floatx80, floatx80, float_status *status); -int floatx80_compare(floatx80, floatx80, float_status *status); -int floatx80_compare_quiet(floatx80, floatx80, float_status *status); +FloatRelation floatx80_compare(floatx80, floatx80, float_status *status); +FloatRelation floatx80_compare_quiet(floatx80, floatx80, float_status *status); int floatx80_is_quiet_nan(floatx80, float_status *status); int 
floatx80_is_signaling_nan(floatx80, float_status *status); floatx80 floatx80_silence_nan(floatx80, float_status *status); @@ -646,7 +706,7 @@ static inline floatx80 floatx80_chs(floatx80 a) return a; } -static inline int floatx80_is_infinity(floatx80 a) +static inline bool floatx80_is_infinity(floatx80 a) { #if defined(TARGET_M68K) return (a.high & 0x7fff) == floatx80_infinity.high && !(a.low << 1); @@ -656,26 +716,67 @@ static inline int floatx80_is_infinity(floatx80 a) #endif } -static inline int floatx80_is_neg(floatx80 a) +static inline bool floatx80_is_neg(floatx80 a) { return a.high >> 15; } -static inline int floatx80_is_zero(floatx80 a) +static inline bool floatx80_is_zero(floatx80 a) { return (a.high & 0x7fff) == 0 && a.low == 0; } -static inline int floatx80_is_zero_or_denormal(floatx80 a) +static inline bool floatx80_is_zero_or_denormal(floatx80 a) { return (a.high & 0x7fff) == 0; } -static inline int floatx80_is_any_nan(floatx80 a) +static inline bool floatx80_is_any_nan(floatx80 a) { return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1); } +static inline bool floatx80_eq(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) == float_relation_equal; +} + +static inline bool floatx80_le(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) <= float_relation_equal; +} + +static inline bool floatx80_lt(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) < float_relation_equal; +} + +static inline bool floatx80_unordered(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) == float_relation_unordered; +} + +static inline bool floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool floatx80_le_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool floatx80_unordered_quiet(floatx80 a, floatx80 b, + float_status *s) +{ + return floatx80_compare_quiet(a, b, s) == float_relation_unordered; +} + /*---------------------------------------------------------------------------- | Return whether the given value is an invalid floatx80 encoding. | Invalid floatx80 encodings arise when the integer bit is not set, but @@ -688,10 +789,35 @@ static inline int floatx80_is_any_nan(floatx80 a) *----------------------------------------------------------------------------*/ static inline bool floatx80_invalid_encoding(floatx80 a) { +#if defined(TARGET_M68K) + /*------------------------------------------------------------------------- + | With m68k, the explicit integer bit can be zero in the case of: + | - zeros (exp == 0, mantissa == 0) + | - denormalized numbers (exp == 0, mantissa != 0) + | - unnormalized numbers (exp != 0, exp < 0x7FFF) + | - infinities (exp == 0x7FFF, mantissa == 0) + | - not-a-numbers (exp == 0x7FFF, mantissa != 0) + | + | For infinities and NaNs, the explicit integer bit can be either one or + | zero. + | + | The IEEE 754 standard does not define a zero integer bit. Such a number + | is an unnormalized number. Hardware does not directly support + | denormalized and unnormalized numbers, but implicitly supports them by + | trapping them as unimplemented data types, allowing efficient conversion + | in software. 
+ | + | See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL", + | "1.6 FLOATING-POINT DATA TYPES" + *------------------------------------------------------------------------*/ + return false; +#else return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0; +#endif } #define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) +#define floatx80_zero_init make_floatx80_init(0x0000, 0x0000000000000000LL) #define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL) #define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL) #define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL) @@ -722,7 +848,7 @@ static inline int32_t extractFloatx80Exp(floatx80 a) | `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloatx80Sign(floatx80 a) +static inline bool extractFloatx80Sign(floatx80 a) { return a.high >> 15; } @@ -732,7 +858,7 @@ static inline flag extractFloatx80Sign(floatx80 a) | extended double-precision floating-point value, returning the result. *----------------------------------------------------------------------------*/ -static inline floatx80 packFloatx80(flag zSign, int32_t zExp, uint64_t zSig) +static inline floatx80 packFloatx80(bool zSign, int32_t zExp, uint64_t zSig) { floatx80 z; @@ -783,7 +909,7 @@ floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status); | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -797,7 +923,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, *----------------------------------------------------------------------------*/ floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, - flag zSign, int32_t zExp, + bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -831,18 +957,10 @@ float128 float128_mul(float128, float128, float_status *status); float128 float128_div(float128, float128, float_status *status); float128 float128_rem(float128, float128, float_status *status); float128 float128_sqrt(float128, float_status *status); -int float128_eq(float128, float128, float_status *status); -int float128_le(float128, float128, float_status *status); -int float128_lt(float128, float128, float_status *status); -int float128_unordered(float128, float128, float_status *status); -int float128_eq_quiet(float128, float128, float_status *status); -int float128_le_quiet(float128, float128, float_status *status); -int float128_lt_quiet(float128, float128, float_status *status); -int float128_unordered_quiet(float128, float128, float_status *status); -int float128_compare(float128, float128, float_status *status); -int float128_compare_quiet(float128, float128, float_status *status); -int float128_is_quiet_nan(float128, float_status *status); -int float128_is_signaling_nan(float128, float_status *status); +FloatRelation float128_compare(float128, float128, float_status *status); +FloatRelation float128_compare_quiet(float128, float128, float_status *status); +bool float128_is_quiet_nan(float128, float_status *status); +bool float128_is_signaling_nan(float128, float_status *status); float128 float128_silence_nan(float128, float_status *status); float128 float128_scalbn(float128, int, float_status *status); @@ -858,22 +976,22 @@ static inline float128 
float128_chs(float128 a) return a; } -static inline int float128_is_infinity(float128 a) +static inline bool float128_is_infinity(float128 a) { return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0; } -static inline int float128_is_neg(float128 a) +static inline bool float128_is_neg(float128 a) { return a.high >> 63; } -static inline int float128_is_zero(float128 a) +static inline bool float128_is_zero(float128 a) { return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0; } -static inline int float128_is_zero_or_denormal(float128 a) +static inline bool float128_is_zero_or_denormal(float128 a) { return (a.high & 0x7fff000000000000LL) == 0; } @@ -888,12 +1006,53 @@ static inline bool float128_is_denormal(float128 a) return float128_is_zero_or_denormal(a) && !float128_is_zero(a); } -static inline int float128_is_any_nan(float128 a) +static inline bool float128_is_any_nan(float128 a) { return ((a.high >> 48) & 0x7fff) == 0x7fff && ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0)); } +static inline bool float128_eq(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) == float_relation_equal; +} + +static inline bool float128_le(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float128_lt(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) < float_relation_equal; +} + +static inline bool float128_unordered(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float128_eq_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float128_le_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float128_lt_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float128_unordered_quiet(float128 a, float128 b, + float_status *s) +{ + return float128_compare_quiet(a, b, s) == float_relation_unordered; +} + #define float128_zero make_float128(0, 0) /*---------------------------------------------------------------------------- diff --git a/qemu/include/hw/registerfields.h b/qemu/include/hw/registerfields.h index 686aca1225..972876f98c 100644 --- a/qemu/include/hw/registerfields.h +++ b/qemu/include/hw/registerfields.h @@ -66,30 +66,30 @@ #define FIELD_DP8(storage, reg, field, val, d) { \ struct { \ unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ + } _v = { .v = val }; \ d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ } #define FIELD_DP16(storage, reg, field, val, d) { \ struct { \ unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ + } _v = { .v = val }; \ d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ } -#define FIELD_DP32(storage, reg, field, val, d) { \ - struct { \ - unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ - d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ +#define FIELD_DP32(storage, reg, field, val, d) { \ + struct { \ + unsigned int v:R_ ## reg ## _ ## 
field ## _LENGTH; \ + } v = { .v = val }; \ + d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, v.v); \ } -#define FIELD_DP64(storage, reg, field, val, d) { \ - struct { \ - unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ - d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ +#define FIELD_DP64(storage, reg, field, val, d) { \ + struct { \ + unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ + } v = { .v = val }; \ + d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, v.v); \ } /* Deposit a field to array of registers. */ diff --git a/qemu/include/qemu/bswap.h b/qemu/include/qemu/bswap.h index 7591f6c88e..5afcf853f0 100644 --- a/qemu/include/qemu/bswap.h +++ b/qemu/include/qemu/bswap.h @@ -9,6 +9,8 @@ # include #elif defined(__FreeBSD__) # include +#elif defined(__HAIKU__) +# include #elif defined(CONFIG_BYTESWAP_H) # include diff --git a/qemu/include/qemu/compiler.h b/qemu/include/qemu/compiler.h index 971aa12721..e0cb4b3dd1 100644 --- a/qemu/include/qemu/compiler.h +++ b/qemu/include/qemu/compiler.h @@ -89,6 +89,8 @@ static union MSVC_FLOAT_HACK __NAN = {{0x00, 0x00, 0xC0, 0x7F}}; #define QEMU_FLATTEN #define QEMU_ALWAYS_INLINE __declspec(inline) +#define qemu_build_not_reached() __assume(0) + #else // Unix compilers #ifndef NAN diff --git a/qemu/include/qemu/host-utils.h b/qemu/include/qemu/host-utils.h index 0c5b30ff67..50063ffffe 100644 --- a/qemu/include/qemu/host-utils.h +++ b/qemu/include/qemu/host-utils.h @@ -100,8 +100,8 @@ static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor) } } #else -void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b); -void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b); +void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b); +void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); int divs128(int64_t *plow, int64_t *phigh, int64_t divisor); diff --git a/qemu/include/qemu/osdep.h b/qemu/include/qemu/osdep.h index ad18b8ddd6..80d0869acd 100644 --- a/qemu/include/qemu/osdep.h +++ b/qemu/include/qemu/osdep.h @@ -96,7 +96,7 @@ struct uc_struct; #include #include -#ifdef __OpenBSD__ +#ifdef HAVE_SYS_SIGNAL_H #include #endif @@ -189,6 +189,9 @@ struct uc_struct; #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0 +#endif #ifndef ENOMEDIUM #define ENOMEDIUM ENODEV #endif @@ -252,18 +255,72 @@ struct uc_struct; #define SIZE_MAX ((size_t)-1) #endif -#ifndef MIN -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +/* + * Two variations of MIN/MAX macros. The first is for runtime use, and + * evaluates arguments only once (so it is safe even with side + * effects), but will not work in constant contexts (such as array + * size declarations) because of the '{}'. The second is for constant + * expression use, where evaluating arguments twice is safe because + * the result is going to be constant anyway, but will not work in a + * runtime context because of a void expression where a value is + * expected. Thus, both gcc and clang will fail to compile if you use + * the wrong macro (even if the error may seem a bit cryptic). 
+ * + * Note that neither form is usable as an #if condition; if you truly + * need to write conditional code that depends on a minimum or maximum + * determined by the pre-processor instead of the compiler, you'll + * have to open-code it. Sadly, Coverity is severely confused by the + * constant variants, so we have to dumb things down there. + */ +#undef MIN +#ifdef _MSC_VER +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#else +#define MIN(a, b) \ + ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a < _b ? _a : _b; \ + }) +#endif + +#undef MAX +#ifdef _MSC_VER + // MSVC version + #define MAX(a, b) ((a) > (b) ? (a) : (b)) +#else + // GCC/Clang version with statement expression + #define MAX(a, b) ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a > _b ? _a : _b; \ + }) #endif -#ifndef MAX -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +#if defined(__COVERITY__) || defined(_MSC_VER) +# define MIN_CONST(a, b) ((a) < (b) ? (a) : (b)) +# define MAX_CONST(a, b) ((a) > (b) ? (a) : (b)) +#else +# define MIN_CONST(a, b) \ + __builtin_choose_expr( \ + __builtin_constant_p(a) && __builtin_constant_p(b), \ + (a) < (b) ? (a) : (b), \ + ((void)0)) +# define MAX_CONST(a, b) \ + __builtin_choose_expr( \ + __builtin_constant_p(a) && __builtin_constant_p(b), \ + (a) > (b) ? (a) : (b), \ + ((void)0)) #endif -/* Minimum function that returns zero only iff both values are zero. - * Intended for use with unsigned values only. */ +/* + * Minimum function that returns zero only if both values are zero. + * Intended for use with unsigned values only. + */ #ifndef MIN_NON_ZERO -#define MIN_NON_ZERO(a, b) ((a) == 0 ? (b) : \ - ((b) == 0 ? (a) : (MIN(a, b)))) +#define MIN_NON_ZERO(a, b) \ + ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a == 0 ? _b : (_b == 0 || _b > _a) ? _a : _b; \ + }) #endif /* Round number down to multiple */ @@ -408,7 +465,7 @@ void qemu_anon_ram_free(struct uc_struct *uc, void *ptr, size_t size); #define HAVE_CHARDEV_SERIAL 1 #elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \ || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \ - || defined(__GLIBC__) + || defined(__GLIBC__) || defined(__APPLE__) #define HAVE_CHARDEV_SERIAL 1 #endif @@ -417,6 +474,10 @@ void qemu_anon_ram_free(struct uc_struct *uc, void *ptr, size_t size); #define HAVE_CHARDEV_PARPORT 1 #endif +#if defined(__HAIKU__) +#define SIGIO SIGPOLL +#endif + #if defined(CONFIG_LINUX) #ifndef BUS_MCEERR_AR #define BUS_MCEERR_AR 4 diff --git a/qemu/include/tcg/tcg-op-gvec.h b/qemu/include/tcg/tcg-op-gvec.h index dd414fc768..5610e89f99 100644 --- a/qemu/include/tcg/tcg-op-gvec.h +++ b/qemu/include/tcg/tcg-op-gvec.h @@ -39,56 +39,61 @@ void tcg_gen_gvec_2_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, gen_helper_gvec_2 *fn); /* Similarly, passing an extra data value. */ -typedef void gen_helper_gvec_2i(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); -void tcg_gen_gvec_2i_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, TCGv_i64 c, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_2i *fn); +typedef void gen_helper_gvec_2i(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_i64, + TCGv_i32); +void tcg_gen_gvec_2i_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_2i *fn); /* Similarly, passing an extra pointer (e.g. env or float_status). 
*/ -typedef void gen_helper_gvec_2_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void gen_helper_gvec_2_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_i32); void tcg_gen_gvec_2_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, int32_t data, gen_helper_gvec_2_ptr *fn); /* Similarly, with three vector operands. */ -typedef void gen_helper_gvec_3(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_3_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_3 *fn); +typedef void gen_helper_gvec_3(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_i32); +void tcg_gen_gvec_3_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_3 *fn); /* Similarly, with four vector operands. */ typedef void gen_helper_gvec_4(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_4_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, uint32_t oprsz, uint32_t maxsz, - int32_t data, gen_helper_gvec_4 *fn); +void tcg_gen_gvec_4_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t oprsz, + uint32_t maxsz, int32_t data, gen_helper_gvec_4 *fn); /* Similarly, with five vector operands. */ -typedef void gen_helper_gvec_5(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, - TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_5_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, uint32_t xofs, uint32_t oprsz, - uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn); +typedef void gen_helper_gvec_5(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_5_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t xofs, + uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_5 *fn); typedef void gen_helper_gvec_3_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_3_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, - int32_t data, gen_helper_gvec_3_ptr *fn); +void tcg_gen_gvec_3_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, TCGv_ptr ptr, uint32_t oprsz, + uint32_t maxsz, int32_t data, + gen_helper_gvec_3_ptr *fn); typedef void gen_helper_gvec_4_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_4_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz, - uint32_t maxsz, int32_t data, +void tcg_gen_gvec_4_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, TCGv_ptr ptr, + uint32_t oprsz, uint32_t maxsz, int32_t data, gen_helper_gvec_4_ptr *fn); -typedef void gen_helper_gvec_5_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, - TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_5_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, uint32_t eofs, TCGv_ptr ptr, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_5_ptr *fn); +typedef void gen_helper_gvec_5_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_5_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, 
uint32_t cofs, uint32_t eofs, + TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_5_ptr *fn); /* Expand a gvec operation. Either inline or out-of-line depending on the actual vector size and the operations supported by the host. */ @@ -109,6 +114,8 @@ typedef struct { uint8_t vece; /* Prefer i64 to v64. */ bool prefer_i64; + /* Load dest as a 2nd source operand. */ + bool load_dest; } GVecGen2; typedef struct { @@ -201,7 +208,8 @@ typedef struct { void (*fni8)(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64); void (*fni4)(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); /* Expand inline with a host vector type. */ - void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec); + void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec, TCGv_vec, + TCGv_vec); /* Expand out-of-line helper w/descriptor. */ gen_helper_gvec_4 *fno; /* The optional opcodes, if any, utilized by .fniv. */ @@ -218,110 +226,146 @@ typedef struct { void tcg_gen_gvec_2(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *); -void tcg_gen_gvec_2i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - uint32_t maxsz, int64_t c, const GVecGen2i *); -void tcg_gen_gvec_2s(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - uint32_t maxsz, TCGv_i64 c, const GVecGen2s *); -void tcg_gen_gvec_3(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, uint32_t maxsz, const GVecGen3 *); -void tcg_gen_gvec_3i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, +void tcg_gen_gvec_2i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int64_t c, + const GVecGen2i *); +void tcg_gen_gvec_2s(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz, TCGv_i64 c, + const GVecGen2s *); +void tcg_gen_gvec_3(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz, + const GVecGen3 *); +void tcg_gen_gvec_3i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen3i *); -void tcg_gen_gvec_4(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, - uint32_t oprsz, uint32_t maxsz, const GVecGen4 *); +void tcg_gen_gvec_4(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t oprsz, + uint32_t maxsz, const GVecGen4 *); /* Expand a specific vector operation. 
*/ -void tcg_gen_gvec_mov(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_not(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_neg(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_abs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_add(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_sub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_mul(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_addi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_muli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_adds(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_subs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_muls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_mov(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_not(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_neg(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_abs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_add(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_sub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_mul(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_addi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_muli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_adds(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_subs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_muls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); /* Saturated arithmetic. 
*/ -void tcg_gen_gvec_ssadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_sssub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_usadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_ussub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_ssadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_sssub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_usadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_ussub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); /* Min/max. */ -void tcg_gen_gvec_smin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_umin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_smax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_umax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_and(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_or(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_xor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_andc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_orc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_nand(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_nor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_eqv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_andi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_xori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_ori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_ands(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_xors(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_ors(TCGContext 
*tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_dup_mem(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t s, uint32_t m); +void tcg_gen_gvec_smin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_umin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_smax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_umax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_and(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_or(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_xor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_andc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_orc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_nand(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_nor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_eqv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_andi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_xori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_ori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_ands(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_xors(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_ors(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_dup_mem(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t s, uint32_t m); +void tcg_gen_gvec_dup_imm(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t s, + uint32_t m, uint64_t imm); void tcg_gen_gvec_dup_i32(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t s, uint32_t m, TCGv_i32); void tcg_gen_gvec_dup_i64(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t s, uint32_t m, TCGv_i64); -void tcg_gen_gvec_dup8i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint8_t x); -void tcg_gen_gvec_dup16i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint16_t x); -void tcg_gen_gvec_dup32i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint32_t x); -void tcg_gen_gvec_dup64i(TCGContext 
*tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint64_t x); +#if TARGET_LONG_BITS == 64 +#define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i64 +#else +#define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i32 +#endif void tcg_gen_gvec_shli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); @@ -329,6 +373,10 @@ void tcg_gen_gvec_shri(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 int64_t shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sari(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotri(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_shls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); @@ -336,6 +384,8 @@ void tcg_gen_gvec_shrs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sars(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); /* * Perform vector shift by vector element, modulo the element size. @@ -347,6 +397,10 @@ void tcg_gen_gvec_shrv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 uint32_t bofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sarv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotlv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotrv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_cmp(TCGContext *tcg_ctx, TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, @@ -383,5 +437,7 @@ void tcg_gen_vec_shr8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t) void tcg_gen_vec_shr16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_sar8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_sar16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_rotl8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c); +void tcg_gen_vec_rotl16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c); #endif diff --git a/qemu/include/tcg/tcg-op.h b/qemu/include/tcg/tcg-op.h index 93026d1d51..5b9685da03 100644 --- a/qemu/include/tcg/tcg-op.h +++ b/qemu/include/tcg/tcg-op.h @@ -359,9 +359,9 @@ void tcg_gen_ctzi_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, uint32_t void tcg_gen_clrsb_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ctpop_i32(TCGContext *tcg_ctx, TCGv_i32 a1, TCGv_i32 a2); void tcg_gen_rotl_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); -void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); void tcg_gen_rotr_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); -void 
tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); void tcg_gen_deposit_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, unsigned int ofs, unsigned int len); void tcg_gen_deposit_z_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg, @@ -569,9 +569,9 @@ void tcg_gen_ctzi_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, uint64_t void tcg_gen_clrsb_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ctpop_i64(TCGContext *tcg_ctx, TCGv_i64 a1, TCGv_i64 a2); void tcg_gen_rotl_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); -void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); void tcg_gen_rotr_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); -void tcg_gen_rotri_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_rotri_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); void tcg_gen_deposit_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, unsigned int ofs, unsigned int len); void tcg_gen_deposit_z_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg, @@ -1078,14 +1078,19 @@ void tcg_gen_umax_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a void tcg_gen_shli_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_shri_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_sari_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_rotli_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_rotri_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_shls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); void tcg_gen_shrs_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); void tcg_gen_sars_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); +void tcg_gen_rotls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); void tcg_gen_shlv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_shrv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_sarv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_rotlv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_rotrv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_cmp_vec(TCGContext *tcg_ctx, TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); diff --git a/qemu/include/tcg/tcg-opc.h b/qemu/include/tcg/tcg-opc.h index 22033870bf..a583ca4900 100644 --- a/qemu/include/tcg/tcg-opc.h +++ b/qemu/include/tcg/tcg-opc.h @@ -270,19 +270,28 @@ DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec)) DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) +DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec)) DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) 
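The rotli_vec/rotls_vec/rotlv_vec/rotrv_vec opcodes added in this hunk are optional: the new TCG_TARGET_HAS_roti_vec/_rots_vec/_rotv_vec flags default to 0 in tcg.h, so a backend that provides nothing falls back to an out-of-line gvec helper. A rough sketch of what such a fallback does, assuming only the standard descriptor accessors simd_oprsz()/simd_data(); the function name is invented, this is not the body of tcg-runtime-gvec.c, and the maxsz tail-clearing a real helper performs is omitted:

/* Illustration only: immediate left-rotate of every byte in a gvec buffer. */
void helper_gvec_rotl8i_sketch(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);   /* bytes covered by the operation */
    int shift = simd_data(desc) & 7;     /* rotate count, masked to the element width */
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        uint8_t x = *(uint8_t *)((char *)a + i);
        *(uint8_t *)((char *)d + i) = (uint8_t)((x << shift) | (x >> (-shift & 7)));
    }
}

The same identity, rotl(x, c) == (x << c) | (x >> (w - c)) for element width w, is what allows a shift-plus-OR lowering on the inline path when a host only implements the plain vector shifts.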
DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) +DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec)) #ifdef _MSC_VER -DEF(shlv_vec, 1, 2, 0, IMPLVEC) -DEF(shrv_vec, 1, 2, 0, IMPLVEC) -DEF(sarv_vec, 1, 2, 0, IMPLVEC) +// For MSVC, pre-compute the flags since it can't evaluate the OR at compile time +#define VEC_FLAGS (TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT) +DEF(shlv_vec, 1, 2, 0, VEC_FLAGS) +DEF(shrv_vec, 1, 2, 0, VEC_FLAGS) +DEF(sarv_vec, 1, 2, 0, VEC_FLAGS) +DEF(rotlv_vec, 1, 2, 0, VEC_FLAGS) +DEF(rotrv_vec, 1, 2, 0, VEC_FLAGS) +#undef VEC_FLAGS #else -DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) -DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) -DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) +DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) #endif DEF(cmp_vec, 1, 2, 1, IMPLVEC) diff --git a/qemu/include/tcg/tcg.h b/qemu/include/tcg/tcg.h index ade583e43f..966103e25d 100644 --- a/qemu/include/tcg/tcg.h +++ b/qemu/include/tcg/tcg.h @@ -182,6 +182,9 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_andc_vec 0 #define TCG_TARGET_HAS_orc_vec 0 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 0 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 0 @@ -721,7 +724,7 @@ struct TCGContext { void *tb_ret_addr; /* target/riscv/translate.c */ - TCGv cpu_gpr[32], cpu_pc; // also target/mips/translate.c + TCGv cpu_gpr[32], cpu_pc, cpu_vl; // also target/mips/translate.c TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ TCGv load_res; TCGv load_val; diff --git a/qemu/m68k.h b/qemu/m68k.h index 1b1703d19c..065357bbe1 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_m68k #define tcg_gen_shr_i64 tcg_gen_shr_i64_m68k #define tcg_gen_st_i64 tcg_gen_st_i64_m68k +#define tcg_gen_add_i64 tcg_gen_add_i64_m68k +#define tcg_gen_sub_i64 tcg_gen_sub_i64_m68k #define tcg_gen_xor_i64 tcg_gen_xor_i64_m68k +#define tcg_gen_neg_i64 tcg_gen_neg_i64_m68k #define cpu_icount_to_ns cpu_icount_to_ns_m68k #define cpu_is_stopped cpu_is_stopped_m68k #define cpu_get_ticks cpu_get_ticks_m68k @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_m68k #define floatx80_mul floatx80_mul_m68k #define floatx80_div floatx80_div_m68k +#define floatx80_modrem floatx80_modrem_m68k +#define floatx80_mod floatx80_mod_m68k #define floatx80_rem floatx80_rem_m68k #define floatx80_sqrt floatx80_sqrt_m68k #define floatx80_eq floatx80_eq_m68k @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_m68k #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_m68k #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_m68k +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_m68k #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_m68k #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_m68k #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_m68k @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_m68k #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_m68k #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_m68k +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_m68k +#define tcg_gen_vec_rotl16i_i64 
tcg_gen_vec_rotl16i_i64_m68k #define tcg_gen_gvec_sari tcg_gen_gvec_sari_m68k +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_m68k +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_m68k #define tcg_gen_gvec_shls tcg_gen_gvec_shls_m68k #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_m68k #define tcg_gen_gvec_sars tcg_gen_gvec_sars_m68k +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_m68k #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_m68k #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_m68k #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_m68k +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_m68k +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_m68k #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_m68k #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_m68k #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_m68k @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_m68k #define tcg_gen_shri_vec tcg_gen_shri_vec_m68k #define tcg_gen_sari_vec tcg_gen_sari_vec_m68k +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_m68k +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_m68k #define tcg_gen_cmp_vec tcg_gen_cmp_vec_m68k #define tcg_gen_add_vec tcg_gen_add_vec_m68k #define tcg_gen_sub_vec tcg_gen_sub_vec_m68k @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_m68k #define tcg_gen_shrv_vec tcg_gen_shrv_vec_m68k #define tcg_gen_sarv_vec tcg_gen_sarv_vec_m68k +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_m68k +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_m68k #define tcg_gen_shls_vec tcg_gen_shls_vec_m68k #define tcg_gen_shrs_vec tcg_gen_shrs_vec_m68k #define tcg_gen_sars_vec tcg_gen_sars_vec_m68k +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_m68k #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_m68k #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_m68k #define tb_htable_lookup tb_htable_lookup_m68k @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_m68k #define cpu_loop_exit_atomic cpu_loop_exit_atomic_m68k #define tlb_init tlb_init_m68k +#define tlb_destroy tlb_destroy_m68k #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_m68k #define tlb_flush tlb_flush_m68k #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_m68k @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_m68k #define get_page_addr_code_hostp get_page_addr_code_hostp_m68k #define get_page_addr_code get_page_addr_code_m68k +#define probe_access_flags probe_access_flags_m68k #define probe_access probe_access_m68k #define tlb_vaddr_to_host tlb_vaddr_to_host_m68k #define helper_ret_ldub_mmu helper_ret_ldub_mmu_m68k @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_m68k #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_m68k #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_m68k -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_m68k -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_m68k -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_m68k -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_m68k +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_m68k +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_m68k +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_m68k +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_m68k +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_m68k +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_m68k +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_m68k +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_m68k #define cpu_ldub_data_ra cpu_ldub_data_ra_m68k #define cpu_ldsb_data_ra cpu_ldsb_data_ra_m68k -#define cpu_lduw_data_ra cpu_lduw_data_ra_m68k -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_m68k -#define 
cpu_ldl_data_ra cpu_ldl_data_ra_m68k -#define cpu_ldq_data_ra cpu_ldq_data_ra_m68k +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_m68k +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_m68k +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_m68k +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_m68k +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_m68k +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_m68k +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_m68k +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_m68k #define cpu_ldub_data cpu_ldub_data_m68k #define cpu_ldsb_data cpu_ldsb_data_m68k -#define cpu_lduw_data cpu_lduw_data_m68k -#define cpu_ldsw_data cpu_ldsw_data_m68k -#define cpu_ldl_data cpu_ldl_data_m68k -#define cpu_ldq_data cpu_ldq_data_m68k +#define cpu_lduw_be_data cpu_lduw_be_data_m68k +#define cpu_lduw_le_data cpu_lduw_le_data_m68k +#define cpu_ldsw_be_data cpu_ldsw_be_data_m68k +#define cpu_ldsw_le_data cpu_ldsw_le_data_m68k +#define cpu_ldl_be_data cpu_ldl_be_data_m68k +#define cpu_ldl_le_data cpu_ldl_le_data_m68k +#define cpu_ldq_le_data cpu_ldq_le_data_m68k +#define cpu_ldq_be_data cpu_ldq_be_data_m68k #define helper_ret_stb_mmu helper_ret_stb_mmu_m68k #define helper_le_stw_mmu helper_le_stw_mmu_m68k #define helper_be_stw_mmu helper_be_stw_mmu_m68k @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_m68k #define helper_be_stq_mmu helper_be_stq_mmu_m68k #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_m68k -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_m68k -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_m68k -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_m68k +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_m68k +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_m68k +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_m68k +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_m68k +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_m68k +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_m68k #define cpu_stb_data_ra cpu_stb_data_ra_m68k -#define cpu_stw_data_ra cpu_stw_data_ra_m68k -#define cpu_stl_data_ra cpu_stl_data_ra_m68k -#define cpu_stq_data_ra cpu_stq_data_ra_m68k +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_m68k +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_m68k +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_m68k +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_m68k +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_m68k +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_m68k #define cpu_stb_data cpu_stb_data_m68k -#define cpu_stw_data cpu_stw_data_m68k -#define cpu_stl_data cpu_stl_data_m68k -#define cpu_stq_data cpu_stq_data_m68k +#define cpu_stw_be_data cpu_stw_be_data_m68k +#define cpu_stw_le_data cpu_stw_le_data_m68k +#define cpu_stl_be_data cpu_stl_be_data_m68k +#define cpu_stl_le_data cpu_stl_le_data_m68k +#define cpu_stq_be_data cpu_stq_be_data_m68k +#define cpu_stq_le_data cpu_stq_le_data_m68k #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_m68k #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_m68k #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_m68k @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_m68k #define cpu_ldl_code cpu_ldl_code_m68k #define cpu_ldq_code cpu_ldq_code_m68k +#define cpu_interrupt_handler cpu_interrupt_handler_m68k #define helper_div_i32 helper_div_i32_m68k #define helper_rem_i32 helper_rem_i32_m68k #define helper_divu_i32 helper_divu_i32_m68k @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_m68k #define helper_gvec_sar32i helper_gvec_sar32i_m68k #define helper_gvec_sar64i 
helper_gvec_sar64i_m68k +#define helper_gvec_rotl8i helper_gvec_rotl8i_m68k +#define helper_gvec_rotl16i helper_gvec_rotl16i_m68k +#define helper_gvec_rotl32i helper_gvec_rotl32i_m68k +#define helper_gvec_rotl64i helper_gvec_rotl64i_m68k #define helper_gvec_shl8v helper_gvec_shl8v_m68k #define helper_gvec_shl16v helper_gvec_shl16v_m68k #define helper_gvec_shl32v helper_gvec_shl32v_m68k @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_m68k #define helper_gvec_sar32v helper_gvec_sar32v_m68k #define helper_gvec_sar64v helper_gvec_sar64v_m68k +#define helper_gvec_rotl8v helper_gvec_rotl8v_m68k +#define helper_gvec_rotl16v helper_gvec_rotl16v_m68k +#define helper_gvec_rotl32v helper_gvec_rotl32v_m68k +#define helper_gvec_rotl64v helper_gvec_rotl64v_m68k +#define helper_gvec_rotr8v helper_gvec_rotr8v_m68k +#define helper_gvec_rotr16v helper_gvec_rotr16v_m68k +#define helper_gvec_rotr32v helper_gvec_rotr32v_m68k +#define helper_gvec_rotr64v helper_gvec_rotr64v_m68k #define helper_gvec_eq8 helper_gvec_eq8_m68k #define helper_gvec_ne8 helper_gvec_ne8_m68k #define helper_gvec_lt8 helper_gvec_lt8_m68k @@ -1420,7 +1474,6 @@ #define helper_bfffo_mem helper_bfffo_mem_m68k #define helper_chk helper_chk_m68k #define helper_chk2 helper_chk2_m68k -#define floatx80_mod floatx80_mod_m68k #define floatx80_getman floatx80_getman_m68k #define floatx80_getexp floatx80_getexp_m68k #define floatx80_scale floatx80_scale_m68k diff --git a/qemu/mips.h b/qemu/mips.h index 3a005710c7..b55e68792d 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mips #define tcg_gen_shr_i64 tcg_gen_shr_i64_mips #define tcg_gen_st_i64 tcg_gen_st_i64_mips +#define tcg_gen_add_i64 tcg_gen_add_i64_mips +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mips #define tcg_gen_xor_i64 tcg_gen_xor_i64_mips +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mips #define cpu_icount_to_ns cpu_icount_to_ns_mips #define cpu_is_stopped cpu_is_stopped_mips #define cpu_get_ticks cpu_get_ticks_mips @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mips #define floatx80_mul floatx80_mul_mips #define floatx80_div floatx80_div_mips +#define floatx80_modrem floatx80_modrem_mips +#define floatx80_mod floatx80_mod_mips #define floatx80_rem floatx80_rem_mips #define floatx80_sqrt floatx80_sqrt_mips #define floatx80_eq floatx80_eq_mips @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mips #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mips #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mips +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mips #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mips #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mips #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mips @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mips #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mips #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mips +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mips +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mips #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mips +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mips +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mips #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mips #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mips #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mips +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mips #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mips #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_mips #define tcg_gen_gvec_sarv 
tcg_gen_gvec_sarv_mips +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mips +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mips #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mips #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mips #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mips #define tcg_gen_shri_vec tcg_gen_shri_vec_mips #define tcg_gen_sari_vec tcg_gen_sari_vec_mips +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mips +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mips #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mips #define tcg_gen_add_vec tcg_gen_add_vec_mips #define tcg_gen_sub_vec tcg_gen_sub_vec_mips @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mips #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mips #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mips +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mips +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mips #define tcg_gen_shls_vec tcg_gen_shls_vec_mips #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mips #define tcg_gen_sars_vec tcg_gen_sars_vec_mips +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mips #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mips #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mips #define tb_htable_lookup tb_htable_lookup_mips @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mips #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mips #define tlb_init tlb_init_mips +#define tlb_destroy tlb_destroy_mips #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mips #define tlb_flush tlb_flush_mips #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mips @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mips #define get_page_addr_code_hostp get_page_addr_code_hostp_mips #define get_page_addr_code get_page_addr_code_mips +#define probe_access_flags probe_access_flags_mips #define probe_access probe_access_mips #define tlb_vaddr_to_host tlb_vaddr_to_host_mips #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mips @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mips #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mips #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mips -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mips -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mips -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mips -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mips +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mips +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mips +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mips +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mips +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mips +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mips +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mips +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mips #define cpu_ldub_data_ra cpu_ldub_data_ra_mips #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mips -#define cpu_lduw_data_ra cpu_lduw_data_ra_mips -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mips -#define cpu_ldl_data_ra cpu_ldl_data_ra_mips -#define cpu_ldq_data_ra cpu_ldq_data_ra_mips +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mips +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mips +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mips +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mips +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_mips +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mips +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mips +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mips 
#define cpu_ldub_data cpu_ldub_data_mips #define cpu_ldsb_data cpu_ldsb_data_mips -#define cpu_lduw_data cpu_lduw_data_mips -#define cpu_ldsw_data cpu_ldsw_data_mips -#define cpu_ldl_data cpu_ldl_data_mips -#define cpu_ldq_data cpu_ldq_data_mips +#define cpu_lduw_be_data cpu_lduw_be_data_mips +#define cpu_lduw_le_data cpu_lduw_le_data_mips +#define cpu_ldsw_be_data cpu_ldsw_be_data_mips +#define cpu_ldsw_le_data cpu_ldsw_le_data_mips +#define cpu_ldl_be_data cpu_ldl_be_data_mips +#define cpu_ldl_le_data cpu_ldl_le_data_mips +#define cpu_ldq_le_data cpu_ldq_le_data_mips +#define cpu_ldq_be_data cpu_ldq_be_data_mips #define helper_ret_stb_mmu helper_ret_stb_mmu_mips #define helper_le_stw_mmu helper_le_stw_mmu_mips #define helper_be_stw_mmu helper_be_stw_mmu_mips @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mips #define helper_be_stq_mmu helper_be_stq_mmu_mips #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mips -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mips -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mips -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mips +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mips +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mips +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mips +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mips +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mips +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mips #define cpu_stb_data_ra cpu_stb_data_ra_mips -#define cpu_stw_data_ra cpu_stw_data_ra_mips -#define cpu_stl_data_ra cpu_stl_data_ra_mips -#define cpu_stq_data_ra cpu_stq_data_ra_mips +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mips +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_mips +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mips +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mips +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mips +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mips #define cpu_stb_data cpu_stb_data_mips -#define cpu_stw_data cpu_stw_data_mips -#define cpu_stl_data cpu_stl_data_mips -#define cpu_stq_data cpu_stq_data_mips +#define cpu_stw_be_data cpu_stw_be_data_mips +#define cpu_stw_le_data cpu_stw_le_data_mips +#define cpu_stl_be_data cpu_stl_be_data_mips +#define cpu_stl_le_data cpu_stl_le_data_mips +#define cpu_stq_be_data cpu_stq_be_data_mips +#define cpu_stq_le_data cpu_stq_le_data_mips #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_mips #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mips #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mips @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mips #define cpu_ldl_code cpu_ldl_code_mips #define cpu_ldq_code cpu_ldq_code_mips +#define cpu_interrupt_handler cpu_interrupt_handler_mips #define helper_div_i32 helper_div_i32_mips #define helper_rem_i32 helper_rem_i32_mips #define helper_divu_i32 helper_divu_i32_mips @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips #define helper_gvec_sar32i helper_gvec_sar32i_mips #define helper_gvec_sar64i helper_gvec_sar64i_mips +#define helper_gvec_rotl8i helper_gvec_rotl8i_mips +#define helper_gvec_rotl16i helper_gvec_rotl16i_mips +#define helper_gvec_rotl32i helper_gvec_rotl32i_mips +#define helper_gvec_rotl64i helper_gvec_rotl64i_mips #define helper_gvec_shl8v helper_gvec_shl8v_mips #define helper_gvec_shl16v helper_gvec_shl16v_mips #define helper_gvec_shl32v helper_gvec_shl32v_mips @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mips #define helper_gvec_sar32v helper_gvec_sar32v_mips 
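The cpu_ld*/cpu_st* renames running through these headers track the split of the cpu_ldst.h accessors into explicit big- and little-endian variants: the old size-only names (cpu_lduw_data_ra, cpu_stl_mmuidx_ra, ...) give way to _be_/_le_ pairs. A minimal sketch of what that looks like at a call site, with invented names and a function that is not part of the patch:

/* Illustration only: load a halfword little-endian and store it back big-endian. */
static uint16_t swap16_in_guest_ram(CPUArchState *env, target_ulong addr,
                                    uintptr_t retaddr)
{
    uint16_t v = cpu_lduw_le_data_ra(env, addr, retaddr);  /* explicit LE load  */
    cpu_stw_be_data_ra(env, addr, v, retaddr);             /* explicit BE store */
    return v;
}

The per-target #define lists grow accordingly because each of these new entry points, like every other symbol in this tree, is suffixed with the architecture name so that several targets can coexist in a single library.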
#define helper_gvec_sar64v helper_gvec_sar64v_mips +#define helper_gvec_rotl8v helper_gvec_rotl8v_mips +#define helper_gvec_rotl16v helper_gvec_rotl16v_mips +#define helper_gvec_rotl32v helper_gvec_rotl32v_mips +#define helper_gvec_rotl64v helper_gvec_rotl64v_mips +#define helper_gvec_rotr8v helper_gvec_rotr8v_mips +#define helper_gvec_rotr16v helper_gvec_rotr16v_mips +#define helper_gvec_rotr32v helper_gvec_rotr32v_mips +#define helper_gvec_rotr64v helper_gvec_rotr64v_mips #define helper_gvec_eq8 helper_gvec_eq8_mips #define helper_gvec_ne8 helper_gvec_ne8_mips #define helper_gvec_lt8 helper_gvec_lt8_mips @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mips #define helper_cfc1 helper_cfc1_mips #define helper_ctc1 helper_ctc1_mips -#define ieee_ex_to_mips ieee_ex_to_mips_mips #define helper_float_sqrt_d helper_float_sqrt_d_mips #define helper_float_sqrt_s helper_float_sqrt_s_mips #define helper_float_cvtd_s helper_float_cvtd_s_mips @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mips #define helper_msa_binsli_df helper_msa_binsli_df_mips #define helper_msa_binsri_df helper_msa_binsri_df_mips -#define helper_msa_subv_df helper_msa_subv_df_mips -#define helper_msa_subs_s_df helper_msa_subs_s_df_mips -#define helper_msa_subs_u_df helper_msa_subs_u_df_mips -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mips -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mips -#define helper_msa_mulv_df helper_msa_mulv_df_mips -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mips -#define helper_msa_dotp_u_df helper_msa_dotp_u_df_mips +#define helper_msa_subv_b helper_msa_subv_b_mips +#define helper_msa_subv_h helper_msa_subv_h_mips +#define helper_msa_subv_w helper_msa_subv_w_mips +#define helper_msa_subv_d helper_msa_subv_d_mips +#define helper_msa_subs_s_b helper_msa_subs_s_b_mips +#define helper_msa_subs_s_h helper_msa_subs_s_h_mips +#define helper_msa_subs_s_w helper_msa_subs_s_w_mips +#define helper_msa_subs_s_d helper_msa_subs_s_d_mips +#define helper_msa_subs_u_b helper_msa_subs_u_b_mips +#define helper_msa_subs_u_h helper_msa_subs_u_h_mips +#define helper_msa_subs_u_w helper_msa_subs_u_w_mips +#define helper_msa_subs_u_d helper_msa_subs_u_d_mips +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mips +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mips +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mips +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mips +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mips +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mips +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mips +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mips +#define helper_msa_mulv_b helper_msa_mulv_b_mips +#define helper_msa_mulv_h helper_msa_mulv_h_mips +#define helper_msa_mulv_w helper_msa_mulv_w_mips +#define helper_msa_mulv_d helper_msa_mulv_d_mips +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mips +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mips +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mips +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mips +#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mips +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mips #define helper_msa_mul_q_df helper_msa_mul_q_df_mips #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mips #define helper_msa_sld_df helper_msa_sld_df_mips -#define helper_msa_maddv_df helper_msa_maddv_df_mips -#define helper_msa_msubv_df helper_msa_msubv_df_mips -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mips -#define 
helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mips -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mips -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mips +#define helper_msa_maddv_b helper_msa_maddv_b_mips +#define helper_msa_maddv_h helper_msa_maddv_h_mips +#define helper_msa_maddv_w helper_msa_maddv_w_mips +#define helper_msa_maddv_d helper_msa_maddv_d_mips +#define helper_msa_msubv_b helper_msa_msubv_b_mips +#define helper_msa_msubv_h helper_msa_msubv_h_mips +#define helper_msa_msubv_w helper_msa_msubv_w_mips +#define helper_msa_msubv_d helper_msa_msubv_d_mips +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mips +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mips +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mips +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mips +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mips +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mips +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mips +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mips +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mips +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mips +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mips +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mips #define helper_msa_binsl_df helper_msa_binsl_df_mips #define helper_msa_binsr_df helper_msa_binsr_df_mips #define helper_msa_madd_q_df helper_msa_madd_q_df_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index 367c6b7e79..76990196b2 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mips64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_mips64 #define tcg_gen_st_i64 tcg_gen_st_i64_mips64 +#define tcg_gen_add_i64 tcg_gen_add_i64_mips64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mips64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_mips64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mips64 #define cpu_icount_to_ns cpu_icount_to_ns_mips64 #define cpu_is_stopped cpu_is_stopped_mips64 #define cpu_get_ticks cpu_get_ticks_mips64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mips64 #define floatx80_mul floatx80_mul_mips64 #define floatx80_div floatx80_div_mips64 +#define floatx80_modrem floatx80_modrem_mips64 +#define floatx80_mod floatx80_mod_mips64 #define floatx80_rem floatx80_rem_mips64 #define floatx80_sqrt floatx80_sqrt_mips64 #define floatx80_eq floatx80_eq_mips64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mips64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mips64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mips64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mips64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mips64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mips64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mips64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mips64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mips64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mips64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mips64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mips64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mips64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mips64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mips64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mips64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mips64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mips64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mips64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mips64 #define tcg_gen_gvec_shrv 
tcg_gen_gvec_shrv_mips64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_mips64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mips64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mips64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mips64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mips64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mips64 #define tcg_gen_shri_vec tcg_gen_shri_vec_mips64 #define tcg_gen_sari_vec tcg_gen_sari_vec_mips64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mips64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mips64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mips64 #define tcg_gen_add_vec tcg_gen_add_vec_mips64 #define tcg_gen_sub_vec tcg_gen_sub_vec_mips64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mips64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mips64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mips64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mips64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mips64 #define tcg_gen_shls_vec tcg_gen_shls_vec_mips64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mips64 #define tcg_gen_sars_vec tcg_gen_sars_vec_mips64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mips64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mips64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mips64 #define tb_htable_lookup tb_htable_lookup_mips64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mips64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mips64 #define tlb_init tlb_init_mips64 +#define tlb_destroy tlb_destroy_mips64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mips64 #define tlb_flush tlb_flush_mips64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mips64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mips64 #define get_page_addr_code_hostp get_page_addr_code_hostp_mips64 #define get_page_addr_code get_page_addr_code_mips64 +#define probe_access_flags probe_access_flags_mips64 #define probe_access probe_access_mips64 #define tlb_vaddr_to_host tlb_vaddr_to_host_mips64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mips64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mips64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mips64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mips64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mips64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mips64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mips64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mips64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mips64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mips64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mips64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mips64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mips64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mips64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mips64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mips64 #define cpu_ldub_data_ra cpu_ldub_data_ra_mips64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mips64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_mips64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mips64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_mips64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_mips64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mips64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mips64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mips64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mips64 +#define cpu_ldl_be_data_ra 
cpu_ldl_be_data_ra_mips64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mips64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mips64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mips64 #define cpu_ldub_data cpu_ldub_data_mips64 #define cpu_ldsb_data cpu_ldsb_data_mips64 -#define cpu_lduw_data cpu_lduw_data_mips64 -#define cpu_ldsw_data cpu_ldsw_data_mips64 -#define cpu_ldl_data cpu_ldl_data_mips64 -#define cpu_ldq_data cpu_ldq_data_mips64 +#define cpu_lduw_be_data cpu_lduw_be_data_mips64 +#define cpu_lduw_le_data cpu_lduw_le_data_mips64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_mips64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_mips64 +#define cpu_ldl_be_data cpu_ldl_be_data_mips64 +#define cpu_ldl_le_data cpu_ldl_le_data_mips64 +#define cpu_ldq_le_data cpu_ldq_le_data_mips64 +#define cpu_ldq_be_data cpu_ldq_be_data_mips64 #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64 #define helper_le_stw_mmu helper_le_stw_mmu_mips64 #define helper_be_stw_mmu helper_be_stw_mmu_mips64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mips64 #define helper_be_stq_mmu helper_be_stq_mmu_mips64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mips64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mips64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mips64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mips64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mips64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mips64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mips64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mips64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mips64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mips64 #define cpu_stb_data_ra cpu_stb_data_ra_mips64 -#define cpu_stw_data_ra cpu_stw_data_ra_mips64 -#define cpu_stl_data_ra cpu_stl_data_ra_mips64 -#define cpu_stq_data_ra cpu_stq_data_ra_mips64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mips64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_mips64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mips64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mips64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mips64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mips64 #define cpu_stb_data cpu_stb_data_mips64 -#define cpu_stw_data cpu_stw_data_mips64 -#define cpu_stl_data cpu_stl_data_mips64 -#define cpu_stq_data cpu_stq_data_mips64 +#define cpu_stw_be_data cpu_stw_be_data_mips64 +#define cpu_stw_le_data cpu_stw_le_data_mips64 +#define cpu_stl_be_data cpu_stl_be_data_mips64 +#define cpu_stl_le_data cpu_stl_le_data_mips64 +#define cpu_stq_be_data cpu_stq_be_data_mips64 +#define cpu_stq_le_data cpu_stq_le_data_mips64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_mips64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mips64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mips64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mips64 #define cpu_ldl_code cpu_ldl_code_mips64 #define cpu_ldq_code cpu_ldq_code_mips64 +#define cpu_interrupt_handler cpu_interrupt_handler_mips64 #define helper_div_i32 helper_div_i32_mips64 #define helper_rem_i32 helper_rem_i32_mips64 #define helper_divu_i32 helper_divu_i32_mips64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips64 #define helper_gvec_sar32i helper_gvec_sar32i_mips64 #define helper_gvec_sar64i helper_gvec_sar64i_mips64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_mips64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_mips64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_mips64 +#define 
helper_gvec_rotl64i helper_gvec_rotl64i_mips64 #define helper_gvec_shl8v helper_gvec_shl8v_mips64 #define helper_gvec_shl16v helper_gvec_shl16v_mips64 #define helper_gvec_shl32v helper_gvec_shl32v_mips64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mips64 #define helper_gvec_sar32v helper_gvec_sar32v_mips64 #define helper_gvec_sar64v helper_gvec_sar64v_mips64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_mips64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_mips64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_mips64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_mips64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_mips64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64 #define helper_gvec_eq8 helper_gvec_eq8_mips64 #define helper_gvec_ne8 helper_gvec_ne8_mips64 #define helper_gvec_lt8 helper_gvec_lt8_mips64 @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mips64 #define helper_cfc1 helper_cfc1_mips64 #define helper_ctc1 helper_ctc1_mips64 -#define ieee_ex_to_mips ieee_ex_to_mips_mips64 #define helper_float_sqrt_d helper_float_sqrt_d_mips64 #define helper_float_sqrt_s helper_float_sqrt_s_mips64 #define helper_float_cvtd_s helper_float_cvtd_s_mips64 @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mips64 #define helper_msa_binsli_df helper_msa_binsli_df_mips64 #define helper_msa_binsri_df helper_msa_binsri_df_mips64 -#define helper_msa_subv_df helper_msa_subv_df_mips64 -#define helper_msa_subs_s_df helper_msa_subs_s_df_mips64 -#define helper_msa_subs_u_df helper_msa_subs_u_df_mips64 -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mips64 -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mips64 -#define helper_msa_mulv_df helper_msa_mulv_df_mips64 -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mips64 -#define helper_msa_dotp_u_df helper_msa_dotp_u_df_mips64 +#define helper_msa_subv_b helper_msa_subv_b_mips64 +#define helper_msa_subv_h helper_msa_subv_h_mips64 +#define helper_msa_subv_w helper_msa_subv_w_mips64 +#define helper_msa_subv_d helper_msa_subv_d_mips64 +#define helper_msa_subs_s_b helper_msa_subs_s_b_mips64 +#define helper_msa_subs_s_h helper_msa_subs_s_h_mips64 +#define helper_msa_subs_s_w helper_msa_subs_s_w_mips64 +#define helper_msa_subs_s_d helper_msa_subs_s_d_mips64 +#define helper_msa_subs_u_b helper_msa_subs_u_b_mips64 +#define helper_msa_subs_u_h helper_msa_subs_u_h_mips64 +#define helper_msa_subs_u_w helper_msa_subs_u_w_mips64 +#define helper_msa_subs_u_d helper_msa_subs_u_d_mips64 +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mips64 +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mips64 +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mips64 +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mips64 +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mips64 +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mips64 +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mips64 +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mips64 +#define helper_msa_mulv_b helper_msa_mulv_b_mips64 +#define helper_msa_mulv_h helper_msa_mulv_h_mips64 +#define helper_msa_mulv_w helper_msa_mulv_w_mips64 +#define helper_msa_mulv_d helper_msa_mulv_d_mips64 +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mips64 +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mips64 +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mips64 +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mips64 
+#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mips64 +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mips64 #define helper_msa_mul_q_df helper_msa_mul_q_df_mips64 #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mips64 #define helper_msa_sld_df helper_msa_sld_df_mips64 -#define helper_msa_maddv_df helper_msa_maddv_df_mips64 -#define helper_msa_msubv_df helper_msa_msubv_df_mips64 -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mips64 -#define helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mips64 -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mips64 -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mips64 +#define helper_msa_maddv_b helper_msa_maddv_b_mips64 +#define helper_msa_maddv_h helper_msa_maddv_h_mips64 +#define helper_msa_maddv_w helper_msa_maddv_w_mips64 +#define helper_msa_maddv_d helper_msa_maddv_d_mips64 +#define helper_msa_msubv_b helper_msa_msubv_b_mips64 +#define helper_msa_msubv_h helper_msa_msubv_h_mips64 +#define helper_msa_msubv_w helper_msa_msubv_w_mips64 +#define helper_msa_msubv_d helper_msa_msubv_d_mips64 +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mips64 +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mips64 +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mips64 +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mips64 +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mips64 +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mips64 +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mips64 +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mips64 +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mips64 +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mips64 +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mips64 +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mips64 #define helper_msa_binsl_df helper_msa_binsl_df_mips64 #define helper_msa_binsr_df helper_msa_binsr_df_mips64 #define helper_msa_madd_q_df helper_msa_madd_q_df_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 1c3f8ca26f..d8c1ac16b7 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mips64el #define tcg_gen_shr_i64 tcg_gen_shr_i64_mips64el #define tcg_gen_st_i64 tcg_gen_st_i64_mips64el +#define tcg_gen_add_i64 tcg_gen_add_i64_mips64el +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mips64el #define tcg_gen_xor_i64 tcg_gen_xor_i64_mips64el +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mips64el #define cpu_icount_to_ns cpu_icount_to_ns_mips64el #define cpu_is_stopped cpu_is_stopped_mips64el #define cpu_get_ticks cpu_get_ticks_mips64el @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mips64el #define floatx80_mul floatx80_mul_mips64el #define floatx80_div floatx80_div_mips64el +#define floatx80_modrem floatx80_modrem_mips64el +#define floatx80_mod floatx80_mod_mips64el #define floatx80_rem floatx80_rem_mips64el #define floatx80_sqrt floatx80_sqrt_mips64el #define floatx80_eq floatx80_eq_mips64el @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mips64el #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mips64el #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mips64el +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mips64el #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mips64el #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mips64el #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mips64el @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mips64el #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mips64el #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mips64el 
+#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mips64el +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mips64el #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mips64el +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mips64el +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mips64el #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mips64el #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mips64el #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mips64el +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mips64el #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mips64el #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_mips64el #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_mips64el +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mips64el +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mips64el #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mips64el #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mips64el #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips64el @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mips64el #define tcg_gen_shri_vec tcg_gen_shri_vec_mips64el #define tcg_gen_sari_vec tcg_gen_sari_vec_mips64el +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mips64el +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mips64el #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mips64el #define tcg_gen_add_vec tcg_gen_add_vec_mips64el #define tcg_gen_sub_vec tcg_gen_sub_vec_mips64el @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mips64el #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mips64el #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mips64el +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mips64el +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mips64el #define tcg_gen_shls_vec tcg_gen_shls_vec_mips64el #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mips64el #define tcg_gen_sars_vec tcg_gen_sars_vec_mips64el +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mips64el #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mips64el #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mips64el #define tb_htable_lookup tb_htable_lookup_mips64el @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mips64el #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mips64el #define tlb_init tlb_init_mips64el +#define tlb_destroy tlb_destroy_mips64el #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mips64el #define tlb_flush tlb_flush_mips64el #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mips64el @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mips64el #define get_page_addr_code_hostp get_page_addr_code_hostp_mips64el #define get_page_addr_code get_page_addr_code_mips64el +#define probe_access_flags probe_access_flags_mips64el #define probe_access probe_access_mips64el #define tlb_vaddr_to_host tlb_vaddr_to_host_mips64el #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mips64el @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mips64el #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mips64el #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mips64el -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mips64el -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mips64el -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mips64el -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mips64el +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mips64el +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mips64el +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mips64el +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mips64el +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mips64el +#define 
cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mips64el +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mips64el +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mips64el #define cpu_ldub_data_ra cpu_ldub_data_ra_mips64el #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mips64el -#define cpu_lduw_data_ra cpu_lduw_data_ra_mips64el -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mips64el -#define cpu_ldl_data_ra cpu_ldl_data_ra_mips64el -#define cpu_ldq_data_ra cpu_ldq_data_ra_mips64el +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mips64el +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mips64el +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mips64el +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mips64el +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_mips64el +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mips64el +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mips64el +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mips64el #define cpu_ldub_data cpu_ldub_data_mips64el #define cpu_ldsb_data cpu_ldsb_data_mips64el -#define cpu_lduw_data cpu_lduw_data_mips64el -#define cpu_ldsw_data cpu_ldsw_data_mips64el -#define cpu_ldl_data cpu_ldl_data_mips64el -#define cpu_ldq_data cpu_ldq_data_mips64el +#define cpu_lduw_be_data cpu_lduw_be_data_mips64el +#define cpu_lduw_le_data cpu_lduw_le_data_mips64el +#define cpu_ldsw_be_data cpu_ldsw_be_data_mips64el +#define cpu_ldsw_le_data cpu_ldsw_le_data_mips64el +#define cpu_ldl_be_data cpu_ldl_be_data_mips64el +#define cpu_ldl_le_data cpu_ldl_le_data_mips64el +#define cpu_ldq_le_data cpu_ldq_le_data_mips64el +#define cpu_ldq_be_data cpu_ldq_be_data_mips64el #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64el #define helper_le_stw_mmu helper_le_stw_mmu_mips64el #define helper_be_stw_mmu helper_be_stw_mmu_mips64el @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mips64el #define helper_be_stq_mmu helper_be_stq_mmu_mips64el #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mips64el -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mips64el -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mips64el -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mips64el +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mips64el +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mips64el +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mips64el +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mips64el +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mips64el +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mips64el #define cpu_stb_data_ra cpu_stb_data_ra_mips64el -#define cpu_stw_data_ra cpu_stw_data_ra_mips64el -#define cpu_stl_data_ra cpu_stl_data_ra_mips64el -#define cpu_stq_data_ra cpu_stq_data_ra_mips64el +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mips64el +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_mips64el +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mips64el +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mips64el +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mips64el +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mips64el #define cpu_stb_data cpu_stb_data_mips64el -#define cpu_stw_data cpu_stw_data_mips64el -#define cpu_stl_data cpu_stl_data_mips64el -#define cpu_stq_data cpu_stq_data_mips64el +#define cpu_stw_be_data cpu_stw_be_data_mips64el +#define cpu_stw_le_data cpu_stw_le_data_mips64el +#define cpu_stl_be_data cpu_stl_be_data_mips64el +#define cpu_stl_le_data cpu_stl_le_data_mips64el +#define cpu_stq_be_data cpu_stq_be_data_mips64el +#define cpu_stq_le_data cpu_stq_le_data_mips64el #define helper_atomic_cmpxchgb_mmu 
helper_atomic_cmpxchgb_mmu_mips64el #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mips64el #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mips64el @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mips64el #define cpu_ldl_code cpu_ldl_code_mips64el #define cpu_ldq_code cpu_ldq_code_mips64el +#define cpu_interrupt_handler cpu_interrupt_handler_mips64el #define helper_div_i32 helper_div_i32_mips64el #define helper_rem_i32 helper_rem_i32_mips64el #define helper_divu_i32 helper_divu_i32_mips64el @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips64el #define helper_gvec_sar32i helper_gvec_sar32i_mips64el #define helper_gvec_sar64i helper_gvec_sar64i_mips64el +#define helper_gvec_rotl8i helper_gvec_rotl8i_mips64el +#define helper_gvec_rotl16i helper_gvec_rotl16i_mips64el +#define helper_gvec_rotl32i helper_gvec_rotl32i_mips64el +#define helper_gvec_rotl64i helper_gvec_rotl64i_mips64el #define helper_gvec_shl8v helper_gvec_shl8v_mips64el #define helper_gvec_shl16v helper_gvec_shl16v_mips64el #define helper_gvec_shl32v helper_gvec_shl32v_mips64el @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mips64el #define helper_gvec_sar32v helper_gvec_sar32v_mips64el #define helper_gvec_sar64v helper_gvec_sar64v_mips64el +#define helper_gvec_rotl8v helper_gvec_rotl8v_mips64el +#define helper_gvec_rotl16v helper_gvec_rotl16v_mips64el +#define helper_gvec_rotl32v helper_gvec_rotl32v_mips64el +#define helper_gvec_rotl64v helper_gvec_rotl64v_mips64el +#define helper_gvec_rotr8v helper_gvec_rotr8v_mips64el +#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64el +#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64el +#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64el #define helper_gvec_eq8 helper_gvec_eq8_mips64el #define helper_gvec_ne8 helper_gvec_ne8_mips64el #define helper_gvec_lt8 helper_gvec_lt8_mips64el @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mips64el #define helper_cfc1 helper_cfc1_mips64el #define helper_ctc1 helper_ctc1_mips64el -#define ieee_ex_to_mips ieee_ex_to_mips_mips64el #define helper_float_sqrt_d helper_float_sqrt_d_mips64el #define helper_float_sqrt_s helper_float_sqrt_s_mips64el #define helper_float_cvtd_s helper_float_cvtd_s_mips64el @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mips64el #define helper_msa_binsli_df helper_msa_binsli_df_mips64el #define helper_msa_binsri_df helper_msa_binsri_df_mips64el -#define helper_msa_subv_df helper_msa_subv_df_mips64el -#define helper_msa_subs_s_df helper_msa_subs_s_df_mips64el -#define helper_msa_subs_u_df helper_msa_subs_u_df_mips64el -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mips64el -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mips64el -#define helper_msa_mulv_df helper_msa_mulv_df_mips64el -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mips64el -#define helper_msa_dotp_u_df helper_msa_dotp_u_df_mips64el +#define helper_msa_subv_b helper_msa_subv_b_mips64el +#define helper_msa_subv_h helper_msa_subv_h_mips64el +#define helper_msa_subv_w helper_msa_subv_w_mips64el +#define helper_msa_subv_d helper_msa_subv_d_mips64el +#define helper_msa_subs_s_b helper_msa_subs_s_b_mips64el +#define helper_msa_subs_s_h helper_msa_subs_s_h_mips64el +#define helper_msa_subs_s_w helper_msa_subs_s_w_mips64el +#define helper_msa_subs_s_d helper_msa_subs_s_d_mips64el +#define helper_msa_subs_u_b helper_msa_subs_u_b_mips64el +#define helper_msa_subs_u_h helper_msa_subs_u_h_mips64el +#define helper_msa_subs_u_w 
helper_msa_subs_u_w_mips64el +#define helper_msa_subs_u_d helper_msa_subs_u_d_mips64el +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mips64el +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mips64el +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mips64el +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mips64el +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mips64el +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mips64el +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mips64el +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mips64el +#define helper_msa_mulv_b helper_msa_mulv_b_mips64el +#define helper_msa_mulv_h helper_msa_mulv_h_mips64el +#define helper_msa_mulv_w helper_msa_mulv_w_mips64el +#define helper_msa_mulv_d helper_msa_mulv_d_mips64el +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mips64el +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mips64el +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mips64el +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mips64el +#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mips64el +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mips64el #define helper_msa_mul_q_df helper_msa_mul_q_df_mips64el #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mips64el #define helper_msa_sld_df helper_msa_sld_df_mips64el -#define helper_msa_maddv_df helper_msa_maddv_df_mips64el -#define helper_msa_msubv_df helper_msa_msubv_df_mips64el -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mips64el -#define helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mips64el -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mips64el -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mips64el +#define helper_msa_maddv_b helper_msa_maddv_b_mips64el +#define helper_msa_maddv_h helper_msa_maddv_h_mips64el +#define helper_msa_maddv_w helper_msa_maddv_w_mips64el +#define helper_msa_maddv_d helper_msa_maddv_d_mips64el +#define helper_msa_msubv_b helper_msa_msubv_b_mips64el +#define helper_msa_msubv_h helper_msa_msubv_h_mips64el +#define helper_msa_msubv_w helper_msa_msubv_w_mips64el +#define helper_msa_msubv_d helper_msa_msubv_d_mips64el +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mips64el +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mips64el +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mips64el +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mips64el +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mips64el +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mips64el +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mips64el +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mips64el +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mips64el +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mips64el +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mips64el +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mips64el #define helper_msa_binsl_df helper_msa_binsl_df_mips64el #define helper_msa_binsr_df helper_msa_binsr_df_mips64el #define helper_msa_madd_q_df helper_msa_madd_q_df_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index 511cfcfb0c..5e47f6184d 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mipsel #define tcg_gen_shr_i64 tcg_gen_shr_i64_mipsel #define tcg_gen_st_i64 tcg_gen_st_i64_mipsel +#define tcg_gen_add_i64 tcg_gen_add_i64_mipsel +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mipsel #define tcg_gen_xor_i64 tcg_gen_xor_i64_mipsel +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mipsel #define cpu_icount_to_ns cpu_icount_to_ns_mipsel #define 
cpu_is_stopped cpu_is_stopped_mipsel #define cpu_get_ticks cpu_get_ticks_mipsel @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mipsel #define floatx80_mul floatx80_mul_mipsel #define floatx80_div floatx80_div_mipsel +#define floatx80_modrem floatx80_modrem_mipsel +#define floatx80_mod floatx80_mod_mipsel #define floatx80_rem floatx80_rem_mipsel #define floatx80_sqrt floatx80_sqrt_mipsel #define floatx80_eq floatx80_eq_mipsel @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mipsel #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mipsel #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mipsel +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mipsel #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mipsel #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mipsel #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mipsel @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mipsel #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mipsel #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mipsel +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mipsel +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mipsel #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mipsel +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mipsel +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mipsel #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mipsel #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mipsel #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mipsel +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mipsel #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mipsel #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_mipsel #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_mipsel +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mipsel +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mipsel #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mipsel #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mipsel #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mipsel @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mipsel #define tcg_gen_shri_vec tcg_gen_shri_vec_mipsel #define tcg_gen_sari_vec tcg_gen_sari_vec_mipsel +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mipsel +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mipsel #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mipsel #define tcg_gen_add_vec tcg_gen_add_vec_mipsel #define tcg_gen_sub_vec tcg_gen_sub_vec_mipsel @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mipsel #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mipsel #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mipsel +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mipsel +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mipsel #define tcg_gen_shls_vec tcg_gen_shls_vec_mipsel #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mipsel #define tcg_gen_sars_vec tcg_gen_sars_vec_mipsel +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mipsel #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mipsel #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mipsel #define tb_htable_lookup tb_htable_lookup_mipsel @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mipsel #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mipsel #define tlb_init tlb_init_mipsel +#define tlb_destroy tlb_destroy_mipsel #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mipsel #define tlb_flush tlb_flush_mipsel #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mipsel @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mipsel #define get_page_addr_code_hostp get_page_addr_code_hostp_mipsel #define get_page_addr_code get_page_addr_code_mipsel 
+#define probe_access_flags probe_access_flags_mipsel #define probe_access probe_access_mipsel #define tlb_vaddr_to_host tlb_vaddr_to_host_mipsel #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mipsel @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mipsel #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mipsel #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mipsel -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mipsel -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mipsel -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mipsel -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mipsel +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mipsel +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mipsel +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mipsel +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mipsel +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mipsel +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mipsel +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mipsel +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mipsel #define cpu_ldub_data_ra cpu_ldub_data_ra_mipsel #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mipsel -#define cpu_lduw_data_ra cpu_lduw_data_ra_mipsel -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mipsel -#define cpu_ldl_data_ra cpu_ldl_data_ra_mipsel -#define cpu_ldq_data_ra cpu_ldq_data_ra_mipsel +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mipsel +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mipsel +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mipsel +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mipsel +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_mipsel +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mipsel +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mipsel +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mipsel #define cpu_ldub_data cpu_ldub_data_mipsel #define cpu_ldsb_data cpu_ldsb_data_mipsel -#define cpu_lduw_data cpu_lduw_data_mipsel -#define cpu_ldsw_data cpu_ldsw_data_mipsel -#define cpu_ldl_data cpu_ldl_data_mipsel -#define cpu_ldq_data cpu_ldq_data_mipsel +#define cpu_lduw_be_data cpu_lduw_be_data_mipsel +#define cpu_lduw_le_data cpu_lduw_le_data_mipsel +#define cpu_ldsw_be_data cpu_ldsw_be_data_mipsel +#define cpu_ldsw_le_data cpu_ldsw_le_data_mipsel +#define cpu_ldl_be_data cpu_ldl_be_data_mipsel +#define cpu_ldl_le_data cpu_ldl_le_data_mipsel +#define cpu_ldq_le_data cpu_ldq_le_data_mipsel +#define cpu_ldq_be_data cpu_ldq_be_data_mipsel #define helper_ret_stb_mmu helper_ret_stb_mmu_mipsel #define helper_le_stw_mmu helper_le_stw_mmu_mipsel #define helper_be_stw_mmu helper_be_stw_mmu_mipsel @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mipsel #define helper_be_stq_mmu helper_be_stq_mmu_mipsel #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mipsel -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mipsel -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mipsel -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mipsel +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mipsel +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mipsel +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mipsel +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mipsel +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mipsel +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mipsel #define cpu_stb_data_ra cpu_stb_data_ra_mipsel -#define cpu_stw_data_ra cpu_stw_data_ra_mipsel -#define cpu_stl_data_ra cpu_stl_data_ra_mipsel -#define cpu_stq_data_ra cpu_stq_data_ra_mipsel +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mipsel +#define 
cpu_stw_le_data_ra cpu_stw_le_data_ra_mipsel +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mipsel +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mipsel +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mipsel +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mipsel #define cpu_stb_data cpu_stb_data_mipsel -#define cpu_stw_data cpu_stw_data_mipsel -#define cpu_stl_data cpu_stl_data_mipsel -#define cpu_stq_data cpu_stq_data_mipsel +#define cpu_stw_be_data cpu_stw_be_data_mipsel +#define cpu_stw_le_data cpu_stw_le_data_mipsel +#define cpu_stl_be_data cpu_stl_be_data_mipsel +#define cpu_stl_le_data cpu_stl_le_data_mipsel +#define cpu_stq_be_data cpu_stq_be_data_mipsel +#define cpu_stq_le_data cpu_stq_le_data_mipsel #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_mipsel #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mipsel #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mipsel @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mipsel #define cpu_ldl_code cpu_ldl_code_mipsel #define cpu_ldq_code cpu_ldq_code_mipsel +#define cpu_interrupt_handler cpu_interrupt_handler_mipsel #define helper_div_i32 helper_div_i32_mipsel #define helper_rem_i32 helper_rem_i32_mipsel #define helper_divu_i32 helper_divu_i32_mipsel @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mipsel #define helper_gvec_sar32i helper_gvec_sar32i_mipsel #define helper_gvec_sar64i helper_gvec_sar64i_mipsel +#define helper_gvec_rotl8i helper_gvec_rotl8i_mipsel +#define helper_gvec_rotl16i helper_gvec_rotl16i_mipsel +#define helper_gvec_rotl32i helper_gvec_rotl32i_mipsel +#define helper_gvec_rotl64i helper_gvec_rotl64i_mipsel #define helper_gvec_shl8v helper_gvec_shl8v_mipsel #define helper_gvec_shl16v helper_gvec_shl16v_mipsel #define helper_gvec_shl32v helper_gvec_shl32v_mipsel @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mipsel #define helper_gvec_sar32v helper_gvec_sar32v_mipsel #define helper_gvec_sar64v helper_gvec_sar64v_mipsel +#define helper_gvec_rotl8v helper_gvec_rotl8v_mipsel +#define helper_gvec_rotl16v helper_gvec_rotl16v_mipsel +#define helper_gvec_rotl32v helper_gvec_rotl32v_mipsel +#define helper_gvec_rotl64v helper_gvec_rotl64v_mipsel +#define helper_gvec_rotr8v helper_gvec_rotr8v_mipsel +#define helper_gvec_rotr16v helper_gvec_rotr16v_mipsel +#define helper_gvec_rotr32v helper_gvec_rotr32v_mipsel +#define helper_gvec_rotr64v helper_gvec_rotr64v_mipsel #define helper_gvec_eq8 helper_gvec_eq8_mipsel #define helper_gvec_ne8 helper_gvec_ne8_mipsel #define helper_gvec_lt8 helper_gvec_lt8_mipsel @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mipsel #define helper_cfc1 helper_cfc1_mipsel #define helper_ctc1 helper_ctc1_mipsel -#define ieee_ex_to_mips ieee_ex_to_mips_mipsel #define helper_float_sqrt_d helper_float_sqrt_d_mipsel #define helper_float_sqrt_s helper_float_sqrt_s_mipsel #define helper_float_cvtd_s helper_float_cvtd_s_mipsel @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mipsel #define helper_msa_binsli_df helper_msa_binsli_df_mipsel #define helper_msa_binsri_df helper_msa_binsri_df_mipsel -#define helper_msa_subv_df helper_msa_subv_df_mipsel -#define helper_msa_subs_s_df helper_msa_subs_s_df_mipsel -#define helper_msa_subs_u_df helper_msa_subs_u_df_mipsel -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mipsel -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mipsel -#define helper_msa_mulv_df helper_msa_mulv_df_mipsel -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mipsel -#define 
helper_msa_dotp_u_df helper_msa_dotp_u_df_mipsel +#define helper_msa_subv_b helper_msa_subv_b_mipsel +#define helper_msa_subv_h helper_msa_subv_h_mipsel +#define helper_msa_subv_w helper_msa_subv_w_mipsel +#define helper_msa_subv_d helper_msa_subv_d_mipsel +#define helper_msa_subs_s_b helper_msa_subs_s_b_mipsel +#define helper_msa_subs_s_h helper_msa_subs_s_h_mipsel +#define helper_msa_subs_s_w helper_msa_subs_s_w_mipsel +#define helper_msa_subs_s_d helper_msa_subs_s_d_mipsel +#define helper_msa_subs_u_b helper_msa_subs_u_b_mipsel +#define helper_msa_subs_u_h helper_msa_subs_u_h_mipsel +#define helper_msa_subs_u_w helper_msa_subs_u_w_mipsel +#define helper_msa_subs_u_d helper_msa_subs_u_d_mipsel +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mipsel +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mipsel +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mipsel +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mipsel +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mipsel +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mipsel +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mipsel +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mipsel +#define helper_msa_mulv_b helper_msa_mulv_b_mipsel +#define helper_msa_mulv_h helper_msa_mulv_h_mipsel +#define helper_msa_mulv_w helper_msa_mulv_w_mipsel +#define helper_msa_mulv_d helper_msa_mulv_d_mipsel +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mipsel +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mipsel +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mipsel +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mipsel +#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mipsel +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mipsel #define helper_msa_mul_q_df helper_msa_mul_q_df_mipsel #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mipsel #define helper_msa_sld_df helper_msa_sld_df_mipsel -#define helper_msa_maddv_df helper_msa_maddv_df_mipsel -#define helper_msa_msubv_df helper_msa_msubv_df_mipsel -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mipsel -#define helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mipsel -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mipsel -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mipsel +#define helper_msa_maddv_b helper_msa_maddv_b_mipsel +#define helper_msa_maddv_h helper_msa_maddv_h_mipsel +#define helper_msa_maddv_w helper_msa_maddv_w_mipsel +#define helper_msa_maddv_d helper_msa_maddv_d_mipsel +#define helper_msa_msubv_b helper_msa_msubv_b_mipsel +#define helper_msa_msubv_h helper_msa_msubv_h_mipsel +#define helper_msa_msubv_w helper_msa_msubv_w_mipsel +#define helper_msa_msubv_d helper_msa_msubv_d_mipsel +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mipsel +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mipsel +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mipsel +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mipsel +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mipsel +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mipsel +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mipsel +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mipsel +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mipsel +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mipsel +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mipsel +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mipsel #define helper_msa_binsl_df helper_msa_binsl_df_mipsel #define helper_msa_binsr_df helper_msa_binsr_df_mipsel #define helper_msa_madd_q_df helper_msa_madd_q_df_mipsel diff --git 
a/qemu/ppc.h b/qemu/ppc.h index 7fd122913d..28ca9753e2 100644 --- a/qemu/ppc.h +++ b/qemu/ppc.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_ppc #define tcg_gen_shr_i64 tcg_gen_shr_i64_ppc #define tcg_gen_st_i64 tcg_gen_st_i64_ppc +#define tcg_gen_add_i64 tcg_gen_add_i64_ppc +#define tcg_gen_sub_i64 tcg_gen_sub_i64_ppc #define tcg_gen_xor_i64 tcg_gen_xor_i64_ppc +#define tcg_gen_neg_i64 tcg_gen_neg_i64_ppc #define cpu_icount_to_ns cpu_icount_to_ns_ppc #define cpu_is_stopped cpu_is_stopped_ppc #define cpu_get_ticks cpu_get_ticks_ppc @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_ppc #define floatx80_mul floatx80_mul_ppc #define floatx80_div floatx80_div_ppc +#define floatx80_modrem floatx80_modrem_ppc +#define floatx80_mod floatx80_mod_ppc #define floatx80_rem floatx80_rem_ppc #define floatx80_sqrt floatx80_sqrt_ppc #define floatx80_eq floatx80_eq_ppc @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_ppc #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_ppc #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_ppc +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_ppc #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_ppc #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_ppc #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_ppc @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_ppc #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_ppc #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_ppc +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_ppc +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_ppc #define tcg_gen_gvec_sari tcg_gen_gvec_sari_ppc +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_ppc +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_ppc #define tcg_gen_gvec_shls tcg_gen_gvec_shls_ppc #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_ppc #define tcg_gen_gvec_sars tcg_gen_gvec_sars_ppc +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_ppc #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_ppc #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_ppc #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_ppc +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_ppc +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_ppc #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_ppc #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_ppc #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_ppc @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_ppc #define tcg_gen_shri_vec tcg_gen_shri_vec_ppc #define tcg_gen_sari_vec tcg_gen_sari_vec_ppc +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_ppc +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_ppc #define tcg_gen_cmp_vec tcg_gen_cmp_vec_ppc #define tcg_gen_add_vec tcg_gen_add_vec_ppc #define tcg_gen_sub_vec tcg_gen_sub_vec_ppc @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_ppc #define tcg_gen_shrv_vec tcg_gen_shrv_vec_ppc #define tcg_gen_sarv_vec tcg_gen_sarv_vec_ppc +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_ppc +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_ppc #define tcg_gen_shls_vec tcg_gen_shls_vec_ppc #define tcg_gen_shrs_vec tcg_gen_shrs_vec_ppc #define tcg_gen_sars_vec tcg_gen_sars_vec_ppc +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_ppc #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_ppc #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_ppc #define tb_htable_lookup tb_htable_lookup_ppc @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_ppc #define cpu_loop_exit_atomic cpu_loop_exit_atomic_ppc #define tlb_init tlb_init_ppc +#define tlb_destroy tlb_destroy_ppc #define tlb_flush_by_mmuidx 
tlb_flush_by_mmuidx_ppc #define tlb_flush tlb_flush_ppc #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_ppc @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_ppc #define get_page_addr_code_hostp get_page_addr_code_hostp_ppc #define get_page_addr_code get_page_addr_code_ppc +#define probe_access_flags probe_access_flags_ppc #define probe_access probe_access_ppc #define tlb_vaddr_to_host tlb_vaddr_to_host_ppc #define helper_ret_ldub_mmu helper_ret_ldub_mmu_ppc @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_ppc #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_ppc #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_ppc -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_ppc -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_ppc -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_ppc -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_ppc +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_ppc +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_ppc +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_ppc +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_ppc +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_ppc +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_ppc +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_ppc +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_ppc #define cpu_ldub_data_ra cpu_ldub_data_ra_ppc #define cpu_ldsb_data_ra cpu_ldsb_data_ra_ppc -#define cpu_lduw_data_ra cpu_lduw_data_ra_ppc -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_ppc -#define cpu_ldl_data_ra cpu_ldl_data_ra_ppc -#define cpu_ldq_data_ra cpu_ldq_data_ra_ppc +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_ppc +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_ppc +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_ppc +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_ppc +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_ppc +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_ppc +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_ppc +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_ppc #define cpu_ldub_data cpu_ldub_data_ppc #define cpu_ldsb_data cpu_ldsb_data_ppc -#define cpu_lduw_data cpu_lduw_data_ppc -#define cpu_ldsw_data cpu_ldsw_data_ppc -#define cpu_ldl_data cpu_ldl_data_ppc -#define cpu_ldq_data cpu_ldq_data_ppc +#define cpu_lduw_be_data cpu_lduw_be_data_ppc +#define cpu_lduw_le_data cpu_lduw_le_data_ppc +#define cpu_ldsw_be_data cpu_ldsw_be_data_ppc +#define cpu_ldsw_le_data cpu_ldsw_le_data_ppc +#define cpu_ldl_be_data cpu_ldl_be_data_ppc +#define cpu_ldl_le_data cpu_ldl_le_data_ppc +#define cpu_ldq_le_data cpu_ldq_le_data_ppc +#define cpu_ldq_be_data cpu_ldq_be_data_ppc #define helper_ret_stb_mmu helper_ret_stb_mmu_ppc #define helper_le_stw_mmu helper_le_stw_mmu_ppc #define helper_be_stw_mmu helper_be_stw_mmu_ppc @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_ppc #define helper_be_stq_mmu helper_be_stq_mmu_ppc #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_ppc -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_ppc -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_ppc -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_ppc +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_ppc +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_ppc +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_ppc +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_ppc +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_ppc +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_ppc #define cpu_stb_data_ra cpu_stb_data_ra_ppc -#define cpu_stw_data_ra cpu_stw_data_ra_ppc -#define cpu_stl_data_ra cpu_stl_data_ra_ppc -#define cpu_stq_data_ra 
cpu_stq_data_ra_ppc +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_ppc +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_ppc +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_ppc +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_ppc +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_ppc +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_ppc #define cpu_stb_data cpu_stb_data_ppc -#define cpu_stw_data cpu_stw_data_ppc -#define cpu_stl_data cpu_stl_data_ppc -#define cpu_stq_data cpu_stq_data_ppc +#define cpu_stw_be_data cpu_stw_be_data_ppc +#define cpu_stw_le_data cpu_stw_le_data_ppc +#define cpu_stl_be_data cpu_stl_be_data_ppc +#define cpu_stl_le_data cpu_stl_le_data_ppc +#define cpu_stq_be_data cpu_stq_be_data_ppc +#define cpu_stq_le_data cpu_stq_le_data_ppc #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_ppc #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_ppc #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_ppc @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_ppc #define cpu_ldl_code cpu_ldl_code_ppc #define cpu_ldq_code cpu_ldq_code_ppc +#define cpu_interrupt_handler cpu_interrupt_handler_ppc #define helper_div_i32 helper_div_i32_ppc #define helper_rem_i32 helper_rem_i32_ppc #define helper_divu_i32 helper_divu_i32_ppc @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_ppc #define helper_gvec_sar32i helper_gvec_sar32i_ppc #define helper_gvec_sar64i helper_gvec_sar64i_ppc +#define helper_gvec_rotl8i helper_gvec_rotl8i_ppc +#define helper_gvec_rotl16i helper_gvec_rotl16i_ppc +#define helper_gvec_rotl32i helper_gvec_rotl32i_ppc +#define helper_gvec_rotl64i helper_gvec_rotl64i_ppc #define helper_gvec_shl8v helper_gvec_shl8v_ppc #define helper_gvec_shl16v helper_gvec_shl16v_ppc #define helper_gvec_shl32v helper_gvec_shl32v_ppc @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_ppc #define helper_gvec_sar32v helper_gvec_sar32v_ppc #define helper_gvec_sar64v helper_gvec_sar64v_ppc +#define helper_gvec_rotl8v helper_gvec_rotl8v_ppc +#define helper_gvec_rotl16v helper_gvec_rotl16v_ppc +#define helper_gvec_rotl32v helper_gvec_rotl32v_ppc +#define helper_gvec_rotl64v helper_gvec_rotl64v_ppc +#define helper_gvec_rotr8v helper_gvec_rotr8v_ppc +#define helper_gvec_rotr16v helper_gvec_rotr16v_ppc +#define helper_gvec_rotr32v helper_gvec_rotr32v_ppc +#define helper_gvec_rotr64v helper_gvec_rotr64v_ppc #define helper_gvec_eq8 helper_gvec_eq8_ppc #define helper_gvec_ne8 helper_gvec_ne8_ppc #define helper_gvec_lt8 helper_gvec_lt8_ppc @@ -1575,6 +1629,33 @@ #define helper_tbegin helper_tbegin_ppc #define helper_load_dump_spr helper_load_dump_spr_ppc #define helper_store_dump_spr helper_store_dump_spr_ppc +#define store_fpscr store_fpscr_ppc +#define helper_store_fpscr helper_store_fpscr_ppc +#define helper_float_check_status helper_float_check_status_ppc +#define helper_reset_fpstatus helper_reset_fpstatus_ppc +#define helper_fadd helper_fadd_ppc +#define helper_fsub helper_fsub_ppc +#define helper_fmul helper_fmul_ppc +#define helper_fdiv helper_fdiv_ppc +#define helper_fctiw helper_fctiw_ppc +#define helper_fctiwz helper_fctiwz_ppc +#define helper_fctiwuz helper_fctiwuz_ppc +#define helper_fctid helper_fctid_ppc +#define helper_fctidz helper_fctidz_ppc +#define helper_fctidu helper_fctidu_ppc +#define helper_fctiduz helper_fctiduz_ppc +#define helper_fcfid helper_fcfid_ppc +#define helper_fcfids helper_fcfids_ppc +#define helper_fcfidu helper_fcfidu_ppc +#define helper_fcfidus helper_fcfidus_ppc +#define helper_frin helper_frin_ppc +#define 
helper_friz helper_friz_ppc +#define helper_frip helper_frip_ppc +#define helper_frim helper_frim_ppc +#define helper_fmadd helper_fmadd_ppc +#define helper_fnmadd helper_fnmadd_ppc +#define helper_fmsub helper_fmsub_ppc +#define helper_fnmsub helper_fnmsub_ppc #define helper_hfscr_facility_check helper_hfscr_facility_check_ppc #define helper_fscr_facility_check helper_fscr_facility_check_ppc #define helper_msr_facility_check helper_msr_facility_check_ppc @@ -1726,6 +1807,243 @@ #define ppc_hash32_handle_mmu_fault ppc_hash32_handle_mmu_fault_ppc #define gen_helper_store_booke_tsr gen_helper_store_booke_tsr_ppc #define gen_helper_store_booke_tcr gen_helper_store_booke_tcr_ppc +#define gen_helper_store_fpscr gen_helper_store_fpscr_ppc #define store_booke_tcr store_booke_tcr_ppc #define ppc_hash32_get_phys_page_debug ppc_hash32_get_phys_page_debug_ppc +#define helper_compute_fprf_float128 helper_compute_fprf_float128_ppc +#define helper_compute_fprf_float16 helper_compute_fprf_float16_ppc +#define helper_compute_fprf_float32 helper_compute_fprf_float32_ppc +#define helper_compute_fprf_float64 helper_compute_fprf_float64_ppc +#define helper_efdadd helper_efdadd_ppc +#define helper_efdcfs helper_efdcfs_ppc +#define helper_efdcfsf helper_efdcfsf_ppc +#define helper_efdcfsi helper_efdcfsi_ppc +#define helper_efdcfsid helper_efdcfsid_ppc +#define helper_efdcfuf helper_efdcfuf_ppc +#define helper_efdcfui helper_efdcfui_ppc +#define helper_efdcfuid helper_efdcfuid_ppc +#define helper_efdcmpeq helper_efdcmpeq_ppc +#define helper_efdcmpgt helper_efdcmpgt_ppc +#define helper_efdcmplt helper_efdcmplt_ppc +#define helper_efdctsf helper_efdctsf_ppc +#define helper_efdctsi helper_efdctsi_ppc +#define helper_efdctsidz helper_efdctsidz_ppc +#define helper_efdctsiz helper_efdctsiz_ppc +#define helper_efdctuf helper_efdctuf_ppc +#define helper_efdctui helper_efdctui_ppc +#define helper_efdctuidz helper_efdctuidz_ppc +#define helper_efdctuiz helper_efdctuiz_ppc +#define helper_efddiv helper_efddiv_ppc +#define helper_efdmul helper_efdmul_ppc +#define helper_efdsub helper_efdsub_ppc +#define helper_efdtsteq helper_efdtsteq_ppc +#define helper_efdtstgt helper_efdtstgt_ppc +#define helper_efdtstlt helper_efdtstlt_ppc +#define helper_efsadd helper_efsadd_ppc +#define helper_efscfd helper_efscfd_ppc +#define helper_efscfsf helper_efscfsf_ppc +#define helper_efscfsi helper_efscfsi_ppc +#define helper_efscfuf helper_efscfuf_ppc +#define helper_efscfui helper_efscfui_ppc +#define helper_efscmpeq helper_efscmpeq_ppc +#define helper_efscmpgt helper_efscmpgt_ppc +#define helper_efscmplt helper_efscmplt_ppc +#define helper_efsctsf helper_efsctsf_ppc +#define helper_efsctsi helper_efsctsi_ppc +#define helper_efsctsiz helper_efsctsiz_ppc +#define helper_efsctuf helper_efsctuf_ppc +#define helper_efsctui helper_efsctui_ppc +#define helper_efsctuiz helper_efsctuiz_ppc +#define helper_efsdiv helper_efsdiv_ppc +#define helper_efsmul helper_efsmul_ppc +#define helper_efssub helper_efssub_ppc +#define helper_efststeq helper_efststeq_ppc +#define helper_efststgt helper_efststgt_ppc +#define helper_efststlt helper_efststlt_ppc +#define helper_evfsadd helper_evfsadd_ppc +#define helper_evfscfsf helper_evfscfsf_ppc +#define helper_evfscfsi helper_evfscfsi_ppc +#define helper_evfscfuf helper_evfscfuf_ppc +#define helper_evfscfui helper_evfscfui_ppc +#define helper_evfscmpeq helper_evfscmpeq_ppc +#define helper_evfscmpgt helper_evfscmpgt_ppc +#define helper_evfscmplt helper_evfscmplt_ppc +#define helper_evfsctsf helper_evfsctsf_ppc 
+#define helper_evfsctsi helper_evfsctsi_ppc +#define helper_evfsctsiz helper_evfsctsiz_ppc +#define helper_evfsctuf helper_evfsctuf_ppc +#define helper_evfsctui helper_evfsctui_ppc +#define helper_evfsctuiz helper_evfsctuiz_ppc +#define helper_evfsdiv helper_evfsdiv_ppc +#define helper_evfsmul helper_evfsmul_ppc +#define helper_evfssub helper_evfssub_ppc +#define helper_evfststeq helper_evfststeq_ppc +#define helper_evfststgt helper_evfststgt_ppc +#define helper_evfststlt helper_evfststlt_ppc +#define helper_fcmpo helper_fcmpo_ppc +#define helper_fcmpu helper_fcmpu_ppc +#define helper_fctiwu helper_fctiwu_ppc +#define helper_fpscr_clrbit helper_fpscr_clrbit_ppc +#define helper_fpscr_setbit helper_fpscr_setbit_ppc +#define helper_fre helper_fre_ppc +#define helper_fres helper_fres_ppc +#define helper_frsp helper_frsp_ppc +#define helper_frsqrte helper_frsqrte_ppc +#define helper_fsel helper_fsel_ppc +#define helper_fsqrt helper_fsqrt_ppc +#define helper_ftdiv helper_ftdiv_ppc +#define helper_ftsqrt helper_ftsqrt_ppc +#define helper_todouble helper_todouble_ppc +#define helper_tosingle helper_tosingle_ppc +#define helper_xsadddp helper_xsadddp_ppc +#define helper_xsaddqp helper_xsaddqp_ppc +#define helper_xsaddsp helper_xsaddsp_ppc +#define helper_xscmpeqdp helper_xscmpeqdp_ppc +#define helper_xscmpexpdp helper_xscmpexpdp_ppc +#define helper_xscmpexpqp helper_xscmpexpqp_ppc +#define helper_xscmpgedp helper_xscmpgedp_ppc +#define helper_xscmpgtdp helper_xscmpgtdp_ppc +#define helper_xscmpnedp helper_xscmpnedp_ppc +#define helper_xscmpodp helper_xscmpodp_ppc +#define helper_xscmpoqp helper_xscmpoqp_ppc +#define helper_xscmpudp helper_xscmpudp_ppc +#define helper_xscmpuqp helper_xscmpuqp_ppc +#define helper_xscvdphp helper_xscvdphp_ppc +#define helper_xscvdpqp helper_xscvdpqp_ppc +#define helper_xscvdpsp helper_xscvdpsp_ppc +#define helper_xscvdpspn helper_xscvdpspn_ppc +#define helper_xscvdpsxds helper_xscvdpsxds_ppc +#define helper_xscvdpsxws helper_xscvdpsxws_ppc +#define helper_xscvdpuxds helper_xscvdpuxds_ppc +#define helper_xscvdpuxws helper_xscvdpuxws_ppc +#define helper_xscvhpdp helper_xscvhpdp_ppc +#define helper_xscvqpdp helper_xscvqpdp_ppc +#define helper_xscvqpsdz helper_xscvqpsdz_ppc +#define helper_xscvqpswz helper_xscvqpswz_ppc +#define helper_xscvqpudz helper_xscvqpudz_ppc +#define helper_xscvqpuwz helper_xscvqpuwz_ppc +#define helper_xscvsdqp helper_xscvsdqp_ppc +#define helper_xscvspdp helper_xscvspdp_ppc +#define helper_xscvspdpn helper_xscvspdpn_ppc +#define helper_xscvsxddp helper_xscvsxddp_ppc +#define helper_xscvsxdsp helper_xscvsxdsp_ppc +#define helper_xscvudqp helper_xscvudqp_ppc +#define helper_xscvuxddp helper_xscvuxddp_ppc +#define helper_xscvuxdsp helper_xscvuxdsp_ppc +#define helper_xsdivdp helper_xsdivdp_ppc +#define helper_xsdivqp helper_xsdivqp_ppc +#define helper_xsdivsp helper_xsdivsp_ppc +#define helper_xsmadddp helper_xsmadddp_ppc +#define helper_xsmaddsp helper_xsmaddsp_ppc +#define helper_xsmaxcdp helper_xsmaxcdp_ppc +#define helper_xsmaxdp helper_xsmaxdp_ppc +#define helper_xsmaxjdp helper_xsmaxjdp_ppc +#define helper_xsmincdp helper_xsmincdp_ppc +#define helper_xsmindp helper_xsmindp_ppc +#define helper_xsminjdp helper_xsminjdp_ppc +#define helper_xsmsubdp helper_xsmsubdp_ppc +#define helper_xsmsubsp helper_xsmsubsp_ppc +#define helper_xsmuldp helper_xsmuldp_ppc +#define helper_xsmulqp helper_xsmulqp_ppc +#define helper_xsmulsp helper_xsmulsp_ppc +#define helper_xsnmadddp helper_xsnmadddp_ppc +#define helper_xsnmaddsp helper_xsnmaddsp_ppc +#define 
helper_xsnmsubdp helper_xsnmsubdp_ppc +#define helper_xsnmsubsp helper_xsnmsubsp_ppc +#define helper_xsrdpi helper_xsrdpi_ppc +#define helper_xsrdpic helper_xsrdpic_ppc +#define helper_xsrdpim helper_xsrdpim_ppc +#define helper_xsrdpip helper_xsrdpip_ppc +#define helper_xsrdpiz helper_xsrdpiz_ppc +#define helper_xsredp helper_xsredp_ppc +#define helper_xsresp helper_xsresp_ppc +#define helper_xsrqpi helper_xsrqpi_ppc +#define helper_xsrqpxp helper_xsrqpxp_ppc +#define helper_xsrsp helper_xsrsp_ppc +#define helper_xsrsqrtedp helper_xsrsqrtedp_ppc +#define helper_xsrsqrtesp helper_xsrsqrtesp_ppc +#define helper_xssqrtdp helper_xssqrtdp_ppc +#define helper_xssqrtqp helper_xssqrtqp_ppc +#define helper_xssqrtsp helper_xssqrtsp_ppc +#define helper_xssubdp helper_xssubdp_ppc +#define helper_xssubqp helper_xssubqp_ppc +#define helper_xssubsp helper_xssubsp_ppc +#define helper_xstdivdp helper_xstdivdp_ppc +#define helper_xstsqrtdp helper_xstsqrtdp_ppc +#define helper_xststdcdp helper_xststdcdp_ppc +#define helper_xststdcqp helper_xststdcqp_ppc +#define helper_xststdcsp helper_xststdcsp_ppc +#define helper_xvadddp helper_xvadddp_ppc +#define helper_xvaddsp helper_xvaddsp_ppc +#define helper_xvcmpeqdp helper_xvcmpeqdp_ppc +#define helper_xvcmpeqsp helper_xvcmpeqsp_ppc +#define helper_xvcmpgedp helper_xvcmpgedp_ppc +#define helper_xvcmpgesp helper_xvcmpgesp_ppc +#define helper_xvcmpgtdp helper_xvcmpgtdp_ppc +#define helper_xvcmpgtsp helper_xvcmpgtsp_ppc +#define helper_xvcmpnedp helper_xvcmpnedp_ppc +#define helper_xvcmpnesp helper_xvcmpnesp_ppc +#define helper_xvcvdpsp helper_xvcvdpsp_ppc +#define helper_xvcvdpsxds helper_xvcvdpsxds_ppc +#define helper_xvcvdpsxws helper_xvcvdpsxws_ppc +#define helper_xvcvdpuxds helper_xvcvdpuxds_ppc +#define helper_xvcvdpuxws helper_xvcvdpuxws_ppc +#define helper_xvcvhpsp helper_xvcvhpsp_ppc +#define helper_xvcvspdp helper_xvcvspdp_ppc +#define helper_xvcvsphp helper_xvcvsphp_ppc +#define helper_xvcvspsxds helper_xvcvspsxds_ppc +#define helper_xvcvspsxws helper_xvcvspsxws_ppc +#define helper_xvcvspuxds helper_xvcvspuxds_ppc +#define helper_xvcvspuxws helper_xvcvspuxws_ppc +#define helper_xvcvsxddp helper_xvcvsxddp_ppc +#define helper_xvcvsxdsp helper_xvcvsxdsp_ppc +#define helper_xvcvsxwdp helper_xvcvsxwdp_ppc +#define helper_xvcvsxwsp helper_xvcvsxwsp_ppc +#define helper_xvcvuxddp helper_xvcvuxddp_ppc +#define helper_xvcvuxdsp helper_xvcvuxdsp_ppc +#define helper_xvcvuxwdp helper_xvcvuxwdp_ppc +#define helper_xvcvuxwsp helper_xvcvuxwsp_ppc +#define helper_xvdivdp helper_xvdivdp_ppc +#define helper_xvdivsp helper_xvdivsp_ppc +#define helper_xvmadddp helper_xvmadddp_ppc +#define helper_xvmaddsp helper_xvmaddsp_ppc +#define helper_xvmaxdp helper_xvmaxdp_ppc +#define helper_xvmaxsp helper_xvmaxsp_ppc +#define helper_xvmindp helper_xvmindp_ppc +#define helper_xvminsp helper_xvminsp_ppc +#define helper_xvmsubdp helper_xvmsubdp_ppc +#define helper_xvmsubsp helper_xvmsubsp_ppc +#define helper_xvmuldp helper_xvmuldp_ppc +#define helper_xvmulsp helper_xvmulsp_ppc +#define helper_xvnmadddp helper_xvnmadddp_ppc +#define helper_xvnmaddsp helper_xvnmaddsp_ppc +#define helper_xvnmsubdp helper_xvnmsubdp_ppc +#define helper_xvnmsubsp helper_xvnmsubsp_ppc +#define helper_xvrdpi helper_xvrdpi_ppc +#define helper_xvrdpic helper_xvrdpic_ppc +#define helper_xvrdpim helper_xvrdpim_ppc +#define helper_xvrdpip helper_xvrdpip_ppc +#define helper_xvrdpiz helper_xvrdpiz_ppc +#define helper_xvredp helper_xvredp_ppc +#define helper_xvresp helper_xvresp_ppc +#define helper_xvrspi 
helper_xvrspi_ppc +#define helper_xvrspic helper_xvrspic_ppc +#define helper_xvrspim helper_xvrspim_ppc +#define helper_xvrspip helper_xvrspip_ppc +#define helper_xvrspiz helper_xvrspiz_ppc +#define helper_xvrsqrtedp helper_xvrsqrtedp_ppc +#define helper_xvrsqrtesp helper_xvrsqrtesp_ppc +#define helper_xvsqrtdp helper_xvsqrtdp_ppc +#define helper_xvsqrtsp helper_xvsqrtsp_ppc +#define helper_xvsubdp helper_xvsubdp_ppc +#define helper_xvsubsp helper_xvsubsp_ppc +#define helper_xvtdivdp helper_xvtdivdp_ppc +#define helper_xvtdivsp helper_xvtdivsp_ppc +#define helper_xvtsqrtdp helper_xvtsqrtdp_ppc +#define helper_xvtsqrtsp helper_xvtsqrtsp_ppc +#define helper_xvtstdcdp helper_xvtstdcdp_ppc +#define helper_xvtstdcsp helper_xvtstdcsp_ppc +#define helper_xvxsigsp helper_xvxsigsp_ppc +#define helper_xxperm helper_xxperm_ppc +#define helper_xxpermr helper_xxpermr_ppc #endif diff --git a/qemu/ppc64.h b/qemu/ppc64.h index 9e4d79ed11..4944950a82 100644 --- a/qemu/ppc64.h +++ b/qemu/ppc64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_ppc64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_ppc64 #define tcg_gen_st_i64 tcg_gen_st_i64_ppc64 +#define tcg_gen_add_i64 tcg_gen_add_i64_ppc64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_ppc64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_ppc64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_ppc64 #define cpu_icount_to_ns cpu_icount_to_ns_ppc64 #define cpu_is_stopped cpu_is_stopped_ppc64 #define cpu_get_ticks cpu_get_ticks_ppc64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_ppc64 #define floatx80_mul floatx80_mul_ppc64 #define floatx80_div floatx80_div_ppc64 +#define floatx80_modrem floatx80_modrem_ppc64 +#define floatx80_mod floatx80_mod_ppc64 #define floatx80_rem floatx80_rem_ppc64 #define floatx80_sqrt floatx80_sqrt_ppc64 #define floatx80_eq floatx80_eq_ppc64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_ppc64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_ppc64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_ppc64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_ppc64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_ppc64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_ppc64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_ppc64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_ppc64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_ppc64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_ppc64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_ppc64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_ppc64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_ppc64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_ppc64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_ppc64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_ppc64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_ppc64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_ppc64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_ppc64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_ppc64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_ppc64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_ppc64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_ppc64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_ppc64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_ppc64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_ppc64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_ppc64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_ppc64 #define tcg_gen_shri_vec tcg_gen_shri_vec_ppc64 #define tcg_gen_sari_vec tcg_gen_sari_vec_ppc64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_ppc64 +#define 
tcg_gen_rotri_vec tcg_gen_rotri_vec_ppc64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_ppc64 #define tcg_gen_add_vec tcg_gen_add_vec_ppc64 #define tcg_gen_sub_vec tcg_gen_sub_vec_ppc64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_ppc64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_ppc64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_ppc64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_ppc64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_ppc64 #define tcg_gen_shls_vec tcg_gen_shls_vec_ppc64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_ppc64 #define tcg_gen_sars_vec tcg_gen_sars_vec_ppc64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_ppc64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_ppc64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_ppc64 #define tb_htable_lookup tb_htable_lookup_ppc64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_ppc64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_ppc64 #define tlb_init tlb_init_ppc64 +#define tlb_destroy tlb_destroy_ppc64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_ppc64 #define tlb_flush tlb_flush_ppc64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_ppc64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_ppc64 #define get_page_addr_code_hostp get_page_addr_code_hostp_ppc64 #define get_page_addr_code get_page_addr_code_ppc64 +#define probe_access_flags probe_access_flags_ppc64 #define probe_access probe_access_ppc64 #define tlb_vaddr_to_host tlb_vaddr_to_host_ppc64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_ppc64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_ppc64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_ppc64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_ppc64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_ppc64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_ppc64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_ppc64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_ppc64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_ppc64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_ppc64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_ppc64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_ppc64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_ppc64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_ppc64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_ppc64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_ppc64 #define cpu_ldub_data_ra cpu_ldub_data_ra_ppc64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_ppc64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_ppc64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_ppc64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_ppc64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_ppc64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_ppc64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_ppc64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_ppc64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_ppc64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_ppc64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_ppc64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_ppc64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_ppc64 #define cpu_ldub_data cpu_ldub_data_ppc64 #define cpu_ldsb_data cpu_ldsb_data_ppc64 -#define cpu_lduw_data cpu_lduw_data_ppc64 -#define cpu_ldsw_data cpu_ldsw_data_ppc64 -#define cpu_ldl_data cpu_ldl_data_ppc64 -#define cpu_ldq_data cpu_ldq_data_ppc64 +#define cpu_lduw_be_data cpu_lduw_be_data_ppc64 +#define cpu_lduw_le_data cpu_lduw_le_data_ppc64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_ppc64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_ppc64 
+#define cpu_ldl_be_data cpu_ldl_be_data_ppc64 +#define cpu_ldl_le_data cpu_ldl_le_data_ppc64 +#define cpu_ldq_le_data cpu_ldq_le_data_ppc64 +#define cpu_ldq_be_data cpu_ldq_be_data_ppc64 #define helper_ret_stb_mmu helper_ret_stb_mmu_ppc64 #define helper_le_stw_mmu helper_le_stw_mmu_ppc64 #define helper_be_stw_mmu helper_be_stw_mmu_ppc64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_ppc64 #define helper_be_stq_mmu helper_be_stq_mmu_ppc64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_ppc64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_ppc64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_ppc64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_ppc64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_ppc64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_ppc64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_ppc64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_ppc64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_ppc64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_ppc64 #define cpu_stb_data_ra cpu_stb_data_ra_ppc64 -#define cpu_stw_data_ra cpu_stw_data_ra_ppc64 -#define cpu_stl_data_ra cpu_stl_data_ra_ppc64 -#define cpu_stq_data_ra cpu_stq_data_ra_ppc64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_ppc64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_ppc64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_ppc64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_ppc64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_ppc64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_ppc64 #define cpu_stb_data cpu_stb_data_ppc64 -#define cpu_stw_data cpu_stw_data_ppc64 -#define cpu_stl_data cpu_stl_data_ppc64 -#define cpu_stq_data cpu_stq_data_ppc64 +#define cpu_stw_be_data cpu_stw_be_data_ppc64 +#define cpu_stw_le_data cpu_stw_le_data_ppc64 +#define cpu_stl_be_data cpu_stl_be_data_ppc64 +#define cpu_stl_le_data cpu_stl_le_data_ppc64 +#define cpu_stq_be_data cpu_stq_be_data_ppc64 +#define cpu_stq_le_data cpu_stq_le_data_ppc64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_ppc64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_ppc64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_ppc64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_ppc64 #define cpu_ldl_code cpu_ldl_code_ppc64 #define cpu_ldq_code cpu_ldq_code_ppc64 +#define cpu_interrupt_handler cpu_interrupt_handler_ppc64 #define helper_div_i32 helper_div_i32_ppc64 #define helper_rem_i32 helper_rem_i32_ppc64 #define helper_divu_i32 helper_divu_i32_ppc64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_ppc64 #define helper_gvec_sar32i helper_gvec_sar32i_ppc64 #define helper_gvec_sar64i helper_gvec_sar64i_ppc64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_ppc64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_ppc64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_ppc64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_ppc64 #define helper_gvec_shl8v helper_gvec_shl8v_ppc64 #define helper_gvec_shl16v helper_gvec_shl16v_ppc64 #define helper_gvec_shl32v helper_gvec_shl32v_ppc64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_ppc64 #define helper_gvec_sar32v helper_gvec_sar32v_ppc64 #define helper_gvec_sar64v helper_gvec_sar64v_ppc64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_ppc64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_ppc64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_ppc64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_ppc64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_ppc64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_ppc64 
+#define helper_gvec_rotr32v helper_gvec_rotr32v_ppc64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_ppc64 #define helper_gvec_eq8 helper_gvec_eq8_ppc64 #define helper_gvec_ne8 helper_gvec_ne8_ppc64 #define helper_gvec_lt8 helper_gvec_lt8_ppc64 @@ -1575,6 +1629,33 @@ #define helper_tbegin helper_tbegin_ppc64 #define helper_load_dump_spr helper_load_dump_spr_ppc64 #define helper_store_dump_spr helper_store_dump_spr_ppc64 +#define store_fpscr store_fpscr_ppc64 +#define helper_store_fpscr helper_store_fpscr_ppc64 +#define helper_float_check_status helper_float_check_status_ppc64 +#define helper_reset_fpstatus helper_reset_fpstatus_ppc64 +#define helper_fadd helper_fadd_ppc64 +#define helper_fsub helper_fsub_ppc64 +#define helper_fmul helper_fmul_ppc64 +#define helper_fdiv helper_fdiv_ppc64 +#define helper_fctiw helper_fctiw_ppc64 +#define helper_fctiwz helper_fctiwz_ppc64 +#define helper_fctiwuz helper_fctiwuz_ppc64 +#define helper_fctid helper_fctid_ppc64 +#define helper_fctidz helper_fctidz_ppc64 +#define helper_fctidu helper_fctidu_ppc64 +#define helper_fctiduz helper_fctiduz_ppc64 +#define helper_fcfid helper_fcfid_ppc64 +#define helper_fcfids helper_fcfids_ppc64 +#define helper_fcfidu helper_fcfidu_ppc64 +#define helper_fcfidus helper_fcfidus_ppc64 +#define helper_frin helper_frin_ppc64 +#define helper_friz helper_friz_ppc64 +#define helper_frip helper_frip_ppc64 +#define helper_frim helper_frim_ppc64 +#define helper_fmadd helper_fmadd_ppc64 +#define helper_fnmadd helper_fnmadd_ppc64 +#define helper_fmsub helper_fmsub_ppc64 +#define helper_fnmsub helper_fnmsub_ppc64 #define helper_hfscr_facility_check helper_hfscr_facility_check_ppc64 #define helper_fscr_facility_check helper_fscr_facility_check_ppc64 #define helper_msr_facility_check helper_msr_facility_check_ppc64 @@ -1726,6 +1807,243 @@ #define ppc_hash32_handle_mmu_fault ppc_hash32_handle_mmu_fault_ppc64 #define gen_helper_store_booke_tsr gen_helper_store_booke_tsr_ppc64 #define gen_helper_store_booke_tcr gen_helper_store_booke_tcr_ppc64 +#define gen_helper_store_fpscr gen_helper_store_fpscr_ppc64 #define store_booke_tcr store_booke_tcr_ppc64 #define ppc_hash32_get_phys_page_debug ppc_hash32_get_phys_page_debug_ppc64 +#define helper_compute_fprf_float128 helper_compute_fprf_float128_ppc64 +#define helper_compute_fprf_float16 helper_compute_fprf_float16_ppc64 +#define helper_compute_fprf_float32 helper_compute_fprf_float32_ppc64 +#define helper_compute_fprf_float64 helper_compute_fprf_float64_ppc64 +#define helper_efdadd helper_efdadd_ppc64 +#define helper_efdcfs helper_efdcfs_ppc64 +#define helper_efdcfsf helper_efdcfsf_ppc64 +#define helper_efdcfsi helper_efdcfsi_ppc64 +#define helper_efdcfsid helper_efdcfsid_ppc64 +#define helper_efdcfuf helper_efdcfuf_ppc64 +#define helper_efdcfui helper_efdcfui_ppc64 +#define helper_efdcfuid helper_efdcfuid_ppc64 +#define helper_efdcmpeq helper_efdcmpeq_ppc64 +#define helper_efdcmpgt helper_efdcmpgt_ppc64 +#define helper_efdcmplt helper_efdcmplt_ppc64 +#define helper_efdctsf helper_efdctsf_ppc64 +#define helper_efdctsi helper_efdctsi_ppc64 +#define helper_efdctsidz helper_efdctsidz_ppc64 +#define helper_efdctsiz helper_efdctsiz_ppc64 +#define helper_efdctuf helper_efdctuf_ppc64 +#define helper_efdctui helper_efdctui_ppc64 +#define helper_efdctuidz helper_efdctuidz_ppc64 +#define helper_efdctuiz helper_efdctuiz_ppc64 +#define helper_efddiv helper_efddiv_ppc64 +#define helper_efdmul helper_efdmul_ppc64 +#define helper_efdsub helper_efdsub_ppc64 +#define helper_efdtsteq helper_efdtsteq_ppc64 
+#define helper_efdtstgt helper_efdtstgt_ppc64 +#define helper_efdtstlt helper_efdtstlt_ppc64 +#define helper_efsadd helper_efsadd_ppc64 +#define helper_efscfd helper_efscfd_ppc64 +#define helper_efscfsf helper_efscfsf_ppc64 +#define helper_efscfsi helper_efscfsi_ppc64 +#define helper_efscfuf helper_efscfuf_ppc64 +#define helper_efscfui helper_efscfui_ppc64 +#define helper_efscmpeq helper_efscmpeq_ppc64 +#define helper_efscmpgt helper_efscmpgt_ppc64 +#define helper_efscmplt helper_efscmplt_ppc64 +#define helper_efsctsf helper_efsctsf_ppc64 +#define helper_efsctsi helper_efsctsi_ppc64 +#define helper_efsctsiz helper_efsctsiz_ppc64 +#define helper_efsctuf helper_efsctuf_ppc64 +#define helper_efsctui helper_efsctui_ppc64 +#define helper_efsctuiz helper_efsctuiz_ppc64 +#define helper_efsdiv helper_efsdiv_ppc64 +#define helper_efsmul helper_efsmul_ppc64 +#define helper_efssub helper_efssub_ppc64 +#define helper_efststeq helper_efststeq_ppc64 +#define helper_efststgt helper_efststgt_ppc64 +#define helper_efststlt helper_efststlt_ppc64 +#define helper_evfsadd helper_evfsadd_ppc64 +#define helper_evfscfsf helper_evfscfsf_ppc64 +#define helper_evfscfsi helper_evfscfsi_ppc64 +#define helper_evfscfuf helper_evfscfuf_ppc64 +#define helper_evfscfui helper_evfscfui_ppc64 +#define helper_evfscmpeq helper_evfscmpeq_ppc64 +#define helper_evfscmpgt helper_evfscmpgt_ppc64 +#define helper_evfscmplt helper_evfscmplt_ppc64 +#define helper_evfsctsf helper_evfsctsf_ppc64 +#define helper_evfsctsi helper_evfsctsi_ppc64 +#define helper_evfsctsiz helper_evfsctsiz_ppc64 +#define helper_evfsctuf helper_evfsctuf_ppc64 +#define helper_evfsctui helper_evfsctui_ppc64 +#define helper_evfsctuiz helper_evfsctuiz_ppc64 +#define helper_evfsdiv helper_evfsdiv_ppc64 +#define helper_evfsmul helper_evfsmul_ppc64 +#define helper_evfssub helper_evfssub_ppc64 +#define helper_evfststeq helper_evfststeq_ppc64 +#define helper_evfststgt helper_evfststgt_ppc64 +#define helper_evfststlt helper_evfststlt_ppc64 +#define helper_fcmpo helper_fcmpo_ppc64 +#define helper_fcmpu helper_fcmpu_ppc64 +#define helper_fctiwu helper_fctiwu_ppc64 +#define helper_fpscr_clrbit helper_fpscr_clrbit_ppc64 +#define helper_fpscr_setbit helper_fpscr_setbit_ppc64 +#define helper_fre helper_fre_ppc64 +#define helper_fres helper_fres_ppc64 +#define helper_frsp helper_frsp_ppc64 +#define helper_frsqrte helper_frsqrte_ppc64 +#define helper_fsel helper_fsel_ppc64 +#define helper_fsqrt helper_fsqrt_ppc64 +#define helper_ftdiv helper_ftdiv_ppc64 +#define helper_ftsqrt helper_ftsqrt_ppc64 +#define helper_todouble helper_todouble_ppc64 +#define helper_tosingle helper_tosingle_ppc64 +#define helper_xsadddp helper_xsadddp_ppc64 +#define helper_xsaddqp helper_xsaddqp_ppc64 +#define helper_xsaddsp helper_xsaddsp_ppc64 +#define helper_xscmpeqdp helper_xscmpeqdp_ppc64 +#define helper_xscmpexpdp helper_xscmpexpdp_ppc64 +#define helper_xscmpexpqp helper_xscmpexpqp_ppc64 +#define helper_xscmpgedp helper_xscmpgedp_ppc64 +#define helper_xscmpgtdp helper_xscmpgtdp_ppc64 +#define helper_xscmpnedp helper_xscmpnedp_ppc64 +#define helper_xscmpodp helper_xscmpodp_ppc64 +#define helper_xscmpoqp helper_xscmpoqp_ppc64 +#define helper_xscmpudp helper_xscmpudp_ppc64 +#define helper_xscmpuqp helper_xscmpuqp_ppc64 +#define helper_xscvdphp helper_xscvdphp_ppc64 +#define helper_xscvdpqp helper_xscvdpqp_ppc64 +#define helper_xscvdpsp helper_xscvdpsp_ppc64 +#define helper_xscvdpspn helper_xscvdpspn_ppc64 +#define helper_xscvdpsxds helper_xscvdpsxds_ppc64 +#define helper_xscvdpsxws 
helper_xscvdpsxws_ppc64 +#define helper_xscvdpuxds helper_xscvdpuxds_ppc64 +#define helper_xscvdpuxws helper_xscvdpuxws_ppc64 +#define helper_xscvhpdp helper_xscvhpdp_ppc64 +#define helper_xscvqpdp helper_xscvqpdp_ppc64 +#define helper_xscvqpsdz helper_xscvqpsdz_ppc64 +#define helper_xscvqpswz helper_xscvqpswz_ppc64 +#define helper_xscvqpudz helper_xscvqpudz_ppc64 +#define helper_xscvqpuwz helper_xscvqpuwz_ppc64 +#define helper_xscvsdqp helper_xscvsdqp_ppc64 +#define helper_xscvspdp helper_xscvspdp_ppc64 +#define helper_xscvspdpn helper_xscvspdpn_ppc64 +#define helper_xscvsxddp helper_xscvsxddp_ppc64 +#define helper_xscvsxdsp helper_xscvsxdsp_ppc64 +#define helper_xscvudqp helper_xscvudqp_ppc64 +#define helper_xscvuxddp helper_xscvuxddp_ppc64 +#define helper_xscvuxdsp helper_xscvuxdsp_ppc64 +#define helper_xsdivdp helper_xsdivdp_ppc64 +#define helper_xsdivqp helper_xsdivqp_ppc64 +#define helper_xsdivsp helper_xsdivsp_ppc64 +#define helper_xsmadddp helper_xsmadddp_ppc64 +#define helper_xsmaddsp helper_xsmaddsp_ppc64 +#define helper_xsmaxcdp helper_xsmaxcdp_ppc64 +#define helper_xsmaxdp helper_xsmaxdp_ppc64 +#define helper_xsmaxjdp helper_xsmaxjdp_ppc64 +#define helper_xsmincdp helper_xsmincdp_ppc64 +#define helper_xsmindp helper_xsmindp_ppc64 +#define helper_xsminjdp helper_xsminjdp_ppc64 +#define helper_xsmsubdp helper_xsmsubdp_ppc64 +#define helper_xsmsubsp helper_xsmsubsp_ppc64 +#define helper_xsmuldp helper_xsmuldp_ppc64 +#define helper_xsmulqp helper_xsmulqp_ppc64 +#define helper_xsmulsp helper_xsmulsp_ppc64 +#define helper_xsnmadddp helper_xsnmadddp_ppc64 +#define helper_xsnmaddsp helper_xsnmaddsp_ppc64 +#define helper_xsnmsubdp helper_xsnmsubdp_ppc64 +#define helper_xsnmsubsp helper_xsnmsubsp_ppc64 +#define helper_xsrdpi helper_xsrdpi_ppc64 +#define helper_xsrdpic helper_xsrdpic_ppc64 +#define helper_xsrdpim helper_xsrdpim_ppc64 +#define helper_xsrdpip helper_xsrdpip_ppc64 +#define helper_xsrdpiz helper_xsrdpiz_ppc64 +#define helper_xsredp helper_xsredp_ppc64 +#define helper_xsresp helper_xsresp_ppc64 +#define helper_xsrqpi helper_xsrqpi_ppc64 +#define helper_xsrqpxp helper_xsrqpxp_ppc64 +#define helper_xsrsp helper_xsrsp_ppc64 +#define helper_xsrsqrtedp helper_xsrsqrtedp_ppc64 +#define helper_xsrsqrtesp helper_xsrsqrtesp_ppc64 +#define helper_xssqrtdp helper_xssqrtdp_ppc64 +#define helper_xssqrtqp helper_xssqrtqp_ppc64 +#define helper_xssqrtsp helper_xssqrtsp_ppc64 +#define helper_xssubdp helper_xssubdp_ppc64 +#define helper_xssubqp helper_xssubqp_ppc64 +#define helper_xssubsp helper_xssubsp_ppc64 +#define helper_xstdivdp helper_xstdivdp_ppc64 +#define helper_xstsqrtdp helper_xstsqrtdp_ppc64 +#define helper_xststdcdp helper_xststdcdp_ppc64 +#define helper_xststdcqp helper_xststdcqp_ppc64 +#define helper_xststdcsp helper_xststdcsp_ppc64 +#define helper_xvadddp helper_xvadddp_ppc64 +#define helper_xvaddsp helper_xvaddsp_ppc64 +#define helper_xvcmpeqdp helper_xvcmpeqdp_ppc64 +#define helper_xvcmpeqsp helper_xvcmpeqsp_ppc64 +#define helper_xvcmpgedp helper_xvcmpgedp_ppc64 +#define helper_xvcmpgesp helper_xvcmpgesp_ppc64 +#define helper_xvcmpgtdp helper_xvcmpgtdp_ppc64 +#define helper_xvcmpgtsp helper_xvcmpgtsp_ppc64 +#define helper_xvcmpnedp helper_xvcmpnedp_ppc64 +#define helper_xvcmpnesp helper_xvcmpnesp_ppc64 +#define helper_xvcvdpsp helper_xvcvdpsp_ppc64 +#define helper_xvcvdpsxds helper_xvcvdpsxds_ppc64 +#define helper_xvcvdpsxws helper_xvcvdpsxws_ppc64 +#define helper_xvcvdpuxds helper_xvcvdpuxds_ppc64 +#define helper_xvcvdpuxws helper_xvcvdpuxws_ppc64 +#define helper_xvcvhpsp 
helper_xvcvhpsp_ppc64 +#define helper_xvcvspdp helper_xvcvspdp_ppc64 +#define helper_xvcvsphp helper_xvcvsphp_ppc64 +#define helper_xvcvspsxds helper_xvcvspsxds_ppc64 +#define helper_xvcvspsxws helper_xvcvspsxws_ppc64 +#define helper_xvcvspuxds helper_xvcvspuxds_ppc64 +#define helper_xvcvspuxws helper_xvcvspuxws_ppc64 +#define helper_xvcvsxddp helper_xvcvsxddp_ppc64 +#define helper_xvcvsxdsp helper_xvcvsxdsp_ppc64 +#define helper_xvcvsxwdp helper_xvcvsxwdp_ppc64 +#define helper_xvcvsxwsp helper_xvcvsxwsp_ppc64 +#define helper_xvcvuxddp helper_xvcvuxddp_ppc64 +#define helper_xvcvuxdsp helper_xvcvuxdsp_ppc64 +#define helper_xvcvuxwdp helper_xvcvuxwdp_ppc64 +#define helper_xvcvuxwsp helper_xvcvuxwsp_ppc64 +#define helper_xvdivdp helper_xvdivdp_ppc64 +#define helper_xvdivsp helper_xvdivsp_ppc64 +#define helper_xvmadddp helper_xvmadddp_ppc64 +#define helper_xvmaddsp helper_xvmaddsp_ppc64 +#define helper_xvmaxdp helper_xvmaxdp_ppc64 +#define helper_xvmaxsp helper_xvmaxsp_ppc64 +#define helper_xvmindp helper_xvmindp_ppc64 +#define helper_xvminsp helper_xvminsp_ppc64 +#define helper_xvmsubdp helper_xvmsubdp_ppc64 +#define helper_xvmsubsp helper_xvmsubsp_ppc64 +#define helper_xvmuldp helper_xvmuldp_ppc64 +#define helper_xvmulsp helper_xvmulsp_ppc64 +#define helper_xvnmadddp helper_xvnmadddp_ppc64 +#define helper_xvnmaddsp helper_xvnmaddsp_ppc64 +#define helper_xvnmsubdp helper_xvnmsubdp_ppc64 +#define helper_xvnmsubsp helper_xvnmsubsp_ppc64 +#define helper_xvrdpi helper_xvrdpi_ppc64 +#define helper_xvrdpic helper_xvrdpic_ppc64 +#define helper_xvrdpim helper_xvrdpim_ppc64 +#define helper_xvrdpip helper_xvrdpip_ppc64 +#define helper_xvrdpiz helper_xvrdpiz_ppc64 +#define helper_xvredp helper_xvredp_ppc64 +#define helper_xvresp helper_xvresp_ppc64 +#define helper_xvrspi helper_xvrspi_ppc64 +#define helper_xvrspic helper_xvrspic_ppc64 +#define helper_xvrspim helper_xvrspim_ppc64 +#define helper_xvrspip helper_xvrspip_ppc64 +#define helper_xvrspiz helper_xvrspiz_ppc64 +#define helper_xvrsqrtedp helper_xvrsqrtedp_ppc64 +#define helper_xvrsqrtesp helper_xvrsqrtesp_ppc64 +#define helper_xvsqrtdp helper_xvsqrtdp_ppc64 +#define helper_xvsqrtsp helper_xvsqrtsp_ppc64 +#define helper_xvsubdp helper_xvsubdp_ppc64 +#define helper_xvsubsp helper_xvsubsp_ppc64 +#define helper_xvtdivdp helper_xvtdivdp_ppc64 +#define helper_xvtdivsp helper_xvtdivsp_ppc64 +#define helper_xvtsqrtdp helper_xvtsqrtdp_ppc64 +#define helper_xvtsqrtsp helper_xvtsqrtsp_ppc64 +#define helper_xvtstdcdp helper_xvtstdcdp_ppc64 +#define helper_xvtstdcsp helper_xvtstdcsp_ppc64 +#define helper_xvxsigsp helper_xvxsigsp_ppc64 +#define helper_xxperm helper_xxperm_ppc64 +#define helper_xxpermr helper_xxpermr_ppc64 #endif diff --git a/qemu/riscv32.h b/qemu/riscv32.h index 90889da546..edc897463b 100644 --- a/qemu/riscv32.h +++ b/qemu/riscv32.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_riscv32 #define tcg_gen_shr_i64 tcg_gen_shr_i64_riscv32 #define tcg_gen_st_i64 tcg_gen_st_i64_riscv32 +#define tcg_gen_add_i64 tcg_gen_add_i64_riscv32 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_riscv32 #define tcg_gen_xor_i64 tcg_gen_xor_i64_riscv32 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_riscv32 #define cpu_icount_to_ns cpu_icount_to_ns_riscv32 #define cpu_is_stopped cpu_is_stopped_riscv32 #define cpu_get_ticks cpu_get_ticks_riscv32 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_riscv32 #define floatx80_mul floatx80_mul_riscv32 #define floatx80_div floatx80_div_riscv32 +#define floatx80_modrem floatx80_modrem_riscv32 +#define floatx80_mod 
floatx80_mod_riscv32 #define floatx80_rem floatx80_rem_riscv32 #define floatx80_sqrt floatx80_sqrt_riscv32 #define floatx80_eq floatx80_eq_riscv32 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_riscv32 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_riscv32 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_riscv32 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_riscv32 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_riscv32 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_riscv32 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_riscv32 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_riscv32 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_riscv32 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_riscv32 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_riscv32 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_riscv32 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_riscv32 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_riscv32 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_riscv32 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_riscv32 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_riscv32 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_riscv32 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_riscv32 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_riscv32 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_riscv32 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_riscv32 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_riscv32 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_riscv32 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_riscv32 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_riscv32 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_riscv32 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_riscv32 #define tcg_gen_shri_vec tcg_gen_shri_vec_riscv32 #define tcg_gen_sari_vec tcg_gen_sari_vec_riscv32 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_riscv32 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_riscv32 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_riscv32 #define tcg_gen_add_vec tcg_gen_add_vec_riscv32 #define tcg_gen_sub_vec tcg_gen_sub_vec_riscv32 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_riscv32 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_riscv32 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_riscv32 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_riscv32 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_riscv32 #define tcg_gen_shls_vec tcg_gen_shls_vec_riscv32 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_riscv32 #define tcg_gen_sars_vec tcg_gen_sars_vec_riscv32 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_riscv32 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_riscv32 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_riscv32 #define tb_htable_lookup tb_htable_lookup_riscv32 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_riscv32 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_riscv32 #define tlb_init tlb_init_riscv32 +#define tlb_destroy tlb_destroy_riscv32 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_riscv32 #define tlb_flush tlb_flush_riscv32 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_riscv32 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_riscv32 #define get_page_addr_code_hostp get_page_addr_code_hostp_riscv32 #define get_page_addr_code get_page_addr_code_riscv32 +#define probe_access_flags probe_access_flags_riscv32 #define probe_access probe_access_riscv32 #define tlb_vaddr_to_host tlb_vaddr_to_host_riscv32 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_riscv32 @@ -810,22 +830,34 @@ #define 
helper_be_ldsl_mmu helper_be_ldsl_mmu_riscv32 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_riscv32 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_riscv32 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_riscv32 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_riscv32 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_riscv32 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_riscv32 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_riscv32 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_riscv32 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_riscv32 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_riscv32 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_riscv32 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_riscv32 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_riscv32 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_riscv32 #define cpu_ldub_data_ra cpu_ldub_data_ra_riscv32 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_riscv32 -#define cpu_lduw_data_ra cpu_lduw_data_ra_riscv32 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_riscv32 -#define cpu_ldl_data_ra cpu_ldl_data_ra_riscv32 -#define cpu_ldq_data_ra cpu_ldq_data_ra_riscv32 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_riscv32 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_riscv32 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_riscv32 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_riscv32 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_riscv32 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_riscv32 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_riscv32 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_riscv32 #define cpu_ldub_data cpu_ldub_data_riscv32 #define cpu_ldsb_data cpu_ldsb_data_riscv32 -#define cpu_lduw_data cpu_lduw_data_riscv32 -#define cpu_ldsw_data cpu_ldsw_data_riscv32 -#define cpu_ldl_data cpu_ldl_data_riscv32 -#define cpu_ldq_data cpu_ldq_data_riscv32 +#define cpu_lduw_be_data cpu_lduw_be_data_riscv32 +#define cpu_lduw_le_data cpu_lduw_le_data_riscv32 +#define cpu_ldsw_be_data cpu_ldsw_be_data_riscv32 +#define cpu_ldsw_le_data cpu_ldsw_le_data_riscv32 +#define cpu_ldl_be_data cpu_ldl_be_data_riscv32 +#define cpu_ldl_le_data cpu_ldl_le_data_riscv32 +#define cpu_ldq_le_data cpu_ldq_le_data_riscv32 +#define cpu_ldq_be_data cpu_ldq_be_data_riscv32 #define helper_ret_stb_mmu helper_ret_stb_mmu_riscv32 #define helper_le_stw_mmu helper_le_stw_mmu_riscv32 #define helper_be_stw_mmu helper_be_stw_mmu_riscv32 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_riscv32 #define helper_be_stq_mmu helper_be_stq_mmu_riscv32 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_riscv32 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_riscv32 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_riscv32 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_riscv32 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_riscv32 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_riscv32 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_riscv32 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_riscv32 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_riscv32 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_riscv32 #define cpu_stb_data_ra cpu_stb_data_ra_riscv32 -#define cpu_stw_data_ra cpu_stw_data_ra_riscv32 -#define cpu_stl_data_ra cpu_stl_data_ra_riscv32 -#define cpu_stq_data_ra cpu_stq_data_ra_riscv32 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_riscv32 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_riscv32 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_riscv32 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_riscv32 +#define cpu_stq_be_data_ra 
cpu_stq_be_data_ra_riscv32 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_riscv32 #define cpu_stb_data cpu_stb_data_riscv32 -#define cpu_stw_data cpu_stw_data_riscv32 -#define cpu_stl_data cpu_stl_data_riscv32 -#define cpu_stq_data cpu_stq_data_riscv32 +#define cpu_stw_be_data cpu_stw_be_data_riscv32 +#define cpu_stw_le_data cpu_stw_le_data_riscv32 +#define cpu_stl_be_data cpu_stl_be_data_riscv32 +#define cpu_stl_le_data cpu_stl_le_data_riscv32 +#define cpu_stq_be_data cpu_stq_be_data_riscv32 +#define cpu_stq_le_data cpu_stq_le_data_riscv32 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_riscv32 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_riscv32 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_riscv32 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_riscv32 #define cpu_ldl_code cpu_ldl_code_riscv32 #define cpu_ldq_code cpu_ldq_code_riscv32 +#define cpu_interrupt_handler cpu_interrupt_handler_riscv32 #define helper_div_i32 helper_div_i32_riscv32 #define helper_rem_i32 helper_rem_i32_riscv32 #define helper_divu_i32 helper_divu_i32_riscv32 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_riscv32 #define helper_gvec_sar32i helper_gvec_sar32i_riscv32 #define helper_gvec_sar64i helper_gvec_sar64i_riscv32 +#define helper_gvec_rotl8i helper_gvec_rotl8i_riscv32 +#define helper_gvec_rotl16i helper_gvec_rotl16i_riscv32 +#define helper_gvec_rotl32i helper_gvec_rotl32i_riscv32 +#define helper_gvec_rotl64i helper_gvec_rotl64i_riscv32 #define helper_gvec_shl8v helper_gvec_shl8v_riscv32 #define helper_gvec_shl16v helper_gvec_shl16v_riscv32 #define helper_gvec_shl32v helper_gvec_shl32v_riscv32 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_riscv32 #define helper_gvec_sar32v helper_gvec_sar32v_riscv32 #define helper_gvec_sar64v helper_gvec_sar64v_riscv32 +#define helper_gvec_rotl8v helper_gvec_rotl8v_riscv32 +#define helper_gvec_rotl16v helper_gvec_rotl16v_riscv32 +#define helper_gvec_rotl32v helper_gvec_rotl32v_riscv32 +#define helper_gvec_rotl64v helper_gvec_rotl64v_riscv32 +#define helper_gvec_rotr8v helper_gvec_rotr8v_riscv32 +#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv32 +#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv32 +#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv32 #define helper_gvec_eq8 helper_gvec_eq8_riscv32 #define helper_gvec_ne8 helper_gvec_ne8_riscv32 #define helper_gvec_lt8 helper_gvec_lt8_riscv32 @@ -1366,6 +1420,7 @@ #define helper_mret helper_mret_riscv32 #define helper_wfi helper_wfi_riscv32 #define helper_tlb_flush helper_tlb_flush_riscv32 +#define helper_hyp_tlb_flush helper_hyp_tlb_flush_riscv32 #define pmp_hart_has_privs pmp_hart_has_privs_riscv32 #define pmpcfg_csr_write pmpcfg_csr_write_riscv32 #define pmpcfg_csr_read pmpcfg_csr_read_riscv32 @@ -1386,4 +1441,1006 @@ #define gen_helper_tlb_flush gen_helper_tlb_flush_riscv32 #define riscv_fpr_regnames riscv_fpr_regnames_riscv32 #define riscv_int_regnames riscv_int_regnames_riscv32 +#define fclass_d fclass_d_riscv32 +#define fclass_h fclass_h_riscv32 +#define fclass_s fclass_s_riscv32 +#define helper_vaadd_vv_b helper_vaadd_vv_b_riscv32 +#define helper_vaadd_vv_d helper_vaadd_vv_d_riscv32 +#define helper_vaadd_vv_h helper_vaadd_vv_h_riscv32 +#define helper_vaadd_vv_w helper_vaadd_vv_w_riscv32 +#define helper_vaadd_vx_b helper_vaadd_vx_b_riscv32 +#define helper_vaadd_vx_d helper_vaadd_vx_d_riscv32 +#define helper_vaadd_vx_h helper_vaadd_vx_h_riscv32 +#define helper_vaadd_vx_w helper_vaadd_vx_w_riscv32 +#define 
helper_vadc_vvm_b helper_vadc_vvm_b_riscv32 +#define helper_vadc_vvm_d helper_vadc_vvm_d_riscv32 +#define helper_vadc_vvm_h helper_vadc_vvm_h_riscv32 +#define helper_vadc_vvm_w helper_vadc_vvm_w_riscv32 +#define helper_vadc_vxm_b helper_vadc_vxm_b_riscv32 +#define helper_vadc_vxm_d helper_vadc_vxm_d_riscv32 +#define helper_vadc_vxm_h helper_vadc_vxm_h_riscv32 +#define helper_vadc_vxm_w helper_vadc_vxm_w_riscv32 +#define helper_vadd_vv_b helper_vadd_vv_b_riscv32 +#define helper_vadd_vv_d helper_vadd_vv_d_riscv32 +#define helper_vadd_vv_h helper_vadd_vv_h_riscv32 +#define helper_vadd_vv_w helper_vadd_vv_w_riscv32 +#define helper_vadd_vx_b helper_vadd_vx_b_riscv32 +#define helper_vadd_vx_d helper_vadd_vx_d_riscv32 +#define helper_vadd_vx_h helper_vadd_vx_h_riscv32 +#define helper_vadd_vx_w helper_vadd_vx_w_riscv32 +#define helper_vamoaddw_v_w helper_vamoaddw_v_w_riscv32 +#define helper_vamoandw_v_w helper_vamoandw_v_w_riscv32 +#define helper_vamomaxuw_v_w helper_vamomaxuw_v_w_riscv32 +#define helper_vamomaxw_v_w helper_vamomaxw_v_w_riscv32 +#define helper_vamominuw_v_w helper_vamominuw_v_w_riscv32 +#define helper_vamominw_v_w helper_vamominw_v_w_riscv32 +#define helper_vamoorw_v_w helper_vamoorw_v_w_riscv32 +#define helper_vamoswapw_v_w helper_vamoswapw_v_w_riscv32 +#define helper_vamoxorw_v_w helper_vamoxorw_v_w_riscv32 +#define helper_vand_vv_b helper_vand_vv_b_riscv32 +#define helper_vand_vv_d helper_vand_vv_d_riscv32 +#define helper_vand_vv_h helper_vand_vv_h_riscv32 +#define helper_vand_vv_w helper_vand_vv_w_riscv32 +#define helper_vand_vx_b helper_vand_vx_b_riscv32 +#define helper_vand_vx_d helper_vand_vx_d_riscv32 +#define helper_vand_vx_h helper_vand_vx_h_riscv32 +#define helper_vand_vx_w helper_vand_vx_w_riscv32 +#define helper_vasub_vv_b helper_vasub_vv_b_riscv32 +#define helper_vasub_vv_d helper_vasub_vv_d_riscv32 +#define helper_vasub_vv_h helper_vasub_vv_h_riscv32 +#define helper_vasub_vv_w helper_vasub_vv_w_riscv32 +#define helper_vasub_vx_b helper_vasub_vx_b_riscv32 +#define helper_vasub_vx_d helper_vasub_vx_d_riscv32 +#define helper_vasub_vx_h helper_vasub_vx_h_riscv32 +#define helper_vasub_vx_w helper_vasub_vx_w_riscv32 +#define helper_vcompress_vm_b helper_vcompress_vm_b_riscv32 +#define helper_vcompress_vm_d helper_vcompress_vm_d_riscv32 +#define helper_vcompress_vm_h helper_vcompress_vm_h_riscv32 +#define helper_vcompress_vm_w helper_vcompress_vm_w_riscv32 +#define helper_vdiv_vv_b helper_vdiv_vv_b_riscv32 +#define helper_vdiv_vv_d helper_vdiv_vv_d_riscv32 +#define helper_vdiv_vv_h helper_vdiv_vv_h_riscv32 +#define helper_vdiv_vv_w helper_vdiv_vv_w_riscv32 +#define helper_vdiv_vx_b helper_vdiv_vx_b_riscv32 +#define helper_vdiv_vx_d helper_vdiv_vx_d_riscv32 +#define helper_vdiv_vx_h helper_vdiv_vx_h_riscv32 +#define helper_vdiv_vx_w helper_vdiv_vx_w_riscv32 +#define helper_vdivu_vv_b helper_vdivu_vv_b_riscv32 +#define helper_vdivu_vv_d helper_vdivu_vv_d_riscv32 +#define helper_vdivu_vv_h helper_vdivu_vv_h_riscv32 +#define helper_vdivu_vv_w helper_vdivu_vv_w_riscv32 +#define helper_vdivu_vx_b helper_vdivu_vx_b_riscv32 +#define helper_vdivu_vx_d helper_vdivu_vx_d_riscv32 +#define helper_vdivu_vx_h helper_vdivu_vx_h_riscv32 +#define helper_vdivu_vx_w helper_vdivu_vx_w_riscv32 +#define helper_vec_rsubs16 helper_vec_rsubs16_riscv32 +#define helper_vec_rsubs32 helper_vec_rsubs32_riscv32 +#define helper_vec_rsubs64 helper_vec_rsubs64_riscv32 +#define helper_vec_rsubs8 helper_vec_rsubs8_riscv32 +#define helper_vfadd_vf_d helper_vfadd_vf_d_riscv32 +#define helper_vfadd_vf_h 
helper_vfadd_vf_h_riscv32 +#define helper_vfadd_vf_w helper_vfadd_vf_w_riscv32 +#define helper_vfadd_vv_d helper_vfadd_vv_d_riscv32 +#define helper_vfadd_vv_h helper_vfadd_vv_h_riscv32 +#define helper_vfadd_vv_w helper_vfadd_vv_w_riscv32 +#define helper_vfclass_v_d helper_vfclass_v_d_riscv32 +#define helper_vfclass_v_h helper_vfclass_v_h_riscv32 +#define helper_vfclass_v_w helper_vfclass_v_w_riscv32 +#define helper_vfcvt_f_x_v_d helper_vfcvt_f_x_v_d_riscv32 +#define helper_vfcvt_f_x_v_h helper_vfcvt_f_x_v_h_riscv32 +#define helper_vfcvt_f_x_v_w helper_vfcvt_f_x_v_w_riscv32 +#define helper_vfcvt_f_xu_v_d helper_vfcvt_f_xu_v_d_riscv32 +#define helper_vfcvt_f_xu_v_h helper_vfcvt_f_xu_v_h_riscv32 +#define helper_vfcvt_f_xu_v_w helper_vfcvt_f_xu_v_w_riscv32 +#define helper_vfcvt_x_f_v_d helper_vfcvt_x_f_v_d_riscv32 +#define helper_vfcvt_x_f_v_h helper_vfcvt_x_f_v_h_riscv32 +#define helper_vfcvt_x_f_v_w helper_vfcvt_x_f_v_w_riscv32 +#define helper_vfcvt_xu_f_v_d helper_vfcvt_xu_f_v_d_riscv32 +#define helper_vfcvt_xu_f_v_h helper_vfcvt_xu_f_v_h_riscv32 +#define helper_vfcvt_xu_f_v_w helper_vfcvt_xu_f_v_w_riscv32 +#define helper_vfdiv_vf_d helper_vfdiv_vf_d_riscv32 +#define helper_vfdiv_vf_h helper_vfdiv_vf_h_riscv32 +#define helper_vfdiv_vf_w helper_vfdiv_vf_w_riscv32 +#define helper_vfdiv_vv_d helper_vfdiv_vv_d_riscv32 +#define helper_vfdiv_vv_h helper_vfdiv_vv_h_riscv32 +#define helper_vfdiv_vv_w helper_vfdiv_vv_w_riscv32 +#define helper_vfmacc_vf_d helper_vfmacc_vf_d_riscv32 +#define helper_vfmacc_vf_h helper_vfmacc_vf_h_riscv32 +#define helper_vfmacc_vf_w helper_vfmacc_vf_w_riscv32 +#define helper_vfmacc_vv_d helper_vfmacc_vv_d_riscv32 +#define helper_vfmacc_vv_h helper_vfmacc_vv_h_riscv32 +#define helper_vfmacc_vv_w helper_vfmacc_vv_w_riscv32 +#define helper_vfmadd_vf_d helper_vfmadd_vf_d_riscv32 +#define helper_vfmadd_vf_h helper_vfmadd_vf_h_riscv32 +#define helper_vfmadd_vf_w helper_vfmadd_vf_w_riscv32 +#define helper_vfmadd_vv_d helper_vfmadd_vv_d_riscv32 +#define helper_vfmadd_vv_h helper_vfmadd_vv_h_riscv32 +#define helper_vfmadd_vv_w helper_vfmadd_vv_w_riscv32 +#define helper_vfmax_vf_d helper_vfmax_vf_d_riscv32 +#define helper_vfmax_vf_h helper_vfmax_vf_h_riscv32 +#define helper_vfmax_vf_w helper_vfmax_vf_w_riscv32 +#define helper_vfmax_vv_d helper_vfmax_vv_d_riscv32 +#define helper_vfmax_vv_h helper_vfmax_vv_h_riscv32 +#define helper_vfmax_vv_w helper_vfmax_vv_w_riscv32 +#define helper_vfmerge_vfm_d helper_vfmerge_vfm_d_riscv32 +#define helper_vfmerge_vfm_h helper_vfmerge_vfm_h_riscv32 +#define helper_vfmerge_vfm_w helper_vfmerge_vfm_w_riscv32 +#define helper_vfmin_vf_d helper_vfmin_vf_d_riscv32 +#define helper_vfmin_vf_h helper_vfmin_vf_h_riscv32 +#define helper_vfmin_vf_w helper_vfmin_vf_w_riscv32 +#define helper_vfmin_vv_d helper_vfmin_vv_d_riscv32 +#define helper_vfmin_vv_h helper_vfmin_vv_h_riscv32 +#define helper_vfmin_vv_w helper_vfmin_vv_w_riscv32 +#define helper_vfmsac_vf_d helper_vfmsac_vf_d_riscv32 +#define helper_vfmsac_vf_h helper_vfmsac_vf_h_riscv32 +#define helper_vfmsac_vf_w helper_vfmsac_vf_w_riscv32 +#define helper_vfmsac_vv_d helper_vfmsac_vv_d_riscv32 +#define helper_vfmsac_vv_h helper_vfmsac_vv_h_riscv32 +#define helper_vfmsac_vv_w helper_vfmsac_vv_w_riscv32 +#define helper_vfmsub_vf_d helper_vfmsub_vf_d_riscv32 +#define helper_vfmsub_vf_h helper_vfmsub_vf_h_riscv32 +#define helper_vfmsub_vf_w helper_vfmsub_vf_w_riscv32 +#define helper_vfmsub_vv_d helper_vfmsub_vv_d_riscv32 +#define helper_vfmsub_vv_h helper_vfmsub_vv_h_riscv32 +#define helper_vfmsub_vv_w 
helper_vfmsub_vv_w_riscv32 +#define helper_vfmul_vf_d helper_vfmul_vf_d_riscv32 +#define helper_vfmul_vf_h helper_vfmul_vf_h_riscv32 +#define helper_vfmul_vf_w helper_vfmul_vf_w_riscv32 +#define helper_vfmul_vv_d helper_vfmul_vv_d_riscv32 +#define helper_vfmul_vv_h helper_vfmul_vv_h_riscv32 +#define helper_vfmul_vv_w helper_vfmul_vv_w_riscv32 +#define helper_vfncvt_f_f_v_h helper_vfncvt_f_f_v_h_riscv32 +#define helper_vfncvt_f_f_v_w helper_vfncvt_f_f_v_w_riscv32 +#define helper_vfncvt_f_x_v_h helper_vfncvt_f_x_v_h_riscv32 +#define helper_vfncvt_f_x_v_w helper_vfncvt_f_x_v_w_riscv32 +#define helper_vfncvt_f_xu_v_h helper_vfncvt_f_xu_v_h_riscv32 +#define helper_vfncvt_f_xu_v_w helper_vfncvt_f_xu_v_w_riscv32 +#define helper_vfncvt_x_f_v_h helper_vfncvt_x_f_v_h_riscv32 +#define helper_vfncvt_x_f_v_w helper_vfncvt_x_f_v_w_riscv32 +#define helper_vfncvt_xu_f_v_h helper_vfncvt_xu_f_v_h_riscv32 +#define helper_vfncvt_xu_f_v_w helper_vfncvt_xu_f_v_w_riscv32 +#define helper_vfnmacc_vf_d helper_vfnmacc_vf_d_riscv32 +#define helper_vfnmacc_vf_h helper_vfnmacc_vf_h_riscv32 +#define helper_vfnmacc_vf_w helper_vfnmacc_vf_w_riscv32 +#define helper_vfnmacc_vv_d helper_vfnmacc_vv_d_riscv32 +#define helper_vfnmacc_vv_h helper_vfnmacc_vv_h_riscv32 +#define helper_vfnmacc_vv_w helper_vfnmacc_vv_w_riscv32 +#define helper_vfnmadd_vf_d helper_vfnmadd_vf_d_riscv32 +#define helper_vfnmadd_vf_h helper_vfnmadd_vf_h_riscv32 +#define helper_vfnmadd_vf_w helper_vfnmadd_vf_w_riscv32 +#define helper_vfnmadd_vv_d helper_vfnmadd_vv_d_riscv32 +#define helper_vfnmadd_vv_h helper_vfnmadd_vv_h_riscv32 +#define helper_vfnmadd_vv_w helper_vfnmadd_vv_w_riscv32 +#define helper_vfnmsac_vf_d helper_vfnmsac_vf_d_riscv32 +#define helper_vfnmsac_vf_h helper_vfnmsac_vf_h_riscv32 +#define helper_vfnmsac_vf_w helper_vfnmsac_vf_w_riscv32 +#define helper_vfnmsac_vv_d helper_vfnmsac_vv_d_riscv32 +#define helper_vfnmsac_vv_h helper_vfnmsac_vv_h_riscv32 +#define helper_vfnmsac_vv_w helper_vfnmsac_vv_w_riscv32 +#define helper_vfnmsub_vf_d helper_vfnmsub_vf_d_riscv32 +#define helper_vfnmsub_vf_h helper_vfnmsub_vf_h_riscv32 +#define helper_vfnmsub_vf_w helper_vfnmsub_vf_w_riscv32 +#define helper_vfnmsub_vv_d helper_vfnmsub_vv_d_riscv32 +#define helper_vfnmsub_vv_h helper_vfnmsub_vv_h_riscv32 +#define helper_vfnmsub_vv_w helper_vfnmsub_vv_w_riscv32 +#define helper_vfrdiv_vf_d helper_vfrdiv_vf_d_riscv32 +#define helper_vfrdiv_vf_h helper_vfrdiv_vf_h_riscv32 +#define helper_vfrdiv_vf_w helper_vfrdiv_vf_w_riscv32 +#define helper_vfredmax_vs_d helper_vfredmax_vs_d_riscv32 +#define helper_vfredmax_vs_h helper_vfredmax_vs_h_riscv32 +#define helper_vfredmax_vs_w helper_vfredmax_vs_w_riscv32 +#define helper_vfredmin_vs_d helper_vfredmin_vs_d_riscv32 +#define helper_vfredmin_vs_h helper_vfredmin_vs_h_riscv32 +#define helper_vfredmin_vs_w helper_vfredmin_vs_w_riscv32 +#define helper_vfredsum_vs_d helper_vfredsum_vs_d_riscv32 +#define helper_vfredsum_vs_h helper_vfredsum_vs_h_riscv32 +#define helper_vfredsum_vs_w helper_vfredsum_vs_w_riscv32 +#define helper_vfrsub_vf_d helper_vfrsub_vf_d_riscv32 +#define helper_vfrsub_vf_h helper_vfrsub_vf_h_riscv32 +#define helper_vfrsub_vf_w helper_vfrsub_vf_w_riscv32 +#define helper_vfsgnj_vf_d helper_vfsgnj_vf_d_riscv32 +#define helper_vfsgnj_vf_h helper_vfsgnj_vf_h_riscv32 +#define helper_vfsgnj_vf_w helper_vfsgnj_vf_w_riscv32 +#define helper_vfsgnj_vv_d helper_vfsgnj_vv_d_riscv32 +#define helper_vfsgnj_vv_h helper_vfsgnj_vv_h_riscv32 +#define helper_vfsgnj_vv_w helper_vfsgnj_vv_w_riscv32 +#define helper_vfsgnjn_vf_d 
helper_vfsgnjn_vf_d_riscv32 +#define helper_vfsgnjn_vf_h helper_vfsgnjn_vf_h_riscv32 +#define helper_vfsgnjn_vf_w helper_vfsgnjn_vf_w_riscv32 +#define helper_vfsgnjn_vv_d helper_vfsgnjn_vv_d_riscv32 +#define helper_vfsgnjn_vv_h helper_vfsgnjn_vv_h_riscv32 +#define helper_vfsgnjn_vv_w helper_vfsgnjn_vv_w_riscv32 +#define helper_vfsgnjx_vf_d helper_vfsgnjx_vf_d_riscv32 +#define helper_vfsgnjx_vf_h helper_vfsgnjx_vf_h_riscv32 +#define helper_vfsgnjx_vf_w helper_vfsgnjx_vf_w_riscv32 +#define helper_vfsgnjx_vv_d helper_vfsgnjx_vv_d_riscv32 +#define helper_vfsgnjx_vv_h helper_vfsgnjx_vv_h_riscv32 +#define helper_vfsgnjx_vv_w helper_vfsgnjx_vv_w_riscv32 +#define helper_vfsqrt_v_d helper_vfsqrt_v_d_riscv32 +#define helper_vfsqrt_v_h helper_vfsqrt_v_h_riscv32 +#define helper_vfsqrt_v_w helper_vfsqrt_v_w_riscv32 +#define helper_vfsub_vf_d helper_vfsub_vf_d_riscv32 +#define helper_vfsub_vf_h helper_vfsub_vf_h_riscv32 +#define helper_vfsub_vf_w helper_vfsub_vf_w_riscv32 +#define helper_vfsub_vv_d helper_vfsub_vv_d_riscv32 +#define helper_vfsub_vv_h helper_vfsub_vv_h_riscv32 +#define helper_vfsub_vv_w helper_vfsub_vv_w_riscv32 +#define helper_vfwadd_vf_h helper_vfwadd_vf_h_riscv32 +#define helper_vfwadd_vf_w helper_vfwadd_vf_w_riscv32 +#define helper_vfwadd_vv_h helper_vfwadd_vv_h_riscv32 +#define helper_vfwadd_vv_w helper_vfwadd_vv_w_riscv32 +#define helper_vfwadd_wf_h helper_vfwadd_wf_h_riscv32 +#define helper_vfwadd_wf_w helper_vfwadd_wf_w_riscv32 +#define helper_vfwadd_wv_h helper_vfwadd_wv_h_riscv32 +#define helper_vfwadd_wv_w helper_vfwadd_wv_w_riscv32 +#define helper_vfwcvt_f_f_v_h helper_vfwcvt_f_f_v_h_riscv32 +#define helper_vfwcvt_f_f_v_w helper_vfwcvt_f_f_v_w_riscv32 +#define helper_vfwcvt_f_x_v_h helper_vfwcvt_f_x_v_h_riscv32 +#define helper_vfwcvt_f_x_v_w helper_vfwcvt_f_x_v_w_riscv32 +#define helper_vfwcvt_f_xu_v_h helper_vfwcvt_f_xu_v_h_riscv32 +#define helper_vfwcvt_f_xu_v_w helper_vfwcvt_f_xu_v_w_riscv32 +#define helper_vfwcvt_x_f_v_h helper_vfwcvt_x_f_v_h_riscv32 +#define helper_vfwcvt_x_f_v_w helper_vfwcvt_x_f_v_w_riscv32 +#define helper_vfwcvt_xu_f_v_h helper_vfwcvt_xu_f_v_h_riscv32 +#define helper_vfwcvt_xu_f_v_w helper_vfwcvt_xu_f_v_w_riscv32 +#define helper_vfwmacc_vf_h helper_vfwmacc_vf_h_riscv32 +#define helper_vfwmacc_vf_w helper_vfwmacc_vf_w_riscv32 +#define helper_vfwmacc_vv_h helper_vfwmacc_vv_h_riscv32 +#define helper_vfwmacc_vv_w helper_vfwmacc_vv_w_riscv32 +#define helper_vfwmsac_vf_h helper_vfwmsac_vf_h_riscv32 +#define helper_vfwmsac_vf_w helper_vfwmsac_vf_w_riscv32 +#define helper_vfwmsac_vv_h helper_vfwmsac_vv_h_riscv32 +#define helper_vfwmsac_vv_w helper_vfwmsac_vv_w_riscv32 +#define helper_vfwmul_vf_h helper_vfwmul_vf_h_riscv32 +#define helper_vfwmul_vf_w helper_vfwmul_vf_w_riscv32 +#define helper_vfwmul_vv_h helper_vfwmul_vv_h_riscv32 +#define helper_vfwmul_vv_w helper_vfwmul_vv_w_riscv32 +#define helper_vfwnmacc_vf_h helper_vfwnmacc_vf_h_riscv32 +#define helper_vfwnmacc_vf_w helper_vfwnmacc_vf_w_riscv32 +#define helper_vfwnmacc_vv_h helper_vfwnmacc_vv_h_riscv32 +#define helper_vfwnmacc_vv_w helper_vfwnmacc_vv_w_riscv32 +#define helper_vfwnmsac_vf_h helper_vfwnmsac_vf_h_riscv32 +#define helper_vfwnmsac_vf_w helper_vfwnmsac_vf_w_riscv32 +#define helper_vfwnmsac_vv_h helper_vfwnmsac_vv_h_riscv32 +#define helper_vfwnmsac_vv_w helper_vfwnmsac_vv_w_riscv32 +#define helper_vfwredsum_vs_h helper_vfwredsum_vs_h_riscv32 +#define helper_vfwredsum_vs_w helper_vfwredsum_vs_w_riscv32 +#define helper_vfwsub_vf_h helper_vfwsub_vf_h_riscv32 +#define helper_vfwsub_vf_w 
helper_vfwsub_vf_w_riscv32 +#define helper_vfwsub_vv_h helper_vfwsub_vv_h_riscv32 +#define helper_vfwsub_vv_w helper_vfwsub_vv_w_riscv32 +#define helper_vfwsub_wf_h helper_vfwsub_wf_h_riscv32 +#define helper_vfwsub_wf_w helper_vfwsub_wf_w_riscv32 +#define helper_vfwsub_wv_h helper_vfwsub_wv_h_riscv32 +#define helper_vfwsub_wv_w helper_vfwsub_wv_w_riscv32 +#define helper_vid_v_b helper_vid_v_b_riscv32 +#define helper_vid_v_d helper_vid_v_d_riscv32 +#define helper_vid_v_h helper_vid_v_h_riscv32 +#define helper_vid_v_w helper_vid_v_w_riscv32 +#define helper_viota_m_b helper_viota_m_b_riscv32 +#define helper_viota_m_d helper_viota_m_d_riscv32 +#define helper_viota_m_h helper_viota_m_h_riscv32 +#define helper_viota_m_w helper_viota_m_w_riscv32 +#define helper_vlb_v_b helper_vlb_v_b_riscv32 +#define helper_vlb_v_b_mask helper_vlb_v_b_mask_riscv32 +#define helper_vlb_v_d helper_vlb_v_d_riscv32 +#define helper_vlb_v_d_mask helper_vlb_v_d_mask_riscv32 +#define helper_vlb_v_h helper_vlb_v_h_riscv32 +#define helper_vlb_v_h_mask helper_vlb_v_h_mask_riscv32 +#define helper_vlb_v_w helper_vlb_v_w_riscv32 +#define helper_vlb_v_w_mask helper_vlb_v_w_mask_riscv32 +#define helper_vlbff_v_b helper_vlbff_v_b_riscv32 +#define helper_vlbff_v_d helper_vlbff_v_d_riscv32 +#define helper_vlbff_v_h helper_vlbff_v_h_riscv32 +#define helper_vlbff_v_w helper_vlbff_v_w_riscv32 +#define helper_vlbu_v_b helper_vlbu_v_b_riscv32 +#define helper_vlbu_v_b_mask helper_vlbu_v_b_mask_riscv32 +#define helper_vlbu_v_d helper_vlbu_v_d_riscv32 +#define helper_vlbu_v_d_mask helper_vlbu_v_d_mask_riscv32 +#define helper_vlbu_v_h helper_vlbu_v_h_riscv32 +#define helper_vlbu_v_h_mask helper_vlbu_v_h_mask_riscv32 +#define helper_vlbu_v_w helper_vlbu_v_w_riscv32 +#define helper_vlbu_v_w_mask helper_vlbu_v_w_mask_riscv32 +#define helper_vlbuff_v_b helper_vlbuff_v_b_riscv32 +#define helper_vlbuff_v_d helper_vlbuff_v_d_riscv32 +#define helper_vlbuff_v_h helper_vlbuff_v_h_riscv32 +#define helper_vlbuff_v_w helper_vlbuff_v_w_riscv32 +#define helper_vle_v_b helper_vle_v_b_riscv32 +#define helper_vle_v_b_mask helper_vle_v_b_mask_riscv32 +#define helper_vle_v_d helper_vle_v_d_riscv32 +#define helper_vle_v_d_mask helper_vle_v_d_mask_riscv32 +#define helper_vle_v_h helper_vle_v_h_riscv32 +#define helper_vle_v_h_mask helper_vle_v_h_mask_riscv32 +#define helper_vle_v_w helper_vle_v_w_riscv32 +#define helper_vle_v_w_mask helper_vle_v_w_mask_riscv32 +#define helper_vleff_v_b helper_vleff_v_b_riscv32 +#define helper_vleff_v_d helper_vleff_v_d_riscv32 +#define helper_vleff_v_h helper_vleff_v_h_riscv32 +#define helper_vleff_v_w helper_vleff_v_w_riscv32 +#define helper_vlh_v_d helper_vlh_v_d_riscv32 +#define helper_vlh_v_d_mask helper_vlh_v_d_mask_riscv32 +#define helper_vlh_v_h helper_vlh_v_h_riscv32 +#define helper_vlh_v_h_mask helper_vlh_v_h_mask_riscv32 +#define helper_vlh_v_w helper_vlh_v_w_riscv32 +#define helper_vlh_v_w_mask helper_vlh_v_w_mask_riscv32 +#define helper_vlhff_v_d helper_vlhff_v_d_riscv32 +#define helper_vlhff_v_h helper_vlhff_v_h_riscv32 +#define helper_vlhff_v_w helper_vlhff_v_w_riscv32 +#define helper_vlhu_v_d helper_vlhu_v_d_riscv32 +#define helper_vlhu_v_d_mask helper_vlhu_v_d_mask_riscv32 +#define helper_vlhu_v_h helper_vlhu_v_h_riscv32 +#define helper_vlhu_v_h_mask helper_vlhu_v_h_mask_riscv32 +#define helper_vlhu_v_w helper_vlhu_v_w_riscv32 +#define helper_vlhu_v_w_mask helper_vlhu_v_w_mask_riscv32 +#define helper_vlhuff_v_d helper_vlhuff_v_d_riscv32 +#define helper_vlhuff_v_h helper_vlhuff_v_h_riscv32 +#define helper_vlhuff_v_w 
helper_vlhuff_v_w_riscv32 +#define helper_vlsb_v_b helper_vlsb_v_b_riscv32 +#define helper_vlsb_v_d helper_vlsb_v_d_riscv32 +#define helper_vlsb_v_h helper_vlsb_v_h_riscv32 +#define helper_vlsb_v_w helper_vlsb_v_w_riscv32 +#define helper_vlsbu_v_b helper_vlsbu_v_b_riscv32 +#define helper_vlsbu_v_d helper_vlsbu_v_d_riscv32 +#define helper_vlsbu_v_h helper_vlsbu_v_h_riscv32 +#define helper_vlsbu_v_w helper_vlsbu_v_w_riscv32 +#define helper_vlse_v_b helper_vlse_v_b_riscv32 +#define helper_vlse_v_d helper_vlse_v_d_riscv32 +#define helper_vlse_v_h helper_vlse_v_h_riscv32 +#define helper_vlse_v_w helper_vlse_v_w_riscv32 +#define helper_vlsh_v_d helper_vlsh_v_d_riscv32 +#define helper_vlsh_v_h helper_vlsh_v_h_riscv32 +#define helper_vlsh_v_w helper_vlsh_v_w_riscv32 +#define helper_vlshu_v_d helper_vlshu_v_d_riscv32 +#define helper_vlshu_v_h helper_vlshu_v_h_riscv32 +#define helper_vlshu_v_w helper_vlshu_v_w_riscv32 +#define helper_vlsw_v_d helper_vlsw_v_d_riscv32 +#define helper_vlsw_v_w helper_vlsw_v_w_riscv32 +#define helper_vlswu_v_d helper_vlswu_v_d_riscv32 +#define helper_vlswu_v_w helper_vlswu_v_w_riscv32 +#define helper_vlw_v_d helper_vlw_v_d_riscv32 +#define helper_vlw_v_d_mask helper_vlw_v_d_mask_riscv32 +#define helper_vlw_v_w helper_vlw_v_w_riscv32 +#define helper_vlw_v_w_mask helper_vlw_v_w_mask_riscv32 +#define helper_vlwff_v_d helper_vlwff_v_d_riscv32 +#define helper_vlwff_v_w helper_vlwff_v_w_riscv32 +#define helper_vlwu_v_d helper_vlwu_v_d_riscv32 +#define helper_vlwu_v_d_mask helper_vlwu_v_d_mask_riscv32 +#define helper_vlwu_v_w helper_vlwu_v_w_riscv32 +#define helper_vlwu_v_w_mask helper_vlwu_v_w_mask_riscv32 +#define helper_vlwuff_v_d helper_vlwuff_v_d_riscv32 +#define helper_vlwuff_v_w helper_vlwuff_v_w_riscv32 +#define helper_vlxb_v_b helper_vlxb_v_b_riscv32 +#define helper_vlxb_v_d helper_vlxb_v_d_riscv32 +#define helper_vlxb_v_h helper_vlxb_v_h_riscv32 +#define helper_vlxb_v_w helper_vlxb_v_w_riscv32 +#define helper_vlxbu_v_b helper_vlxbu_v_b_riscv32 +#define helper_vlxbu_v_d helper_vlxbu_v_d_riscv32 +#define helper_vlxbu_v_h helper_vlxbu_v_h_riscv32 +#define helper_vlxbu_v_w helper_vlxbu_v_w_riscv32 +#define helper_vlxe_v_b helper_vlxe_v_b_riscv32 +#define helper_vlxe_v_d helper_vlxe_v_d_riscv32 +#define helper_vlxe_v_h helper_vlxe_v_h_riscv32 +#define helper_vlxe_v_w helper_vlxe_v_w_riscv32 +#define helper_vlxh_v_d helper_vlxh_v_d_riscv32 +#define helper_vlxh_v_h helper_vlxh_v_h_riscv32 +#define helper_vlxh_v_w helper_vlxh_v_w_riscv32 +#define helper_vlxhu_v_d helper_vlxhu_v_d_riscv32 +#define helper_vlxhu_v_h helper_vlxhu_v_h_riscv32 +#define helper_vlxhu_v_w helper_vlxhu_v_w_riscv32 +#define helper_vlxw_v_d helper_vlxw_v_d_riscv32 +#define helper_vlxw_v_w helper_vlxw_v_w_riscv32 +#define helper_vlxwu_v_d helper_vlxwu_v_d_riscv32 +#define helper_vlxwu_v_w helper_vlxwu_v_w_riscv32 +#define helper_vmacc_vv_b helper_vmacc_vv_b_riscv32 +#define helper_vmacc_vv_d helper_vmacc_vv_d_riscv32 +#define helper_vmacc_vv_h helper_vmacc_vv_h_riscv32 +#define helper_vmacc_vv_w helper_vmacc_vv_w_riscv32 +#define helper_vmacc_vx_b helper_vmacc_vx_b_riscv32 +#define helper_vmacc_vx_d helper_vmacc_vx_d_riscv32 +#define helper_vmacc_vx_h helper_vmacc_vx_h_riscv32 +#define helper_vmacc_vx_w helper_vmacc_vx_w_riscv32 +#define helper_vmadc_vvm_b helper_vmadc_vvm_b_riscv32 +#define helper_vmadc_vvm_d helper_vmadc_vvm_d_riscv32 +#define helper_vmadc_vvm_h helper_vmadc_vvm_h_riscv32 +#define helper_vmadc_vvm_w helper_vmadc_vvm_w_riscv32 +#define helper_vmadc_vxm_b helper_vmadc_vxm_b_riscv32 
+#define helper_vmadc_vxm_d helper_vmadc_vxm_d_riscv32 +#define helper_vmadc_vxm_h helper_vmadc_vxm_h_riscv32 +#define helper_vmadc_vxm_w helper_vmadc_vxm_w_riscv32 +#define helper_vmadd_vv_b helper_vmadd_vv_b_riscv32 +#define helper_vmadd_vv_d helper_vmadd_vv_d_riscv32 +#define helper_vmadd_vv_h helper_vmadd_vv_h_riscv32 +#define helper_vmadd_vv_w helper_vmadd_vv_w_riscv32 +#define helper_vmadd_vx_b helper_vmadd_vx_b_riscv32 +#define helper_vmadd_vx_d helper_vmadd_vx_d_riscv32 +#define helper_vmadd_vx_h helper_vmadd_vx_h_riscv32 +#define helper_vmadd_vx_w helper_vmadd_vx_w_riscv32 +#define helper_vmand_mm helper_vmand_mm_riscv32 +#define helper_vmandnot_mm helper_vmandnot_mm_riscv32 +#define helper_vmax_vv_b helper_vmax_vv_b_riscv32 +#define helper_vmax_vv_d helper_vmax_vv_d_riscv32 +#define helper_vmax_vv_h helper_vmax_vv_h_riscv32 +#define helper_vmax_vv_w helper_vmax_vv_w_riscv32 +#define helper_vmax_vx_b helper_vmax_vx_b_riscv32 +#define helper_vmax_vx_d helper_vmax_vx_d_riscv32 +#define helper_vmax_vx_h helper_vmax_vx_h_riscv32 +#define helper_vmax_vx_w helper_vmax_vx_w_riscv32 +#define helper_vmaxu_vv_b helper_vmaxu_vv_b_riscv32 +#define helper_vmaxu_vv_d helper_vmaxu_vv_d_riscv32 +#define helper_vmaxu_vv_h helper_vmaxu_vv_h_riscv32 +#define helper_vmaxu_vv_w helper_vmaxu_vv_w_riscv32 +#define helper_vmaxu_vx_b helper_vmaxu_vx_b_riscv32 +#define helper_vmaxu_vx_d helper_vmaxu_vx_d_riscv32 +#define helper_vmaxu_vx_h helper_vmaxu_vx_h_riscv32 +#define helper_vmaxu_vx_w helper_vmaxu_vx_w_riscv32 +#define helper_vmerge_vvm_b helper_vmerge_vvm_b_riscv32 +#define helper_vmerge_vvm_d helper_vmerge_vvm_d_riscv32 +#define helper_vmerge_vvm_h helper_vmerge_vvm_h_riscv32 +#define helper_vmerge_vvm_w helper_vmerge_vvm_w_riscv32 +#define helper_vmerge_vxm_b helper_vmerge_vxm_b_riscv32 +#define helper_vmerge_vxm_d helper_vmerge_vxm_d_riscv32 +#define helper_vmerge_vxm_h helper_vmerge_vxm_h_riscv32 +#define helper_vmerge_vxm_w helper_vmerge_vxm_w_riscv32 +#define helper_vmfeq_vf_d helper_vmfeq_vf_d_riscv32 +#define helper_vmfeq_vf_h helper_vmfeq_vf_h_riscv32 +#define helper_vmfeq_vf_w helper_vmfeq_vf_w_riscv32 +#define helper_vmfeq_vv_d helper_vmfeq_vv_d_riscv32 +#define helper_vmfeq_vv_h helper_vmfeq_vv_h_riscv32 +#define helper_vmfeq_vv_w helper_vmfeq_vv_w_riscv32 +#define helper_vmfge_vf_d helper_vmfge_vf_d_riscv32 +#define helper_vmfge_vf_h helper_vmfge_vf_h_riscv32 +#define helper_vmfge_vf_w helper_vmfge_vf_w_riscv32 +#define helper_vmfgt_vf_d helper_vmfgt_vf_d_riscv32 +#define helper_vmfgt_vf_h helper_vmfgt_vf_h_riscv32 +#define helper_vmfgt_vf_w helper_vmfgt_vf_w_riscv32 +#define helper_vmfirst_m helper_vmfirst_m_riscv32 +#define helper_vmfle_vf_d helper_vmfle_vf_d_riscv32 +#define helper_vmfle_vf_h helper_vmfle_vf_h_riscv32 +#define helper_vmfle_vf_w helper_vmfle_vf_w_riscv32 +#define helper_vmfle_vv_d helper_vmfle_vv_d_riscv32 +#define helper_vmfle_vv_h helper_vmfle_vv_h_riscv32 +#define helper_vmfle_vv_w helper_vmfle_vv_w_riscv32 +#define helper_vmflt_vf_d helper_vmflt_vf_d_riscv32 +#define helper_vmflt_vf_h helper_vmflt_vf_h_riscv32 +#define helper_vmflt_vf_w helper_vmflt_vf_w_riscv32 +#define helper_vmflt_vv_d helper_vmflt_vv_d_riscv32 +#define helper_vmflt_vv_h helper_vmflt_vv_h_riscv32 +#define helper_vmflt_vv_w helper_vmflt_vv_w_riscv32 +#define helper_vmfne_vf_d helper_vmfne_vf_d_riscv32 +#define helper_vmfne_vf_h helper_vmfne_vf_h_riscv32 +#define helper_vmfne_vf_w helper_vmfne_vf_w_riscv32 +#define helper_vmfne_vv_d helper_vmfne_vv_d_riscv32 +#define helper_vmfne_vv_h 
helper_vmfne_vv_h_riscv32 +#define helper_vmfne_vv_w helper_vmfne_vv_w_riscv32 +#define helper_vmford_vf_d helper_vmford_vf_d_riscv32 +#define helper_vmford_vf_h helper_vmford_vf_h_riscv32 +#define helper_vmford_vf_w helper_vmford_vf_w_riscv32 +#define helper_vmford_vv_d helper_vmford_vv_d_riscv32 +#define helper_vmford_vv_h helper_vmford_vv_h_riscv32 +#define helper_vmford_vv_w helper_vmford_vv_w_riscv32 +#define helper_vmin_vv_b helper_vmin_vv_b_riscv32 +#define helper_vmin_vv_d helper_vmin_vv_d_riscv32 +#define helper_vmin_vv_h helper_vmin_vv_h_riscv32 +#define helper_vmin_vv_w helper_vmin_vv_w_riscv32 +#define helper_vmin_vx_b helper_vmin_vx_b_riscv32 +#define helper_vmin_vx_d helper_vmin_vx_d_riscv32 +#define helper_vmin_vx_h helper_vmin_vx_h_riscv32 +#define helper_vmin_vx_w helper_vmin_vx_w_riscv32 +#define helper_vminu_vv_b helper_vminu_vv_b_riscv32 +#define helper_vminu_vv_d helper_vminu_vv_d_riscv32 +#define helper_vminu_vv_h helper_vminu_vv_h_riscv32 +#define helper_vminu_vv_w helper_vminu_vv_w_riscv32 +#define helper_vminu_vx_b helper_vminu_vx_b_riscv32 +#define helper_vminu_vx_d helper_vminu_vx_d_riscv32 +#define helper_vminu_vx_h helper_vminu_vx_h_riscv32 +#define helper_vminu_vx_w helper_vminu_vx_w_riscv32 +#define helper_vmnand_mm helper_vmnand_mm_riscv32 +#define helper_vmnor_mm helper_vmnor_mm_riscv32 +#define helper_vmor_mm helper_vmor_mm_riscv32 +#define helper_vmornot_mm helper_vmornot_mm_riscv32 +#define helper_vmpopc_m helper_vmpopc_m_riscv32 +#define helper_vmsbc_vvm_b helper_vmsbc_vvm_b_riscv32 +#define helper_vmsbc_vvm_d helper_vmsbc_vvm_d_riscv32 +#define helper_vmsbc_vvm_h helper_vmsbc_vvm_h_riscv32 +#define helper_vmsbc_vvm_w helper_vmsbc_vvm_w_riscv32 +#define helper_vmsbc_vxm_b helper_vmsbc_vxm_b_riscv32 +#define helper_vmsbc_vxm_d helper_vmsbc_vxm_d_riscv32 +#define helper_vmsbc_vxm_h helper_vmsbc_vxm_h_riscv32 +#define helper_vmsbc_vxm_w helper_vmsbc_vxm_w_riscv32 +#define helper_vmsbf_m helper_vmsbf_m_riscv32 +#define helper_vmseq_vv_b helper_vmseq_vv_b_riscv32 +#define helper_vmseq_vv_d helper_vmseq_vv_d_riscv32 +#define helper_vmseq_vv_h helper_vmseq_vv_h_riscv32 +#define helper_vmseq_vv_w helper_vmseq_vv_w_riscv32 +#define helper_vmseq_vx_b helper_vmseq_vx_b_riscv32 +#define helper_vmseq_vx_d helper_vmseq_vx_d_riscv32 +#define helper_vmseq_vx_h helper_vmseq_vx_h_riscv32 +#define helper_vmseq_vx_w helper_vmseq_vx_w_riscv32 +#define helper_vmsgt_vx_b helper_vmsgt_vx_b_riscv32 +#define helper_vmsgt_vx_d helper_vmsgt_vx_d_riscv32 +#define helper_vmsgt_vx_h helper_vmsgt_vx_h_riscv32 +#define helper_vmsgt_vx_w helper_vmsgt_vx_w_riscv32 +#define helper_vmsgtu_vx_b helper_vmsgtu_vx_b_riscv32 +#define helper_vmsgtu_vx_d helper_vmsgtu_vx_d_riscv32 +#define helper_vmsgtu_vx_h helper_vmsgtu_vx_h_riscv32 +#define helper_vmsgtu_vx_w helper_vmsgtu_vx_w_riscv32 +#define helper_vmsif_m helper_vmsif_m_riscv32 +#define helper_vmsle_vv_b helper_vmsle_vv_b_riscv32 +#define helper_vmsle_vv_d helper_vmsle_vv_d_riscv32 +#define helper_vmsle_vv_h helper_vmsle_vv_h_riscv32 +#define helper_vmsle_vv_w helper_vmsle_vv_w_riscv32 +#define helper_vmsle_vx_b helper_vmsle_vx_b_riscv32 +#define helper_vmsle_vx_d helper_vmsle_vx_d_riscv32 +#define helper_vmsle_vx_h helper_vmsle_vx_h_riscv32 +#define helper_vmsle_vx_w helper_vmsle_vx_w_riscv32 +#define helper_vmsleu_vv_b helper_vmsleu_vv_b_riscv32 +#define helper_vmsleu_vv_d helper_vmsleu_vv_d_riscv32 +#define helper_vmsleu_vv_h helper_vmsleu_vv_h_riscv32 +#define helper_vmsleu_vv_w helper_vmsleu_vv_w_riscv32 +#define helper_vmsleu_vx_b 
helper_vmsleu_vx_b_riscv32 +#define helper_vmsleu_vx_d helper_vmsleu_vx_d_riscv32 +#define helper_vmsleu_vx_h helper_vmsleu_vx_h_riscv32 +#define helper_vmsleu_vx_w helper_vmsleu_vx_w_riscv32 +#define helper_vmslt_vv_b helper_vmslt_vv_b_riscv32 +#define helper_vmslt_vv_d helper_vmslt_vv_d_riscv32 +#define helper_vmslt_vv_h helper_vmslt_vv_h_riscv32 +#define helper_vmslt_vv_w helper_vmslt_vv_w_riscv32 +#define helper_vmslt_vx_b helper_vmslt_vx_b_riscv32 +#define helper_vmslt_vx_d helper_vmslt_vx_d_riscv32 +#define helper_vmslt_vx_h helper_vmslt_vx_h_riscv32 +#define helper_vmslt_vx_w helper_vmslt_vx_w_riscv32 +#define helper_vmsltu_vv_b helper_vmsltu_vv_b_riscv32 +#define helper_vmsltu_vv_d helper_vmsltu_vv_d_riscv32 +#define helper_vmsltu_vv_h helper_vmsltu_vv_h_riscv32 +#define helper_vmsltu_vv_w helper_vmsltu_vv_w_riscv32 +#define helper_vmsltu_vx_b helper_vmsltu_vx_b_riscv32 +#define helper_vmsltu_vx_d helper_vmsltu_vx_d_riscv32 +#define helper_vmsltu_vx_h helper_vmsltu_vx_h_riscv32 +#define helper_vmsltu_vx_w helper_vmsltu_vx_w_riscv32 +#define helper_vmsne_vv_b helper_vmsne_vv_b_riscv32 +#define helper_vmsne_vv_d helper_vmsne_vv_d_riscv32 +#define helper_vmsne_vv_h helper_vmsne_vv_h_riscv32 +#define helper_vmsne_vv_w helper_vmsne_vv_w_riscv32 +#define helper_vmsne_vx_b helper_vmsne_vx_b_riscv32 +#define helper_vmsne_vx_d helper_vmsne_vx_d_riscv32 +#define helper_vmsne_vx_h helper_vmsne_vx_h_riscv32 +#define helper_vmsne_vx_w helper_vmsne_vx_w_riscv32 +#define helper_vmsof_m helper_vmsof_m_riscv32 +#define helper_vmul_vv_b helper_vmul_vv_b_riscv32 +#define helper_vmul_vv_d helper_vmul_vv_d_riscv32 +#define helper_vmul_vv_h helper_vmul_vv_h_riscv32 +#define helper_vmul_vv_w helper_vmul_vv_w_riscv32 +#define helper_vmul_vx_b helper_vmul_vx_b_riscv32 +#define helper_vmul_vx_d helper_vmul_vx_d_riscv32 +#define helper_vmul_vx_h helper_vmul_vx_h_riscv32 +#define helper_vmul_vx_w helper_vmul_vx_w_riscv32 +#define helper_vmulh_vv_b helper_vmulh_vv_b_riscv32 +#define helper_vmulh_vv_d helper_vmulh_vv_d_riscv32 +#define helper_vmulh_vv_h helper_vmulh_vv_h_riscv32 +#define helper_vmulh_vv_w helper_vmulh_vv_w_riscv32 +#define helper_vmulh_vx_b helper_vmulh_vx_b_riscv32 +#define helper_vmulh_vx_d helper_vmulh_vx_d_riscv32 +#define helper_vmulh_vx_h helper_vmulh_vx_h_riscv32 +#define helper_vmulh_vx_w helper_vmulh_vx_w_riscv32 +#define helper_vmulhsu_vv_b helper_vmulhsu_vv_b_riscv32 +#define helper_vmulhsu_vv_d helper_vmulhsu_vv_d_riscv32 +#define helper_vmulhsu_vv_h helper_vmulhsu_vv_h_riscv32 +#define helper_vmulhsu_vv_w helper_vmulhsu_vv_w_riscv32 +#define helper_vmulhsu_vx_b helper_vmulhsu_vx_b_riscv32 +#define helper_vmulhsu_vx_d helper_vmulhsu_vx_d_riscv32 +#define helper_vmulhsu_vx_h helper_vmulhsu_vx_h_riscv32 +#define helper_vmulhsu_vx_w helper_vmulhsu_vx_w_riscv32 +#define helper_vmulhu_vv_b helper_vmulhu_vv_b_riscv32 +#define helper_vmulhu_vv_d helper_vmulhu_vv_d_riscv32 +#define helper_vmulhu_vv_h helper_vmulhu_vv_h_riscv32 +#define helper_vmulhu_vv_w helper_vmulhu_vv_w_riscv32 +#define helper_vmulhu_vx_b helper_vmulhu_vx_b_riscv32 +#define helper_vmulhu_vx_d helper_vmulhu_vx_d_riscv32 +#define helper_vmulhu_vx_h helper_vmulhu_vx_h_riscv32 +#define helper_vmulhu_vx_w helper_vmulhu_vx_w_riscv32 +#define helper_vmv_v_v_b helper_vmv_v_v_b_riscv32 +#define helper_vmv_v_v_d helper_vmv_v_v_d_riscv32 +#define helper_vmv_v_v_h helper_vmv_v_v_h_riscv32 +#define helper_vmv_v_v_w helper_vmv_v_v_w_riscv32 +#define helper_vmv_v_x_b helper_vmv_v_x_b_riscv32 +#define helper_vmv_v_x_d 
helper_vmv_v_x_d_riscv32 +#define helper_vmv_v_x_h helper_vmv_v_x_h_riscv32 +#define helper_vmv_v_x_w helper_vmv_v_x_w_riscv32 +#define helper_vmxnor_mm helper_vmxnor_mm_riscv32 +#define helper_vmxor_mm helper_vmxor_mm_riscv32 +#define helper_vnclip_vv_b helper_vnclip_vv_b_riscv32 +#define helper_vnclip_vv_h helper_vnclip_vv_h_riscv32 +#define helper_vnclip_vv_w helper_vnclip_vv_w_riscv32 +#define helper_vnclip_vx_b helper_vnclip_vx_b_riscv32 +#define helper_vnclip_vx_h helper_vnclip_vx_h_riscv32 +#define helper_vnclip_vx_w helper_vnclip_vx_w_riscv32 +#define helper_vnclipu_vv_b helper_vnclipu_vv_b_riscv32 +#define helper_vnclipu_vv_h helper_vnclipu_vv_h_riscv32 +#define helper_vnclipu_vv_w helper_vnclipu_vv_w_riscv32 +#define helper_vnclipu_vx_b helper_vnclipu_vx_b_riscv32 +#define helper_vnclipu_vx_h helper_vnclipu_vx_h_riscv32 +#define helper_vnclipu_vx_w helper_vnclipu_vx_w_riscv32 +#define helper_vnmsac_vv_b helper_vnmsac_vv_b_riscv32 +#define helper_vnmsac_vv_d helper_vnmsac_vv_d_riscv32 +#define helper_vnmsac_vv_h helper_vnmsac_vv_h_riscv32 +#define helper_vnmsac_vv_w helper_vnmsac_vv_w_riscv32 +#define helper_vnmsac_vx_b helper_vnmsac_vx_b_riscv32 +#define helper_vnmsac_vx_d helper_vnmsac_vx_d_riscv32 +#define helper_vnmsac_vx_h helper_vnmsac_vx_h_riscv32 +#define helper_vnmsac_vx_w helper_vnmsac_vx_w_riscv32 +#define helper_vnmsub_vv_b helper_vnmsub_vv_b_riscv32 +#define helper_vnmsub_vv_d helper_vnmsub_vv_d_riscv32 +#define helper_vnmsub_vv_h helper_vnmsub_vv_h_riscv32 +#define helper_vnmsub_vv_w helper_vnmsub_vv_w_riscv32 +#define helper_vnmsub_vx_b helper_vnmsub_vx_b_riscv32 +#define helper_vnmsub_vx_d helper_vnmsub_vx_d_riscv32 +#define helper_vnmsub_vx_h helper_vnmsub_vx_h_riscv32 +#define helper_vnmsub_vx_w helper_vnmsub_vx_w_riscv32 +#define helper_vnsra_vv_b helper_vnsra_vv_b_riscv32 +#define helper_vnsra_vv_h helper_vnsra_vv_h_riscv32 +#define helper_vnsra_vv_w helper_vnsra_vv_w_riscv32 +#define helper_vnsra_vx_b helper_vnsra_vx_b_riscv32 +#define helper_vnsra_vx_h helper_vnsra_vx_h_riscv32 +#define helper_vnsra_vx_w helper_vnsra_vx_w_riscv32 +#define helper_vnsrl_vv_b helper_vnsrl_vv_b_riscv32 +#define helper_vnsrl_vv_h helper_vnsrl_vv_h_riscv32 +#define helper_vnsrl_vv_w helper_vnsrl_vv_w_riscv32 +#define helper_vnsrl_vx_b helper_vnsrl_vx_b_riscv32 +#define helper_vnsrl_vx_h helper_vnsrl_vx_h_riscv32 +#define helper_vnsrl_vx_w helper_vnsrl_vx_w_riscv32 +#define helper_vor_vv_b helper_vor_vv_b_riscv32 +#define helper_vor_vv_d helper_vor_vv_d_riscv32 +#define helper_vor_vv_h helper_vor_vv_h_riscv32 +#define helper_vor_vv_w helper_vor_vv_w_riscv32 +#define helper_vor_vx_b helper_vor_vx_b_riscv32 +#define helper_vor_vx_d helper_vor_vx_d_riscv32 +#define helper_vor_vx_h helper_vor_vx_h_riscv32 +#define helper_vor_vx_w helper_vor_vx_w_riscv32 +#define helper_vredand_vs_b helper_vredand_vs_b_riscv32 +#define helper_vredand_vs_d helper_vredand_vs_d_riscv32 +#define helper_vredand_vs_h helper_vredand_vs_h_riscv32 +#define helper_vredand_vs_w helper_vredand_vs_w_riscv32 +#define helper_vredmax_vs_b helper_vredmax_vs_b_riscv32 +#define helper_vredmax_vs_d helper_vredmax_vs_d_riscv32 +#define helper_vredmax_vs_h helper_vredmax_vs_h_riscv32 +#define helper_vredmax_vs_w helper_vredmax_vs_w_riscv32 +#define helper_vredmaxu_vs_b helper_vredmaxu_vs_b_riscv32 +#define helper_vredmaxu_vs_d helper_vredmaxu_vs_d_riscv32 +#define helper_vredmaxu_vs_h helper_vredmaxu_vs_h_riscv32 +#define helper_vredmaxu_vs_w helper_vredmaxu_vs_w_riscv32 +#define helper_vredmin_vs_b 
helper_vredmin_vs_b_riscv32 +#define helper_vredmin_vs_d helper_vredmin_vs_d_riscv32 +#define helper_vredmin_vs_h helper_vredmin_vs_h_riscv32 +#define helper_vredmin_vs_w helper_vredmin_vs_w_riscv32 +#define helper_vredminu_vs_b helper_vredminu_vs_b_riscv32 +#define helper_vredminu_vs_d helper_vredminu_vs_d_riscv32 +#define helper_vredminu_vs_h helper_vredminu_vs_h_riscv32 +#define helper_vredminu_vs_w helper_vredminu_vs_w_riscv32 +#define helper_vredor_vs_b helper_vredor_vs_b_riscv32 +#define helper_vredor_vs_d helper_vredor_vs_d_riscv32 +#define helper_vredor_vs_h helper_vredor_vs_h_riscv32 +#define helper_vredor_vs_w helper_vredor_vs_w_riscv32 +#define helper_vredsum_vs_b helper_vredsum_vs_b_riscv32 +#define helper_vredsum_vs_d helper_vredsum_vs_d_riscv32 +#define helper_vredsum_vs_h helper_vredsum_vs_h_riscv32 +#define helper_vredsum_vs_w helper_vredsum_vs_w_riscv32 +#define helper_vredxor_vs_b helper_vredxor_vs_b_riscv32 +#define helper_vredxor_vs_d helper_vredxor_vs_d_riscv32 +#define helper_vredxor_vs_h helper_vredxor_vs_h_riscv32 +#define helper_vredxor_vs_w helper_vredxor_vs_w_riscv32 +#define helper_vrem_vv_b helper_vrem_vv_b_riscv32 +#define helper_vrem_vv_d helper_vrem_vv_d_riscv32 +#define helper_vrem_vv_h helper_vrem_vv_h_riscv32 +#define helper_vrem_vv_w helper_vrem_vv_w_riscv32 +#define helper_vrem_vx_b helper_vrem_vx_b_riscv32 +#define helper_vrem_vx_d helper_vrem_vx_d_riscv32 +#define helper_vrem_vx_h helper_vrem_vx_h_riscv32 +#define helper_vrem_vx_w helper_vrem_vx_w_riscv32 +#define helper_vremu_vv_b helper_vremu_vv_b_riscv32 +#define helper_vremu_vv_d helper_vremu_vv_d_riscv32 +#define helper_vremu_vv_h helper_vremu_vv_h_riscv32 +#define helper_vremu_vv_w helper_vremu_vv_w_riscv32 +#define helper_vremu_vx_b helper_vremu_vx_b_riscv32 +#define helper_vremu_vx_d helper_vremu_vx_d_riscv32 +#define helper_vremu_vx_h helper_vremu_vx_h_riscv32 +#define helper_vremu_vx_w helper_vremu_vx_w_riscv32 +#define helper_vrgather_vv_b helper_vrgather_vv_b_riscv32 +#define helper_vrgather_vv_d helper_vrgather_vv_d_riscv32 +#define helper_vrgather_vv_h helper_vrgather_vv_h_riscv32 +#define helper_vrgather_vv_w helper_vrgather_vv_w_riscv32 +#define helper_vrgather_vx_b helper_vrgather_vx_b_riscv32 +#define helper_vrgather_vx_d helper_vrgather_vx_d_riscv32 +#define helper_vrgather_vx_h helper_vrgather_vx_h_riscv32 +#define helper_vrgather_vx_w helper_vrgather_vx_w_riscv32 +#define helper_vrsub_vx_b helper_vrsub_vx_b_riscv32 +#define helper_vrsub_vx_d helper_vrsub_vx_d_riscv32 +#define helper_vrsub_vx_h helper_vrsub_vx_h_riscv32 +#define helper_vrsub_vx_w helper_vrsub_vx_w_riscv32 +#define helper_vsadd_vv_b helper_vsadd_vv_b_riscv32 +#define helper_vsadd_vv_d helper_vsadd_vv_d_riscv32 +#define helper_vsadd_vv_h helper_vsadd_vv_h_riscv32 +#define helper_vsadd_vv_w helper_vsadd_vv_w_riscv32 +#define helper_vsadd_vx_b helper_vsadd_vx_b_riscv32 +#define helper_vsadd_vx_d helper_vsadd_vx_d_riscv32 +#define helper_vsadd_vx_h helper_vsadd_vx_h_riscv32 +#define helper_vsadd_vx_w helper_vsadd_vx_w_riscv32 +#define helper_vsaddu_vv_b helper_vsaddu_vv_b_riscv32 +#define helper_vsaddu_vv_d helper_vsaddu_vv_d_riscv32 +#define helper_vsaddu_vv_h helper_vsaddu_vv_h_riscv32 +#define helper_vsaddu_vv_w helper_vsaddu_vv_w_riscv32 +#define helper_vsaddu_vx_b helper_vsaddu_vx_b_riscv32 +#define helper_vsaddu_vx_d helper_vsaddu_vx_d_riscv32 +#define helper_vsaddu_vx_h helper_vsaddu_vx_h_riscv32 +#define helper_vsaddu_vx_w helper_vsaddu_vx_w_riscv32 +#define helper_vsb_v_b helper_vsb_v_b_riscv32 +#define 
helper_vsb_v_b_mask helper_vsb_v_b_mask_riscv32 +#define helper_vsb_v_d helper_vsb_v_d_riscv32 +#define helper_vsb_v_d_mask helper_vsb_v_d_mask_riscv32 +#define helper_vsb_v_h helper_vsb_v_h_riscv32 +#define helper_vsb_v_h_mask helper_vsb_v_h_mask_riscv32 +#define helper_vsb_v_w helper_vsb_v_w_riscv32 +#define helper_vsb_v_w_mask helper_vsb_v_w_mask_riscv32 +#define helper_vsbc_vvm_b helper_vsbc_vvm_b_riscv32 +#define helper_vsbc_vvm_d helper_vsbc_vvm_d_riscv32 +#define helper_vsbc_vvm_h helper_vsbc_vvm_h_riscv32 +#define helper_vsbc_vvm_w helper_vsbc_vvm_w_riscv32 +#define helper_vsbc_vxm_b helper_vsbc_vxm_b_riscv32 +#define helper_vsbc_vxm_d helper_vsbc_vxm_d_riscv32 +#define helper_vsbc_vxm_h helper_vsbc_vxm_h_riscv32 +#define helper_vsbc_vxm_w helper_vsbc_vxm_w_riscv32 +#define helper_vse_v_b helper_vse_v_b_riscv32 +#define helper_vse_v_b_mask helper_vse_v_b_mask_riscv32 +#define helper_vse_v_d helper_vse_v_d_riscv32 +#define helper_vse_v_d_mask helper_vse_v_d_mask_riscv32 +#define helper_vse_v_h helper_vse_v_h_riscv32 +#define helper_vse_v_h_mask helper_vse_v_h_mask_riscv32 +#define helper_vse_v_w helper_vse_v_w_riscv32 +#define helper_vse_v_w_mask helper_vse_v_w_mask_riscv32 +#define helper_vsetvl helper_vsetvl_riscv32 +#define helper_vsh_v_d helper_vsh_v_d_riscv32 +#define helper_vsh_v_d_mask helper_vsh_v_d_mask_riscv32 +#define helper_vsh_v_h helper_vsh_v_h_riscv32 +#define helper_vsh_v_h_mask helper_vsh_v_h_mask_riscv32 +#define helper_vsh_v_w helper_vsh_v_w_riscv32 +#define helper_vsh_v_w_mask helper_vsh_v_w_mask_riscv32 +#define helper_vslide1down_vx_b helper_vslide1down_vx_b_riscv32 +#define helper_vslide1down_vx_d helper_vslide1down_vx_d_riscv32 +#define helper_vslide1down_vx_h helper_vslide1down_vx_h_riscv32 +#define helper_vslide1down_vx_w helper_vslide1down_vx_w_riscv32 +#define helper_vslide1up_vx_b helper_vslide1up_vx_b_riscv32 +#define helper_vslide1up_vx_d helper_vslide1up_vx_d_riscv32 +#define helper_vslide1up_vx_h helper_vslide1up_vx_h_riscv32 +#define helper_vslide1up_vx_w helper_vslide1up_vx_w_riscv32 +#define helper_vslidedown_vx_b helper_vslidedown_vx_b_riscv32 +#define helper_vslidedown_vx_d helper_vslidedown_vx_d_riscv32 +#define helper_vslidedown_vx_h helper_vslidedown_vx_h_riscv32 +#define helper_vslidedown_vx_w helper_vslidedown_vx_w_riscv32 +#define helper_vslideup_vx_b helper_vslideup_vx_b_riscv32 +#define helper_vslideup_vx_d helper_vslideup_vx_d_riscv32 +#define helper_vslideup_vx_h helper_vslideup_vx_h_riscv32 +#define helper_vslideup_vx_w helper_vslideup_vx_w_riscv32 +#define helper_vsll_vv_b helper_vsll_vv_b_riscv32 +#define helper_vsll_vv_d helper_vsll_vv_d_riscv32 +#define helper_vsll_vv_h helper_vsll_vv_h_riscv32 +#define helper_vsll_vv_w helper_vsll_vv_w_riscv32 +#define helper_vsll_vx_b helper_vsll_vx_b_riscv32 +#define helper_vsll_vx_d helper_vsll_vx_d_riscv32 +#define helper_vsll_vx_h helper_vsll_vx_h_riscv32 +#define helper_vsll_vx_w helper_vsll_vx_w_riscv32 +#define helper_vsmul_vv_b helper_vsmul_vv_b_riscv32 +#define helper_vsmul_vv_d helper_vsmul_vv_d_riscv32 +#define helper_vsmul_vv_h helper_vsmul_vv_h_riscv32 +#define helper_vsmul_vv_w helper_vsmul_vv_w_riscv32 +#define helper_vsmul_vx_b helper_vsmul_vx_b_riscv32 +#define helper_vsmul_vx_d helper_vsmul_vx_d_riscv32 +#define helper_vsmul_vx_h helper_vsmul_vx_h_riscv32 +#define helper_vsmul_vx_w helper_vsmul_vx_w_riscv32 +#define helper_vsra_vv_b helper_vsra_vv_b_riscv32 +#define helper_vsra_vv_d helper_vsra_vv_d_riscv32 +#define helper_vsra_vv_h helper_vsra_vv_h_riscv32 +#define 
helper_vsra_vv_w helper_vsra_vv_w_riscv32 +#define helper_vsra_vx_b helper_vsra_vx_b_riscv32 +#define helper_vsra_vx_d helper_vsra_vx_d_riscv32 +#define helper_vsra_vx_h helper_vsra_vx_h_riscv32 +#define helper_vsra_vx_w helper_vsra_vx_w_riscv32 +#define helper_vsrl_vv_b helper_vsrl_vv_b_riscv32 +#define helper_vsrl_vv_d helper_vsrl_vv_d_riscv32 +#define helper_vsrl_vv_h helper_vsrl_vv_h_riscv32 +#define helper_vsrl_vv_w helper_vsrl_vv_w_riscv32 +#define helper_vsrl_vx_b helper_vsrl_vx_b_riscv32 +#define helper_vsrl_vx_d helper_vsrl_vx_d_riscv32 +#define helper_vsrl_vx_h helper_vsrl_vx_h_riscv32 +#define helper_vsrl_vx_w helper_vsrl_vx_w_riscv32 +#define helper_vssb_v_b helper_vssb_v_b_riscv32 +#define helper_vssb_v_d helper_vssb_v_d_riscv32 +#define helper_vssb_v_h helper_vssb_v_h_riscv32 +#define helper_vssb_v_w helper_vssb_v_w_riscv32 +#define helper_vsse_v_b helper_vsse_v_b_riscv32 +#define helper_vsse_v_d helper_vsse_v_d_riscv32 +#define helper_vsse_v_h helper_vsse_v_h_riscv32 +#define helper_vsse_v_w helper_vsse_v_w_riscv32 +#define helper_vssh_v_d helper_vssh_v_d_riscv32 +#define helper_vssh_v_h helper_vssh_v_h_riscv32 +#define helper_vssh_v_w helper_vssh_v_w_riscv32 +#define helper_vssra_vv_b helper_vssra_vv_b_riscv32 +#define helper_vssra_vv_d helper_vssra_vv_d_riscv32 +#define helper_vssra_vv_h helper_vssra_vv_h_riscv32 +#define helper_vssra_vv_w helper_vssra_vv_w_riscv32 +#define helper_vssra_vx_b helper_vssra_vx_b_riscv32 +#define helper_vssra_vx_d helper_vssra_vx_d_riscv32 +#define helper_vssra_vx_h helper_vssra_vx_h_riscv32 +#define helper_vssra_vx_w helper_vssra_vx_w_riscv32 +#define helper_vssrl_vv_b helper_vssrl_vv_b_riscv32 +#define helper_vssrl_vv_d helper_vssrl_vv_d_riscv32 +#define helper_vssrl_vv_h helper_vssrl_vv_h_riscv32 +#define helper_vssrl_vv_w helper_vssrl_vv_w_riscv32 +#define helper_vssrl_vx_b helper_vssrl_vx_b_riscv32 +#define helper_vssrl_vx_d helper_vssrl_vx_d_riscv32 +#define helper_vssrl_vx_h helper_vssrl_vx_h_riscv32 +#define helper_vssrl_vx_w helper_vssrl_vx_w_riscv32 +#define helper_vssub_vv_b helper_vssub_vv_b_riscv32 +#define helper_vssub_vv_d helper_vssub_vv_d_riscv32 +#define helper_vssub_vv_h helper_vssub_vv_h_riscv32 +#define helper_vssub_vv_w helper_vssub_vv_w_riscv32 +#define helper_vssub_vx_b helper_vssub_vx_b_riscv32 +#define helper_vssub_vx_d helper_vssub_vx_d_riscv32 +#define helper_vssub_vx_h helper_vssub_vx_h_riscv32 +#define helper_vssub_vx_w helper_vssub_vx_w_riscv32 +#define helper_vssubu_vv_b helper_vssubu_vv_b_riscv32 +#define helper_vssubu_vv_d helper_vssubu_vv_d_riscv32 +#define helper_vssubu_vv_h helper_vssubu_vv_h_riscv32 +#define helper_vssubu_vv_w helper_vssubu_vv_w_riscv32 +#define helper_vssubu_vx_b helper_vssubu_vx_b_riscv32 +#define helper_vssubu_vx_d helper_vssubu_vx_d_riscv32 +#define helper_vssubu_vx_h helper_vssubu_vx_h_riscv32 +#define helper_vssubu_vx_w helper_vssubu_vx_w_riscv32 +#define helper_vssw_v_d helper_vssw_v_d_riscv32 +#define helper_vssw_v_w helper_vssw_v_w_riscv32 +#define helper_vsub_vv_b helper_vsub_vv_b_riscv32 +#define helper_vsub_vv_d helper_vsub_vv_d_riscv32 +#define helper_vsub_vv_h helper_vsub_vv_h_riscv32 +#define helper_vsub_vv_w helper_vsub_vv_w_riscv32 +#define helper_vsub_vx_b helper_vsub_vx_b_riscv32 +#define helper_vsub_vx_d helper_vsub_vx_d_riscv32 +#define helper_vsub_vx_h helper_vsub_vx_h_riscv32 +#define helper_vsub_vx_w helper_vsub_vx_w_riscv32 +#define helper_vsw_v_d helper_vsw_v_d_riscv32 +#define helper_vsw_v_d_mask helper_vsw_v_d_mask_riscv32 +#define helper_vsw_v_w 
helper_vsw_v_w_riscv32 +#define helper_vsw_v_w_mask helper_vsw_v_w_mask_riscv32 +#define helper_vsxb_v_b helper_vsxb_v_b_riscv32 +#define helper_vsxb_v_d helper_vsxb_v_d_riscv32 +#define helper_vsxb_v_h helper_vsxb_v_h_riscv32 +#define helper_vsxb_v_w helper_vsxb_v_w_riscv32 +#define helper_vsxe_v_b helper_vsxe_v_b_riscv32 +#define helper_vsxe_v_d helper_vsxe_v_d_riscv32 +#define helper_vsxe_v_h helper_vsxe_v_h_riscv32 +#define helper_vsxe_v_w helper_vsxe_v_w_riscv32 +#define helper_vsxh_v_d helper_vsxh_v_d_riscv32 +#define helper_vsxh_v_h helper_vsxh_v_h_riscv32 +#define helper_vsxh_v_w helper_vsxh_v_w_riscv32 +#define helper_vsxw_v_d helper_vsxw_v_d_riscv32 +#define helper_vsxw_v_w helper_vsxw_v_w_riscv32 +#define helper_vwadd_vv_b helper_vwadd_vv_b_riscv32 +#define helper_vwadd_vv_h helper_vwadd_vv_h_riscv32 +#define helper_vwadd_vv_w helper_vwadd_vv_w_riscv32 +#define helper_vwadd_vx_b helper_vwadd_vx_b_riscv32 +#define helper_vwadd_vx_h helper_vwadd_vx_h_riscv32 +#define helper_vwadd_vx_w helper_vwadd_vx_w_riscv32 +#define helper_vwadd_wv_b helper_vwadd_wv_b_riscv32 +#define helper_vwadd_wv_h helper_vwadd_wv_h_riscv32 +#define helper_vwadd_wv_w helper_vwadd_wv_w_riscv32 +#define helper_vwadd_wx_b helper_vwadd_wx_b_riscv32 +#define helper_vwadd_wx_h helper_vwadd_wx_h_riscv32 +#define helper_vwadd_wx_w helper_vwadd_wx_w_riscv32 +#define helper_vwaddu_vv_b helper_vwaddu_vv_b_riscv32 +#define helper_vwaddu_vv_h helper_vwaddu_vv_h_riscv32 +#define helper_vwaddu_vv_w helper_vwaddu_vv_w_riscv32 +#define helper_vwaddu_vx_b helper_vwaddu_vx_b_riscv32 +#define helper_vwaddu_vx_h helper_vwaddu_vx_h_riscv32 +#define helper_vwaddu_vx_w helper_vwaddu_vx_w_riscv32 +#define helper_vwaddu_wv_b helper_vwaddu_wv_b_riscv32 +#define helper_vwaddu_wv_h helper_vwaddu_wv_h_riscv32 +#define helper_vwaddu_wv_w helper_vwaddu_wv_w_riscv32 +#define helper_vwaddu_wx_b helper_vwaddu_wx_b_riscv32 +#define helper_vwaddu_wx_h helper_vwaddu_wx_h_riscv32 +#define helper_vwaddu_wx_w helper_vwaddu_wx_w_riscv32 +#define helper_vwmacc_vv_b helper_vwmacc_vv_b_riscv32 +#define helper_vwmacc_vv_h helper_vwmacc_vv_h_riscv32 +#define helper_vwmacc_vv_w helper_vwmacc_vv_w_riscv32 +#define helper_vwmacc_vx_b helper_vwmacc_vx_b_riscv32 +#define helper_vwmacc_vx_h helper_vwmacc_vx_h_riscv32 +#define helper_vwmacc_vx_w helper_vwmacc_vx_w_riscv32 +#define helper_vwmaccsu_vv_b helper_vwmaccsu_vv_b_riscv32 +#define helper_vwmaccsu_vv_h helper_vwmaccsu_vv_h_riscv32 +#define helper_vwmaccsu_vv_w helper_vwmaccsu_vv_w_riscv32 +#define helper_vwmaccsu_vx_b helper_vwmaccsu_vx_b_riscv32 +#define helper_vwmaccsu_vx_h helper_vwmaccsu_vx_h_riscv32 +#define helper_vwmaccsu_vx_w helper_vwmaccsu_vx_w_riscv32 +#define helper_vwmaccu_vv_b helper_vwmaccu_vv_b_riscv32 +#define helper_vwmaccu_vv_h helper_vwmaccu_vv_h_riscv32 +#define helper_vwmaccu_vv_w helper_vwmaccu_vv_w_riscv32 +#define helper_vwmaccu_vx_b helper_vwmaccu_vx_b_riscv32 +#define helper_vwmaccu_vx_h helper_vwmaccu_vx_h_riscv32 +#define helper_vwmaccu_vx_w helper_vwmaccu_vx_w_riscv32 +#define helper_vwmaccus_vx_b helper_vwmaccus_vx_b_riscv32 +#define helper_vwmaccus_vx_h helper_vwmaccus_vx_h_riscv32 +#define helper_vwmaccus_vx_w helper_vwmaccus_vx_w_riscv32 +#define helper_vwmul_vv_b helper_vwmul_vv_b_riscv32 +#define helper_vwmul_vv_h helper_vwmul_vv_h_riscv32 +#define helper_vwmul_vv_w helper_vwmul_vv_w_riscv32 +#define helper_vwmul_vx_b helper_vwmul_vx_b_riscv32 +#define helper_vwmul_vx_h helper_vwmul_vx_h_riscv32 +#define helper_vwmul_vx_w helper_vwmul_vx_w_riscv32 +#define 
helper_vwmulsu_vv_b helper_vwmulsu_vv_b_riscv32 +#define helper_vwmulsu_vv_h helper_vwmulsu_vv_h_riscv32 +#define helper_vwmulsu_vv_w helper_vwmulsu_vv_w_riscv32 +#define helper_vwmulsu_vx_b helper_vwmulsu_vx_b_riscv32 +#define helper_vwmulsu_vx_h helper_vwmulsu_vx_h_riscv32 +#define helper_vwmulsu_vx_w helper_vwmulsu_vx_w_riscv32 +#define helper_vwmulu_vv_b helper_vwmulu_vv_b_riscv32 +#define helper_vwmulu_vv_h helper_vwmulu_vv_h_riscv32 +#define helper_vwmulu_vv_w helper_vwmulu_vv_w_riscv32 +#define helper_vwmulu_vx_b helper_vwmulu_vx_b_riscv32 +#define helper_vwmulu_vx_h helper_vwmulu_vx_h_riscv32 +#define helper_vwmulu_vx_w helper_vwmulu_vx_w_riscv32 +#define helper_vwredsum_vs_b helper_vwredsum_vs_b_riscv32 +#define helper_vwredsum_vs_h helper_vwredsum_vs_h_riscv32 +#define helper_vwredsum_vs_w helper_vwredsum_vs_w_riscv32 +#define helper_vwredsumu_vs_b helper_vwredsumu_vs_b_riscv32 +#define helper_vwredsumu_vs_h helper_vwredsumu_vs_h_riscv32 +#define helper_vwredsumu_vs_w helper_vwredsumu_vs_w_riscv32 +#define helper_vwsmacc_vv_b helper_vwsmacc_vv_b_riscv32 +#define helper_vwsmacc_vv_h helper_vwsmacc_vv_h_riscv32 +#define helper_vwsmacc_vv_w helper_vwsmacc_vv_w_riscv32 +#define helper_vwsmacc_vx_b helper_vwsmacc_vx_b_riscv32 +#define helper_vwsmacc_vx_h helper_vwsmacc_vx_h_riscv32 +#define helper_vwsmacc_vx_w helper_vwsmacc_vx_w_riscv32 +#define helper_vwsmaccsu_vv_b helper_vwsmaccsu_vv_b_riscv32 +#define helper_vwsmaccsu_vv_h helper_vwsmaccsu_vv_h_riscv32 +#define helper_vwsmaccsu_vv_w helper_vwsmaccsu_vv_w_riscv32 +#define helper_vwsmaccsu_vx_b helper_vwsmaccsu_vx_b_riscv32 +#define helper_vwsmaccsu_vx_h helper_vwsmaccsu_vx_h_riscv32 +#define helper_vwsmaccsu_vx_w helper_vwsmaccsu_vx_w_riscv32 +#define helper_vwsmaccu_vv_b helper_vwsmaccu_vv_b_riscv32 +#define helper_vwsmaccu_vv_h helper_vwsmaccu_vv_h_riscv32 +#define helper_vwsmaccu_vv_w helper_vwsmaccu_vv_w_riscv32 +#define helper_vwsmaccu_vx_b helper_vwsmaccu_vx_b_riscv32 +#define helper_vwsmaccu_vx_h helper_vwsmaccu_vx_h_riscv32 +#define helper_vwsmaccu_vx_w helper_vwsmaccu_vx_w_riscv32 +#define helper_vwsmaccus_vx_b helper_vwsmaccus_vx_b_riscv32 +#define helper_vwsmaccus_vx_h helper_vwsmaccus_vx_h_riscv32 +#define helper_vwsmaccus_vx_w helper_vwsmaccus_vx_w_riscv32 +#define helper_vwsub_vv_b helper_vwsub_vv_b_riscv32 +#define helper_vwsub_vv_h helper_vwsub_vv_h_riscv32 +#define helper_vwsub_vv_w helper_vwsub_vv_w_riscv32 +#define helper_vwsub_vx_b helper_vwsub_vx_b_riscv32 +#define helper_vwsub_vx_h helper_vwsub_vx_h_riscv32 +#define helper_vwsub_vx_w helper_vwsub_vx_w_riscv32 +#define helper_vwsub_wv_b helper_vwsub_wv_b_riscv32 +#define helper_vwsub_wv_h helper_vwsub_wv_h_riscv32 +#define helper_vwsub_wv_w helper_vwsub_wv_w_riscv32 +#define helper_vwsub_wx_b helper_vwsub_wx_b_riscv32 +#define helper_vwsub_wx_h helper_vwsub_wx_h_riscv32 +#define helper_vwsub_wx_w helper_vwsub_wx_w_riscv32 +#define helper_vwsubu_vv_b helper_vwsubu_vv_b_riscv32 +#define helper_vwsubu_vv_h helper_vwsubu_vv_h_riscv32 +#define helper_vwsubu_vv_w helper_vwsubu_vv_w_riscv32 +#define helper_vwsubu_vx_b helper_vwsubu_vx_b_riscv32 +#define helper_vwsubu_vx_h helper_vwsubu_vx_h_riscv32 +#define helper_vwsubu_vx_w helper_vwsubu_vx_w_riscv32 +#define helper_vwsubu_wv_b helper_vwsubu_wv_b_riscv32 +#define helper_vwsubu_wv_h helper_vwsubu_wv_h_riscv32 +#define helper_vwsubu_wv_w helper_vwsubu_wv_w_riscv32 +#define helper_vwsubu_wx_b helper_vwsubu_wx_b_riscv32 +#define helper_vwsubu_wx_h helper_vwsubu_wx_h_riscv32 +#define helper_vwsubu_wx_w 
helper_vwsubu_wx_w_riscv32 +#define helper_vxor_vv_b helper_vxor_vv_b_riscv32 +#define helper_vxor_vv_d helper_vxor_vv_d_riscv32 +#define helper_vxor_vv_h helper_vxor_vv_h_riscv32 +#define helper_vxor_vv_w helper_vxor_vv_w_riscv32 +#define helper_vxor_vx_b helper_vxor_vx_b_riscv32 +#define helper_vxor_vx_d helper_vxor_vx_d_riscv32 +#define helper_vxor_vx_h helper_vxor_vx_h_riscv32 +#define helper_vxor_vx_w helper_vxor_vx_w_riscv32 #endif diff --git a/qemu/riscv64.h b/qemu/riscv64.h index 1bb119334e..2f0870dc20 100644 --- a/qemu/riscv64.h +++ b/qemu/riscv64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_riscv64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_riscv64 #define tcg_gen_st_i64 tcg_gen_st_i64_riscv64 +#define tcg_gen_add_i64 tcg_gen_add_i64_riscv64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_riscv64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_riscv64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_riscv64 #define cpu_icount_to_ns cpu_icount_to_ns_riscv64 #define cpu_is_stopped cpu_is_stopped_riscv64 #define cpu_get_ticks cpu_get_ticks_riscv64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_riscv64 #define floatx80_mul floatx80_mul_riscv64 #define floatx80_div floatx80_div_riscv64 +#define floatx80_modrem floatx80_modrem_riscv64 +#define floatx80_mod floatx80_mod_riscv64 #define floatx80_rem floatx80_rem_riscv64 #define floatx80_sqrt floatx80_sqrt_riscv64 #define floatx80_eq floatx80_eq_riscv64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_riscv64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_riscv64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_riscv64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_riscv64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_riscv64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_riscv64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_riscv64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_riscv64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_riscv64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_riscv64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_riscv64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_riscv64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_riscv64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_riscv64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_riscv64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_riscv64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_riscv64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_riscv64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_riscv64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_riscv64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_riscv64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_riscv64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_riscv64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_riscv64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_riscv64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_riscv64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_riscv64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_riscv64 #define tcg_gen_shri_vec tcg_gen_shri_vec_riscv64 #define tcg_gen_sari_vec tcg_gen_sari_vec_riscv64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_riscv64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_riscv64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_riscv64 #define tcg_gen_add_vec tcg_gen_add_vec_riscv64 #define tcg_gen_sub_vec tcg_gen_sub_vec_riscv64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_riscv64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_riscv64 #define tcg_gen_sarv_vec 
tcg_gen_sarv_vec_riscv64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_riscv64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_riscv64 #define tcg_gen_shls_vec tcg_gen_shls_vec_riscv64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_riscv64 #define tcg_gen_sars_vec tcg_gen_sars_vec_riscv64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_riscv64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_riscv64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_riscv64 #define tb_htable_lookup tb_htable_lookup_riscv64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_riscv64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_riscv64 #define tlb_init tlb_init_riscv64 +#define tlb_destroy tlb_destroy_riscv64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_riscv64 #define tlb_flush tlb_flush_riscv64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_riscv64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_riscv64 #define get_page_addr_code_hostp get_page_addr_code_hostp_riscv64 #define get_page_addr_code get_page_addr_code_riscv64 +#define probe_access_flags probe_access_flags_riscv64 #define probe_access probe_access_riscv64 #define tlb_vaddr_to_host tlb_vaddr_to_host_riscv64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_riscv64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_riscv64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_riscv64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_riscv64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_riscv64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_riscv64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_riscv64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_riscv64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_riscv64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_riscv64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_riscv64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_riscv64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_riscv64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_riscv64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_riscv64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_riscv64 #define cpu_ldub_data_ra cpu_ldub_data_ra_riscv64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_riscv64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_riscv64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_riscv64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_riscv64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_riscv64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_riscv64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_riscv64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_riscv64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_riscv64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_riscv64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_riscv64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_riscv64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_riscv64 #define cpu_ldub_data cpu_ldub_data_riscv64 #define cpu_ldsb_data cpu_ldsb_data_riscv64 -#define cpu_lduw_data cpu_lduw_data_riscv64 -#define cpu_ldsw_data cpu_ldsw_data_riscv64 -#define cpu_ldl_data cpu_ldl_data_riscv64 -#define cpu_ldq_data cpu_ldq_data_riscv64 +#define cpu_lduw_be_data cpu_lduw_be_data_riscv64 +#define cpu_lduw_le_data cpu_lduw_le_data_riscv64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_riscv64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_riscv64 +#define cpu_ldl_be_data cpu_ldl_be_data_riscv64 +#define cpu_ldl_le_data cpu_ldl_le_data_riscv64 +#define cpu_ldq_le_data cpu_ldq_le_data_riscv64 +#define cpu_ldq_be_data cpu_ldq_be_data_riscv64 
#define helper_ret_stb_mmu helper_ret_stb_mmu_riscv64 #define helper_le_stw_mmu helper_le_stw_mmu_riscv64 #define helper_be_stw_mmu helper_be_stw_mmu_riscv64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_riscv64 #define helper_be_stq_mmu helper_be_stq_mmu_riscv64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_riscv64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_riscv64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_riscv64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_riscv64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_riscv64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_riscv64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_riscv64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_riscv64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_riscv64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_riscv64 #define cpu_stb_data_ra cpu_stb_data_ra_riscv64 -#define cpu_stw_data_ra cpu_stw_data_ra_riscv64 -#define cpu_stl_data_ra cpu_stl_data_ra_riscv64 -#define cpu_stq_data_ra cpu_stq_data_ra_riscv64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_riscv64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_riscv64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_riscv64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_riscv64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_riscv64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_riscv64 #define cpu_stb_data cpu_stb_data_riscv64 -#define cpu_stw_data cpu_stw_data_riscv64 -#define cpu_stl_data cpu_stl_data_riscv64 -#define cpu_stq_data cpu_stq_data_riscv64 +#define cpu_stw_be_data cpu_stw_be_data_riscv64 +#define cpu_stw_le_data cpu_stw_le_data_riscv64 +#define cpu_stl_be_data cpu_stl_be_data_riscv64 +#define cpu_stl_le_data cpu_stl_le_data_riscv64 +#define cpu_stq_be_data cpu_stq_be_data_riscv64 +#define cpu_stq_le_data cpu_stq_le_data_riscv64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_riscv64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_riscv64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_riscv64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_riscv64 #define cpu_ldl_code cpu_ldl_code_riscv64 #define cpu_ldq_code cpu_ldq_code_riscv64 +#define cpu_interrupt_handler cpu_interrupt_handler_riscv64 #define helper_div_i32 helper_div_i32_riscv64 #define helper_rem_i32 helper_rem_i32_riscv64 #define helper_divu_i32 helper_divu_i32_riscv64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_riscv64 #define helper_gvec_sar32i helper_gvec_sar32i_riscv64 #define helper_gvec_sar64i helper_gvec_sar64i_riscv64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_riscv64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_riscv64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_riscv64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_riscv64 #define helper_gvec_shl8v helper_gvec_shl8v_riscv64 #define helper_gvec_shl16v helper_gvec_shl16v_riscv64 #define helper_gvec_shl32v helper_gvec_shl32v_riscv64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_riscv64 #define helper_gvec_sar32v helper_gvec_sar32v_riscv64 #define helper_gvec_sar64v helper_gvec_sar64v_riscv64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_riscv64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_riscv64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_riscv64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_riscv64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_riscv64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv64 
+#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv64 #define helper_gvec_eq8 helper_gvec_eq8_riscv64 #define helper_gvec_ne8 helper_gvec_ne8_riscv64 #define helper_gvec_lt8 helper_gvec_lt8_riscv64 @@ -1366,6 +1420,7 @@ #define helper_mret helper_mret_riscv64 #define helper_wfi helper_wfi_riscv64 #define helper_tlb_flush helper_tlb_flush_riscv64 +#define helper_hyp_tlb_flush helper_hyp_tlb_flush_riscv64 #define pmp_hart_has_privs pmp_hart_has_privs_riscv64 #define pmpcfg_csr_write pmpcfg_csr_write_riscv64 #define pmpcfg_csr_read pmpcfg_csr_read_riscv64 @@ -1386,4 +1441,1006 @@ #define gen_helper_tlb_flush gen_helper_tlb_flush_riscv64 #define riscv_fpr_regnames riscv_fpr_regnames_riscv64 #define riscv_int_regnames riscv_int_regnames_riscv64 +#define fclass_d fclass_d_riscv64 +#define fclass_h fclass_h_riscv64 +#define fclass_s fclass_s_riscv64 +#define helper_vaadd_vv_b helper_vaadd_vv_b_riscv64 +#define helper_vaadd_vv_d helper_vaadd_vv_d_riscv64 +#define helper_vaadd_vv_h helper_vaadd_vv_h_riscv64 +#define helper_vaadd_vv_w helper_vaadd_vv_w_riscv64 +#define helper_vaadd_vx_b helper_vaadd_vx_b_riscv64 +#define helper_vaadd_vx_d helper_vaadd_vx_d_riscv64 +#define helper_vaadd_vx_h helper_vaadd_vx_h_riscv64 +#define helper_vaadd_vx_w helper_vaadd_vx_w_riscv64 +#define helper_vadc_vvm_b helper_vadc_vvm_b_riscv64 +#define helper_vadc_vvm_d helper_vadc_vvm_d_riscv64 +#define helper_vadc_vvm_h helper_vadc_vvm_h_riscv64 +#define helper_vadc_vvm_w helper_vadc_vvm_w_riscv64 +#define helper_vadc_vxm_b helper_vadc_vxm_b_riscv64 +#define helper_vadc_vxm_d helper_vadc_vxm_d_riscv64 +#define helper_vadc_vxm_h helper_vadc_vxm_h_riscv64 +#define helper_vadc_vxm_w helper_vadc_vxm_w_riscv64 +#define helper_vadd_vv_b helper_vadd_vv_b_riscv64 +#define helper_vadd_vv_d helper_vadd_vv_d_riscv64 +#define helper_vadd_vv_h helper_vadd_vv_h_riscv64 +#define helper_vadd_vv_w helper_vadd_vv_w_riscv64 +#define helper_vadd_vx_b helper_vadd_vx_b_riscv64 +#define helper_vadd_vx_d helper_vadd_vx_d_riscv64 +#define helper_vadd_vx_h helper_vadd_vx_h_riscv64 +#define helper_vadd_vx_w helper_vadd_vx_w_riscv64 +#define helper_vamoaddw_v_w helper_vamoaddw_v_w_riscv64 +#define helper_vamoandw_v_w helper_vamoandw_v_w_riscv64 +#define helper_vamomaxuw_v_w helper_vamomaxuw_v_w_riscv64 +#define helper_vamomaxw_v_w helper_vamomaxw_v_w_riscv64 +#define helper_vamominuw_v_w helper_vamominuw_v_w_riscv64 +#define helper_vamominw_v_w helper_vamominw_v_w_riscv64 +#define helper_vamoorw_v_w helper_vamoorw_v_w_riscv64 +#define helper_vamoswapw_v_w helper_vamoswapw_v_w_riscv64 +#define helper_vamoxorw_v_w helper_vamoxorw_v_w_riscv64 +#define helper_vand_vv_b helper_vand_vv_b_riscv64 +#define helper_vand_vv_d helper_vand_vv_d_riscv64 +#define helper_vand_vv_h helper_vand_vv_h_riscv64 +#define helper_vand_vv_w helper_vand_vv_w_riscv64 +#define helper_vand_vx_b helper_vand_vx_b_riscv64 +#define helper_vand_vx_d helper_vand_vx_d_riscv64 +#define helper_vand_vx_h helper_vand_vx_h_riscv64 +#define helper_vand_vx_w helper_vand_vx_w_riscv64 +#define helper_vasub_vv_b helper_vasub_vv_b_riscv64 +#define helper_vasub_vv_d helper_vasub_vv_d_riscv64 +#define helper_vasub_vv_h helper_vasub_vv_h_riscv64 +#define helper_vasub_vv_w helper_vasub_vv_w_riscv64 +#define helper_vasub_vx_b helper_vasub_vx_b_riscv64 +#define helper_vasub_vx_d helper_vasub_vx_d_riscv64 +#define helper_vasub_vx_h helper_vasub_vx_h_riscv64 +#define helper_vasub_vx_w helper_vasub_vx_w_riscv64 +#define helper_vcompress_vm_b helper_vcompress_vm_b_riscv64 +#define helper_vcompress_vm_d 
helper_vcompress_vm_d_riscv64 +#define helper_vcompress_vm_h helper_vcompress_vm_h_riscv64 +#define helper_vcompress_vm_w helper_vcompress_vm_w_riscv64 +#define helper_vdiv_vv_b helper_vdiv_vv_b_riscv64 +#define helper_vdiv_vv_d helper_vdiv_vv_d_riscv64 +#define helper_vdiv_vv_h helper_vdiv_vv_h_riscv64 +#define helper_vdiv_vv_w helper_vdiv_vv_w_riscv64 +#define helper_vdiv_vx_b helper_vdiv_vx_b_riscv64 +#define helper_vdiv_vx_d helper_vdiv_vx_d_riscv64 +#define helper_vdiv_vx_h helper_vdiv_vx_h_riscv64 +#define helper_vdiv_vx_w helper_vdiv_vx_w_riscv64 +#define helper_vdivu_vv_b helper_vdivu_vv_b_riscv64 +#define helper_vdivu_vv_d helper_vdivu_vv_d_riscv64 +#define helper_vdivu_vv_h helper_vdivu_vv_h_riscv64 +#define helper_vdivu_vv_w helper_vdivu_vv_w_riscv64 +#define helper_vdivu_vx_b helper_vdivu_vx_b_riscv64 +#define helper_vdivu_vx_d helper_vdivu_vx_d_riscv64 +#define helper_vdivu_vx_h helper_vdivu_vx_h_riscv64 +#define helper_vdivu_vx_w helper_vdivu_vx_w_riscv64 +#define helper_vec_rsubs16 helper_vec_rsubs16_riscv64 +#define helper_vec_rsubs32 helper_vec_rsubs32_riscv64 +#define helper_vec_rsubs64 helper_vec_rsubs64_riscv64 +#define helper_vec_rsubs8 helper_vec_rsubs8_riscv64 +#define helper_vfadd_vf_d helper_vfadd_vf_d_riscv64 +#define helper_vfadd_vf_h helper_vfadd_vf_h_riscv64 +#define helper_vfadd_vf_w helper_vfadd_vf_w_riscv64 +#define helper_vfadd_vv_d helper_vfadd_vv_d_riscv64 +#define helper_vfadd_vv_h helper_vfadd_vv_h_riscv64 +#define helper_vfadd_vv_w helper_vfadd_vv_w_riscv64 +#define helper_vfclass_v_d helper_vfclass_v_d_riscv64 +#define helper_vfclass_v_h helper_vfclass_v_h_riscv64 +#define helper_vfclass_v_w helper_vfclass_v_w_riscv64 +#define helper_vfcvt_f_x_v_d helper_vfcvt_f_x_v_d_riscv64 +#define helper_vfcvt_f_x_v_h helper_vfcvt_f_x_v_h_riscv64 +#define helper_vfcvt_f_x_v_w helper_vfcvt_f_x_v_w_riscv64 +#define helper_vfcvt_f_xu_v_d helper_vfcvt_f_xu_v_d_riscv64 +#define helper_vfcvt_f_xu_v_h helper_vfcvt_f_xu_v_h_riscv64 +#define helper_vfcvt_f_xu_v_w helper_vfcvt_f_xu_v_w_riscv64 +#define helper_vfcvt_x_f_v_d helper_vfcvt_x_f_v_d_riscv64 +#define helper_vfcvt_x_f_v_h helper_vfcvt_x_f_v_h_riscv64 +#define helper_vfcvt_x_f_v_w helper_vfcvt_x_f_v_w_riscv64 +#define helper_vfcvt_xu_f_v_d helper_vfcvt_xu_f_v_d_riscv64 +#define helper_vfcvt_xu_f_v_h helper_vfcvt_xu_f_v_h_riscv64 +#define helper_vfcvt_xu_f_v_w helper_vfcvt_xu_f_v_w_riscv64 +#define helper_vfdiv_vf_d helper_vfdiv_vf_d_riscv64 +#define helper_vfdiv_vf_h helper_vfdiv_vf_h_riscv64 +#define helper_vfdiv_vf_w helper_vfdiv_vf_w_riscv64 +#define helper_vfdiv_vv_d helper_vfdiv_vv_d_riscv64 +#define helper_vfdiv_vv_h helper_vfdiv_vv_h_riscv64 +#define helper_vfdiv_vv_w helper_vfdiv_vv_w_riscv64 +#define helper_vfmacc_vf_d helper_vfmacc_vf_d_riscv64 +#define helper_vfmacc_vf_h helper_vfmacc_vf_h_riscv64 +#define helper_vfmacc_vf_w helper_vfmacc_vf_w_riscv64 +#define helper_vfmacc_vv_d helper_vfmacc_vv_d_riscv64 +#define helper_vfmacc_vv_h helper_vfmacc_vv_h_riscv64 +#define helper_vfmacc_vv_w helper_vfmacc_vv_w_riscv64 +#define helper_vfmadd_vf_d helper_vfmadd_vf_d_riscv64 +#define helper_vfmadd_vf_h helper_vfmadd_vf_h_riscv64 +#define helper_vfmadd_vf_w helper_vfmadd_vf_w_riscv64 +#define helper_vfmadd_vv_d helper_vfmadd_vv_d_riscv64 +#define helper_vfmadd_vv_h helper_vfmadd_vv_h_riscv64 +#define helper_vfmadd_vv_w helper_vfmadd_vv_w_riscv64 +#define helper_vfmax_vf_d helper_vfmax_vf_d_riscv64 +#define helper_vfmax_vf_h helper_vfmax_vf_h_riscv64 +#define helper_vfmax_vf_w helper_vfmax_vf_w_riscv64 +#define 
helper_vfmax_vv_d helper_vfmax_vv_d_riscv64 +#define helper_vfmax_vv_h helper_vfmax_vv_h_riscv64 +#define helper_vfmax_vv_w helper_vfmax_vv_w_riscv64 +#define helper_vfmerge_vfm_d helper_vfmerge_vfm_d_riscv64 +#define helper_vfmerge_vfm_h helper_vfmerge_vfm_h_riscv64 +#define helper_vfmerge_vfm_w helper_vfmerge_vfm_w_riscv64 +#define helper_vfmin_vf_d helper_vfmin_vf_d_riscv64 +#define helper_vfmin_vf_h helper_vfmin_vf_h_riscv64 +#define helper_vfmin_vf_w helper_vfmin_vf_w_riscv64 +#define helper_vfmin_vv_d helper_vfmin_vv_d_riscv64 +#define helper_vfmin_vv_h helper_vfmin_vv_h_riscv64 +#define helper_vfmin_vv_w helper_vfmin_vv_w_riscv64 +#define helper_vfmsac_vf_d helper_vfmsac_vf_d_riscv64 +#define helper_vfmsac_vf_h helper_vfmsac_vf_h_riscv64 +#define helper_vfmsac_vf_w helper_vfmsac_vf_w_riscv64 +#define helper_vfmsac_vv_d helper_vfmsac_vv_d_riscv64 +#define helper_vfmsac_vv_h helper_vfmsac_vv_h_riscv64 +#define helper_vfmsac_vv_w helper_vfmsac_vv_w_riscv64 +#define helper_vfmsub_vf_d helper_vfmsub_vf_d_riscv64 +#define helper_vfmsub_vf_h helper_vfmsub_vf_h_riscv64 +#define helper_vfmsub_vf_w helper_vfmsub_vf_w_riscv64 +#define helper_vfmsub_vv_d helper_vfmsub_vv_d_riscv64 +#define helper_vfmsub_vv_h helper_vfmsub_vv_h_riscv64 +#define helper_vfmsub_vv_w helper_vfmsub_vv_w_riscv64 +#define helper_vfmul_vf_d helper_vfmul_vf_d_riscv64 +#define helper_vfmul_vf_h helper_vfmul_vf_h_riscv64 +#define helper_vfmul_vf_w helper_vfmul_vf_w_riscv64 +#define helper_vfmul_vv_d helper_vfmul_vv_d_riscv64 +#define helper_vfmul_vv_h helper_vfmul_vv_h_riscv64 +#define helper_vfmul_vv_w helper_vfmul_vv_w_riscv64 +#define helper_vfncvt_f_f_v_h helper_vfncvt_f_f_v_h_riscv64 +#define helper_vfncvt_f_f_v_w helper_vfncvt_f_f_v_w_riscv64 +#define helper_vfncvt_f_x_v_h helper_vfncvt_f_x_v_h_riscv64 +#define helper_vfncvt_f_x_v_w helper_vfncvt_f_x_v_w_riscv64 +#define helper_vfncvt_f_xu_v_h helper_vfncvt_f_xu_v_h_riscv64 +#define helper_vfncvt_f_xu_v_w helper_vfncvt_f_xu_v_w_riscv64 +#define helper_vfncvt_x_f_v_h helper_vfncvt_x_f_v_h_riscv64 +#define helper_vfncvt_x_f_v_w helper_vfncvt_x_f_v_w_riscv64 +#define helper_vfncvt_xu_f_v_h helper_vfncvt_xu_f_v_h_riscv64 +#define helper_vfncvt_xu_f_v_w helper_vfncvt_xu_f_v_w_riscv64 +#define helper_vfnmacc_vf_d helper_vfnmacc_vf_d_riscv64 +#define helper_vfnmacc_vf_h helper_vfnmacc_vf_h_riscv64 +#define helper_vfnmacc_vf_w helper_vfnmacc_vf_w_riscv64 +#define helper_vfnmacc_vv_d helper_vfnmacc_vv_d_riscv64 +#define helper_vfnmacc_vv_h helper_vfnmacc_vv_h_riscv64 +#define helper_vfnmacc_vv_w helper_vfnmacc_vv_w_riscv64 +#define helper_vfnmadd_vf_d helper_vfnmadd_vf_d_riscv64 +#define helper_vfnmadd_vf_h helper_vfnmadd_vf_h_riscv64 +#define helper_vfnmadd_vf_w helper_vfnmadd_vf_w_riscv64 +#define helper_vfnmadd_vv_d helper_vfnmadd_vv_d_riscv64 +#define helper_vfnmadd_vv_h helper_vfnmadd_vv_h_riscv64 +#define helper_vfnmadd_vv_w helper_vfnmadd_vv_w_riscv64 +#define helper_vfnmsac_vf_d helper_vfnmsac_vf_d_riscv64 +#define helper_vfnmsac_vf_h helper_vfnmsac_vf_h_riscv64 +#define helper_vfnmsac_vf_w helper_vfnmsac_vf_w_riscv64 +#define helper_vfnmsac_vv_d helper_vfnmsac_vv_d_riscv64 +#define helper_vfnmsac_vv_h helper_vfnmsac_vv_h_riscv64 +#define helper_vfnmsac_vv_w helper_vfnmsac_vv_w_riscv64 +#define helper_vfnmsub_vf_d helper_vfnmsub_vf_d_riscv64 +#define helper_vfnmsub_vf_h helper_vfnmsub_vf_h_riscv64 +#define helper_vfnmsub_vf_w helper_vfnmsub_vf_w_riscv64 +#define helper_vfnmsub_vv_d helper_vfnmsub_vv_d_riscv64 +#define helper_vfnmsub_vv_h helper_vfnmsub_vv_h_riscv64 
+#define helper_vfnmsub_vv_w helper_vfnmsub_vv_w_riscv64 +#define helper_vfrdiv_vf_d helper_vfrdiv_vf_d_riscv64 +#define helper_vfrdiv_vf_h helper_vfrdiv_vf_h_riscv64 +#define helper_vfrdiv_vf_w helper_vfrdiv_vf_w_riscv64 +#define helper_vfredmax_vs_d helper_vfredmax_vs_d_riscv64 +#define helper_vfredmax_vs_h helper_vfredmax_vs_h_riscv64 +#define helper_vfredmax_vs_w helper_vfredmax_vs_w_riscv64 +#define helper_vfredmin_vs_d helper_vfredmin_vs_d_riscv64 +#define helper_vfredmin_vs_h helper_vfredmin_vs_h_riscv64 +#define helper_vfredmin_vs_w helper_vfredmin_vs_w_riscv64 +#define helper_vfredsum_vs_d helper_vfredsum_vs_d_riscv64 +#define helper_vfredsum_vs_h helper_vfredsum_vs_h_riscv64 +#define helper_vfredsum_vs_w helper_vfredsum_vs_w_riscv64 +#define helper_vfrsub_vf_d helper_vfrsub_vf_d_riscv64 +#define helper_vfrsub_vf_h helper_vfrsub_vf_h_riscv64 +#define helper_vfrsub_vf_w helper_vfrsub_vf_w_riscv64 +#define helper_vfsgnj_vf_d helper_vfsgnj_vf_d_riscv64 +#define helper_vfsgnj_vf_h helper_vfsgnj_vf_h_riscv64 +#define helper_vfsgnj_vf_w helper_vfsgnj_vf_w_riscv64 +#define helper_vfsgnj_vv_d helper_vfsgnj_vv_d_riscv64 +#define helper_vfsgnj_vv_h helper_vfsgnj_vv_h_riscv64 +#define helper_vfsgnj_vv_w helper_vfsgnj_vv_w_riscv64 +#define helper_vfsgnjn_vf_d helper_vfsgnjn_vf_d_riscv64 +#define helper_vfsgnjn_vf_h helper_vfsgnjn_vf_h_riscv64 +#define helper_vfsgnjn_vf_w helper_vfsgnjn_vf_w_riscv64 +#define helper_vfsgnjn_vv_d helper_vfsgnjn_vv_d_riscv64 +#define helper_vfsgnjn_vv_h helper_vfsgnjn_vv_h_riscv64 +#define helper_vfsgnjn_vv_w helper_vfsgnjn_vv_w_riscv64 +#define helper_vfsgnjx_vf_d helper_vfsgnjx_vf_d_riscv64 +#define helper_vfsgnjx_vf_h helper_vfsgnjx_vf_h_riscv64 +#define helper_vfsgnjx_vf_w helper_vfsgnjx_vf_w_riscv64 +#define helper_vfsgnjx_vv_d helper_vfsgnjx_vv_d_riscv64 +#define helper_vfsgnjx_vv_h helper_vfsgnjx_vv_h_riscv64 +#define helper_vfsgnjx_vv_w helper_vfsgnjx_vv_w_riscv64 +#define helper_vfsqrt_v_d helper_vfsqrt_v_d_riscv64 +#define helper_vfsqrt_v_h helper_vfsqrt_v_h_riscv64 +#define helper_vfsqrt_v_w helper_vfsqrt_v_w_riscv64 +#define helper_vfsub_vf_d helper_vfsub_vf_d_riscv64 +#define helper_vfsub_vf_h helper_vfsub_vf_h_riscv64 +#define helper_vfsub_vf_w helper_vfsub_vf_w_riscv64 +#define helper_vfsub_vv_d helper_vfsub_vv_d_riscv64 +#define helper_vfsub_vv_h helper_vfsub_vv_h_riscv64 +#define helper_vfsub_vv_w helper_vfsub_vv_w_riscv64 +#define helper_vfwadd_vf_h helper_vfwadd_vf_h_riscv64 +#define helper_vfwadd_vf_w helper_vfwadd_vf_w_riscv64 +#define helper_vfwadd_vv_h helper_vfwadd_vv_h_riscv64 +#define helper_vfwadd_vv_w helper_vfwadd_vv_w_riscv64 +#define helper_vfwadd_wf_h helper_vfwadd_wf_h_riscv64 +#define helper_vfwadd_wf_w helper_vfwadd_wf_w_riscv64 +#define helper_vfwadd_wv_h helper_vfwadd_wv_h_riscv64 +#define helper_vfwadd_wv_w helper_vfwadd_wv_w_riscv64 +#define helper_vfwcvt_f_f_v_h helper_vfwcvt_f_f_v_h_riscv64 +#define helper_vfwcvt_f_f_v_w helper_vfwcvt_f_f_v_w_riscv64 +#define helper_vfwcvt_f_x_v_h helper_vfwcvt_f_x_v_h_riscv64 +#define helper_vfwcvt_f_x_v_w helper_vfwcvt_f_x_v_w_riscv64 +#define helper_vfwcvt_f_xu_v_h helper_vfwcvt_f_xu_v_h_riscv64 +#define helper_vfwcvt_f_xu_v_w helper_vfwcvt_f_xu_v_w_riscv64 +#define helper_vfwcvt_x_f_v_h helper_vfwcvt_x_f_v_h_riscv64 +#define helper_vfwcvt_x_f_v_w helper_vfwcvt_x_f_v_w_riscv64 +#define helper_vfwcvt_xu_f_v_h helper_vfwcvt_xu_f_v_h_riscv64 +#define helper_vfwcvt_xu_f_v_w helper_vfwcvt_xu_f_v_w_riscv64 +#define helper_vfwmacc_vf_h helper_vfwmacc_vf_h_riscv64 +#define helper_vfwmacc_vf_w 
helper_vfwmacc_vf_w_riscv64 +#define helper_vfwmacc_vv_h helper_vfwmacc_vv_h_riscv64 +#define helper_vfwmacc_vv_w helper_vfwmacc_vv_w_riscv64 +#define helper_vfwmsac_vf_h helper_vfwmsac_vf_h_riscv64 +#define helper_vfwmsac_vf_w helper_vfwmsac_vf_w_riscv64 +#define helper_vfwmsac_vv_h helper_vfwmsac_vv_h_riscv64 +#define helper_vfwmsac_vv_w helper_vfwmsac_vv_w_riscv64 +#define helper_vfwmul_vf_h helper_vfwmul_vf_h_riscv64 +#define helper_vfwmul_vf_w helper_vfwmul_vf_w_riscv64 +#define helper_vfwmul_vv_h helper_vfwmul_vv_h_riscv64 +#define helper_vfwmul_vv_w helper_vfwmul_vv_w_riscv64 +#define helper_vfwnmacc_vf_h helper_vfwnmacc_vf_h_riscv64 +#define helper_vfwnmacc_vf_w helper_vfwnmacc_vf_w_riscv64 +#define helper_vfwnmacc_vv_h helper_vfwnmacc_vv_h_riscv64 +#define helper_vfwnmacc_vv_w helper_vfwnmacc_vv_w_riscv64 +#define helper_vfwnmsac_vf_h helper_vfwnmsac_vf_h_riscv64 +#define helper_vfwnmsac_vf_w helper_vfwnmsac_vf_w_riscv64 +#define helper_vfwnmsac_vv_h helper_vfwnmsac_vv_h_riscv64 +#define helper_vfwnmsac_vv_w helper_vfwnmsac_vv_w_riscv64 +#define helper_vfwredsum_vs_h helper_vfwredsum_vs_h_riscv64 +#define helper_vfwredsum_vs_w helper_vfwredsum_vs_w_riscv64 +#define helper_vfwsub_vf_h helper_vfwsub_vf_h_riscv64 +#define helper_vfwsub_vf_w helper_vfwsub_vf_w_riscv64 +#define helper_vfwsub_vv_h helper_vfwsub_vv_h_riscv64 +#define helper_vfwsub_vv_w helper_vfwsub_vv_w_riscv64 +#define helper_vfwsub_wf_h helper_vfwsub_wf_h_riscv64 +#define helper_vfwsub_wf_w helper_vfwsub_wf_w_riscv64 +#define helper_vfwsub_wv_h helper_vfwsub_wv_h_riscv64 +#define helper_vfwsub_wv_w helper_vfwsub_wv_w_riscv64 +#define helper_vid_v_b helper_vid_v_b_riscv64 +#define helper_vid_v_d helper_vid_v_d_riscv64 +#define helper_vid_v_h helper_vid_v_h_riscv64 +#define helper_vid_v_w helper_vid_v_w_riscv64 +#define helper_viota_m_b helper_viota_m_b_riscv64 +#define helper_viota_m_d helper_viota_m_d_riscv64 +#define helper_viota_m_h helper_viota_m_h_riscv64 +#define helper_viota_m_w helper_viota_m_w_riscv64 +#define helper_vlb_v_b helper_vlb_v_b_riscv64 +#define helper_vlb_v_b_mask helper_vlb_v_b_mask_riscv64 +#define helper_vlb_v_d helper_vlb_v_d_riscv64 +#define helper_vlb_v_d_mask helper_vlb_v_d_mask_riscv64 +#define helper_vlb_v_h helper_vlb_v_h_riscv64 +#define helper_vlb_v_h_mask helper_vlb_v_h_mask_riscv64 +#define helper_vlb_v_w helper_vlb_v_w_riscv64 +#define helper_vlb_v_w_mask helper_vlb_v_w_mask_riscv64 +#define helper_vlbff_v_b helper_vlbff_v_b_riscv64 +#define helper_vlbff_v_d helper_vlbff_v_d_riscv64 +#define helper_vlbff_v_h helper_vlbff_v_h_riscv64 +#define helper_vlbff_v_w helper_vlbff_v_w_riscv64 +#define helper_vlbu_v_b helper_vlbu_v_b_riscv64 +#define helper_vlbu_v_b_mask helper_vlbu_v_b_mask_riscv64 +#define helper_vlbu_v_d helper_vlbu_v_d_riscv64 +#define helper_vlbu_v_d_mask helper_vlbu_v_d_mask_riscv64 +#define helper_vlbu_v_h helper_vlbu_v_h_riscv64 +#define helper_vlbu_v_h_mask helper_vlbu_v_h_mask_riscv64 +#define helper_vlbu_v_w helper_vlbu_v_w_riscv64 +#define helper_vlbu_v_w_mask helper_vlbu_v_w_mask_riscv64 +#define helper_vlbuff_v_b helper_vlbuff_v_b_riscv64 +#define helper_vlbuff_v_d helper_vlbuff_v_d_riscv64 +#define helper_vlbuff_v_h helper_vlbuff_v_h_riscv64 +#define helper_vlbuff_v_w helper_vlbuff_v_w_riscv64 +#define helper_vle_v_b helper_vle_v_b_riscv64 +#define helper_vle_v_b_mask helper_vle_v_b_mask_riscv64 +#define helper_vle_v_d helper_vle_v_d_riscv64 +#define helper_vle_v_d_mask helper_vle_v_d_mask_riscv64 +#define helper_vle_v_h helper_vle_v_h_riscv64 +#define 
helper_vle_v_h_mask helper_vle_v_h_mask_riscv64 +#define helper_vle_v_w helper_vle_v_w_riscv64 +#define helper_vle_v_w_mask helper_vle_v_w_mask_riscv64 +#define helper_vleff_v_b helper_vleff_v_b_riscv64 +#define helper_vleff_v_d helper_vleff_v_d_riscv64 +#define helper_vleff_v_h helper_vleff_v_h_riscv64 +#define helper_vleff_v_w helper_vleff_v_w_riscv64 +#define helper_vlh_v_d helper_vlh_v_d_riscv64 +#define helper_vlh_v_d_mask helper_vlh_v_d_mask_riscv64 +#define helper_vlh_v_h helper_vlh_v_h_riscv64 +#define helper_vlh_v_h_mask helper_vlh_v_h_mask_riscv64 +#define helper_vlh_v_w helper_vlh_v_w_riscv64 +#define helper_vlh_v_w_mask helper_vlh_v_w_mask_riscv64 +#define helper_vlhff_v_d helper_vlhff_v_d_riscv64 +#define helper_vlhff_v_h helper_vlhff_v_h_riscv64 +#define helper_vlhff_v_w helper_vlhff_v_w_riscv64 +#define helper_vlhu_v_d helper_vlhu_v_d_riscv64 +#define helper_vlhu_v_d_mask helper_vlhu_v_d_mask_riscv64 +#define helper_vlhu_v_h helper_vlhu_v_h_riscv64 +#define helper_vlhu_v_h_mask helper_vlhu_v_h_mask_riscv64 +#define helper_vlhu_v_w helper_vlhu_v_w_riscv64 +#define helper_vlhu_v_w_mask helper_vlhu_v_w_mask_riscv64 +#define helper_vlhuff_v_d helper_vlhuff_v_d_riscv64 +#define helper_vlhuff_v_h helper_vlhuff_v_h_riscv64 +#define helper_vlhuff_v_w helper_vlhuff_v_w_riscv64 +#define helper_vlsb_v_b helper_vlsb_v_b_riscv64 +#define helper_vlsb_v_d helper_vlsb_v_d_riscv64 +#define helper_vlsb_v_h helper_vlsb_v_h_riscv64 +#define helper_vlsb_v_w helper_vlsb_v_w_riscv64 +#define helper_vlsbu_v_b helper_vlsbu_v_b_riscv64 +#define helper_vlsbu_v_d helper_vlsbu_v_d_riscv64 +#define helper_vlsbu_v_h helper_vlsbu_v_h_riscv64 +#define helper_vlsbu_v_w helper_vlsbu_v_w_riscv64 +#define helper_vlse_v_b helper_vlse_v_b_riscv64 +#define helper_vlse_v_d helper_vlse_v_d_riscv64 +#define helper_vlse_v_h helper_vlse_v_h_riscv64 +#define helper_vlse_v_w helper_vlse_v_w_riscv64 +#define helper_vlsh_v_d helper_vlsh_v_d_riscv64 +#define helper_vlsh_v_h helper_vlsh_v_h_riscv64 +#define helper_vlsh_v_w helper_vlsh_v_w_riscv64 +#define helper_vlshu_v_d helper_vlshu_v_d_riscv64 +#define helper_vlshu_v_h helper_vlshu_v_h_riscv64 +#define helper_vlshu_v_w helper_vlshu_v_w_riscv64 +#define helper_vlsw_v_d helper_vlsw_v_d_riscv64 +#define helper_vlsw_v_w helper_vlsw_v_w_riscv64 +#define helper_vlswu_v_d helper_vlswu_v_d_riscv64 +#define helper_vlswu_v_w helper_vlswu_v_w_riscv64 +#define helper_vlw_v_d helper_vlw_v_d_riscv64 +#define helper_vlw_v_d_mask helper_vlw_v_d_mask_riscv64 +#define helper_vlw_v_w helper_vlw_v_w_riscv64 +#define helper_vlw_v_w_mask helper_vlw_v_w_mask_riscv64 +#define helper_vlwff_v_d helper_vlwff_v_d_riscv64 +#define helper_vlwff_v_w helper_vlwff_v_w_riscv64 +#define helper_vlwu_v_d helper_vlwu_v_d_riscv64 +#define helper_vlwu_v_d_mask helper_vlwu_v_d_mask_riscv64 +#define helper_vlwu_v_w helper_vlwu_v_w_riscv64 +#define helper_vlwu_v_w_mask helper_vlwu_v_w_mask_riscv64 +#define helper_vlwuff_v_d helper_vlwuff_v_d_riscv64 +#define helper_vlwuff_v_w helper_vlwuff_v_w_riscv64 +#define helper_vlxb_v_b helper_vlxb_v_b_riscv64 +#define helper_vlxb_v_d helper_vlxb_v_d_riscv64 +#define helper_vlxb_v_h helper_vlxb_v_h_riscv64 +#define helper_vlxb_v_w helper_vlxb_v_w_riscv64 +#define helper_vlxbu_v_b helper_vlxbu_v_b_riscv64 +#define helper_vlxbu_v_d helper_vlxbu_v_d_riscv64 +#define helper_vlxbu_v_h helper_vlxbu_v_h_riscv64 +#define helper_vlxbu_v_w helper_vlxbu_v_w_riscv64 +#define helper_vlxe_v_b helper_vlxe_v_b_riscv64 +#define helper_vlxe_v_d helper_vlxe_v_d_riscv64 +#define helper_vlxe_v_h 
helper_vlxe_v_h_riscv64 +#define helper_vlxe_v_w helper_vlxe_v_w_riscv64 +#define helper_vlxh_v_d helper_vlxh_v_d_riscv64 +#define helper_vlxh_v_h helper_vlxh_v_h_riscv64 +#define helper_vlxh_v_w helper_vlxh_v_w_riscv64 +#define helper_vlxhu_v_d helper_vlxhu_v_d_riscv64 +#define helper_vlxhu_v_h helper_vlxhu_v_h_riscv64 +#define helper_vlxhu_v_w helper_vlxhu_v_w_riscv64 +#define helper_vlxw_v_d helper_vlxw_v_d_riscv64 +#define helper_vlxw_v_w helper_vlxw_v_w_riscv64 +#define helper_vlxwu_v_d helper_vlxwu_v_d_riscv64 +#define helper_vlxwu_v_w helper_vlxwu_v_w_riscv64 +#define helper_vmacc_vv_b helper_vmacc_vv_b_riscv64 +#define helper_vmacc_vv_d helper_vmacc_vv_d_riscv64 +#define helper_vmacc_vv_h helper_vmacc_vv_h_riscv64 +#define helper_vmacc_vv_w helper_vmacc_vv_w_riscv64 +#define helper_vmacc_vx_b helper_vmacc_vx_b_riscv64 +#define helper_vmacc_vx_d helper_vmacc_vx_d_riscv64 +#define helper_vmacc_vx_h helper_vmacc_vx_h_riscv64 +#define helper_vmacc_vx_w helper_vmacc_vx_w_riscv64 +#define helper_vmadc_vvm_b helper_vmadc_vvm_b_riscv64 +#define helper_vmadc_vvm_d helper_vmadc_vvm_d_riscv64 +#define helper_vmadc_vvm_h helper_vmadc_vvm_h_riscv64 +#define helper_vmadc_vvm_w helper_vmadc_vvm_w_riscv64 +#define helper_vmadc_vxm_b helper_vmadc_vxm_b_riscv64 +#define helper_vmadc_vxm_d helper_vmadc_vxm_d_riscv64 +#define helper_vmadc_vxm_h helper_vmadc_vxm_h_riscv64 +#define helper_vmadc_vxm_w helper_vmadc_vxm_w_riscv64 +#define helper_vmadd_vv_b helper_vmadd_vv_b_riscv64 +#define helper_vmadd_vv_d helper_vmadd_vv_d_riscv64 +#define helper_vmadd_vv_h helper_vmadd_vv_h_riscv64 +#define helper_vmadd_vv_w helper_vmadd_vv_w_riscv64 +#define helper_vmadd_vx_b helper_vmadd_vx_b_riscv64 +#define helper_vmadd_vx_d helper_vmadd_vx_d_riscv64 +#define helper_vmadd_vx_h helper_vmadd_vx_h_riscv64 +#define helper_vmadd_vx_w helper_vmadd_vx_w_riscv64 +#define helper_vmand_mm helper_vmand_mm_riscv64 +#define helper_vmandnot_mm helper_vmandnot_mm_riscv64 +#define helper_vmax_vv_b helper_vmax_vv_b_riscv64 +#define helper_vmax_vv_d helper_vmax_vv_d_riscv64 +#define helper_vmax_vv_h helper_vmax_vv_h_riscv64 +#define helper_vmax_vv_w helper_vmax_vv_w_riscv64 +#define helper_vmax_vx_b helper_vmax_vx_b_riscv64 +#define helper_vmax_vx_d helper_vmax_vx_d_riscv64 +#define helper_vmax_vx_h helper_vmax_vx_h_riscv64 +#define helper_vmax_vx_w helper_vmax_vx_w_riscv64 +#define helper_vmaxu_vv_b helper_vmaxu_vv_b_riscv64 +#define helper_vmaxu_vv_d helper_vmaxu_vv_d_riscv64 +#define helper_vmaxu_vv_h helper_vmaxu_vv_h_riscv64 +#define helper_vmaxu_vv_w helper_vmaxu_vv_w_riscv64 +#define helper_vmaxu_vx_b helper_vmaxu_vx_b_riscv64 +#define helper_vmaxu_vx_d helper_vmaxu_vx_d_riscv64 +#define helper_vmaxu_vx_h helper_vmaxu_vx_h_riscv64 +#define helper_vmaxu_vx_w helper_vmaxu_vx_w_riscv64 +#define helper_vmerge_vvm_b helper_vmerge_vvm_b_riscv64 +#define helper_vmerge_vvm_d helper_vmerge_vvm_d_riscv64 +#define helper_vmerge_vvm_h helper_vmerge_vvm_h_riscv64 +#define helper_vmerge_vvm_w helper_vmerge_vvm_w_riscv64 +#define helper_vmerge_vxm_b helper_vmerge_vxm_b_riscv64 +#define helper_vmerge_vxm_d helper_vmerge_vxm_d_riscv64 +#define helper_vmerge_vxm_h helper_vmerge_vxm_h_riscv64 +#define helper_vmerge_vxm_w helper_vmerge_vxm_w_riscv64 +#define helper_vmfeq_vf_d helper_vmfeq_vf_d_riscv64 +#define helper_vmfeq_vf_h helper_vmfeq_vf_h_riscv64 +#define helper_vmfeq_vf_w helper_vmfeq_vf_w_riscv64 +#define helper_vmfeq_vv_d helper_vmfeq_vv_d_riscv64 +#define helper_vmfeq_vv_h helper_vmfeq_vv_h_riscv64 +#define helper_vmfeq_vv_w 
helper_vmfeq_vv_w_riscv64 +#define helper_vmfge_vf_d helper_vmfge_vf_d_riscv64 +#define helper_vmfge_vf_h helper_vmfge_vf_h_riscv64 +#define helper_vmfge_vf_w helper_vmfge_vf_w_riscv64 +#define helper_vmfgt_vf_d helper_vmfgt_vf_d_riscv64 +#define helper_vmfgt_vf_h helper_vmfgt_vf_h_riscv64 +#define helper_vmfgt_vf_w helper_vmfgt_vf_w_riscv64 +#define helper_vmfirst_m helper_vmfirst_m_riscv64 +#define helper_vmfle_vf_d helper_vmfle_vf_d_riscv64 +#define helper_vmfle_vf_h helper_vmfle_vf_h_riscv64 +#define helper_vmfle_vf_w helper_vmfle_vf_w_riscv64 +#define helper_vmfle_vv_d helper_vmfle_vv_d_riscv64 +#define helper_vmfle_vv_h helper_vmfle_vv_h_riscv64 +#define helper_vmfle_vv_w helper_vmfle_vv_w_riscv64 +#define helper_vmflt_vf_d helper_vmflt_vf_d_riscv64 +#define helper_vmflt_vf_h helper_vmflt_vf_h_riscv64 +#define helper_vmflt_vf_w helper_vmflt_vf_w_riscv64 +#define helper_vmflt_vv_d helper_vmflt_vv_d_riscv64 +#define helper_vmflt_vv_h helper_vmflt_vv_h_riscv64 +#define helper_vmflt_vv_w helper_vmflt_vv_w_riscv64 +#define helper_vmfne_vf_d helper_vmfne_vf_d_riscv64 +#define helper_vmfne_vf_h helper_vmfne_vf_h_riscv64 +#define helper_vmfne_vf_w helper_vmfne_vf_w_riscv64 +#define helper_vmfne_vv_d helper_vmfne_vv_d_riscv64 +#define helper_vmfne_vv_h helper_vmfne_vv_h_riscv64 +#define helper_vmfne_vv_w helper_vmfne_vv_w_riscv64 +#define helper_vmford_vf_d helper_vmford_vf_d_riscv64 +#define helper_vmford_vf_h helper_vmford_vf_h_riscv64 +#define helper_vmford_vf_w helper_vmford_vf_w_riscv64 +#define helper_vmford_vv_d helper_vmford_vv_d_riscv64 +#define helper_vmford_vv_h helper_vmford_vv_h_riscv64 +#define helper_vmford_vv_w helper_vmford_vv_w_riscv64 +#define helper_vmin_vv_b helper_vmin_vv_b_riscv64 +#define helper_vmin_vv_d helper_vmin_vv_d_riscv64 +#define helper_vmin_vv_h helper_vmin_vv_h_riscv64 +#define helper_vmin_vv_w helper_vmin_vv_w_riscv64 +#define helper_vmin_vx_b helper_vmin_vx_b_riscv64 +#define helper_vmin_vx_d helper_vmin_vx_d_riscv64 +#define helper_vmin_vx_h helper_vmin_vx_h_riscv64 +#define helper_vmin_vx_w helper_vmin_vx_w_riscv64 +#define helper_vminu_vv_b helper_vminu_vv_b_riscv64 +#define helper_vminu_vv_d helper_vminu_vv_d_riscv64 +#define helper_vminu_vv_h helper_vminu_vv_h_riscv64 +#define helper_vminu_vv_w helper_vminu_vv_w_riscv64 +#define helper_vminu_vx_b helper_vminu_vx_b_riscv64 +#define helper_vminu_vx_d helper_vminu_vx_d_riscv64 +#define helper_vminu_vx_h helper_vminu_vx_h_riscv64 +#define helper_vminu_vx_w helper_vminu_vx_w_riscv64 +#define helper_vmnand_mm helper_vmnand_mm_riscv64 +#define helper_vmnor_mm helper_vmnor_mm_riscv64 +#define helper_vmor_mm helper_vmor_mm_riscv64 +#define helper_vmornot_mm helper_vmornot_mm_riscv64 +#define helper_vmpopc_m helper_vmpopc_m_riscv64 +#define helper_vmsbc_vvm_b helper_vmsbc_vvm_b_riscv64 +#define helper_vmsbc_vvm_d helper_vmsbc_vvm_d_riscv64 +#define helper_vmsbc_vvm_h helper_vmsbc_vvm_h_riscv64 +#define helper_vmsbc_vvm_w helper_vmsbc_vvm_w_riscv64 +#define helper_vmsbc_vxm_b helper_vmsbc_vxm_b_riscv64 +#define helper_vmsbc_vxm_d helper_vmsbc_vxm_d_riscv64 +#define helper_vmsbc_vxm_h helper_vmsbc_vxm_h_riscv64 +#define helper_vmsbc_vxm_w helper_vmsbc_vxm_w_riscv64 +#define helper_vmsbf_m helper_vmsbf_m_riscv64 +#define helper_vmseq_vv_b helper_vmseq_vv_b_riscv64 +#define helper_vmseq_vv_d helper_vmseq_vv_d_riscv64 +#define helper_vmseq_vv_h helper_vmseq_vv_h_riscv64 +#define helper_vmseq_vv_w helper_vmseq_vv_w_riscv64 +#define helper_vmseq_vx_b helper_vmseq_vx_b_riscv64 +#define helper_vmseq_vx_d 
helper_vmseq_vx_d_riscv64 +#define helper_vmseq_vx_h helper_vmseq_vx_h_riscv64 +#define helper_vmseq_vx_w helper_vmseq_vx_w_riscv64 +#define helper_vmsgt_vx_b helper_vmsgt_vx_b_riscv64 +#define helper_vmsgt_vx_d helper_vmsgt_vx_d_riscv64 +#define helper_vmsgt_vx_h helper_vmsgt_vx_h_riscv64 +#define helper_vmsgt_vx_w helper_vmsgt_vx_w_riscv64 +#define helper_vmsgtu_vx_b helper_vmsgtu_vx_b_riscv64 +#define helper_vmsgtu_vx_d helper_vmsgtu_vx_d_riscv64 +#define helper_vmsgtu_vx_h helper_vmsgtu_vx_h_riscv64 +#define helper_vmsgtu_vx_w helper_vmsgtu_vx_w_riscv64 +#define helper_vmsif_m helper_vmsif_m_riscv64 +#define helper_vmsle_vv_b helper_vmsle_vv_b_riscv64 +#define helper_vmsle_vv_d helper_vmsle_vv_d_riscv64 +#define helper_vmsle_vv_h helper_vmsle_vv_h_riscv64 +#define helper_vmsle_vv_w helper_vmsle_vv_w_riscv64 +#define helper_vmsle_vx_b helper_vmsle_vx_b_riscv64 +#define helper_vmsle_vx_d helper_vmsle_vx_d_riscv64 +#define helper_vmsle_vx_h helper_vmsle_vx_h_riscv64 +#define helper_vmsle_vx_w helper_vmsle_vx_w_riscv64 +#define helper_vmsleu_vv_b helper_vmsleu_vv_b_riscv64 +#define helper_vmsleu_vv_d helper_vmsleu_vv_d_riscv64 +#define helper_vmsleu_vv_h helper_vmsleu_vv_h_riscv64 +#define helper_vmsleu_vv_w helper_vmsleu_vv_w_riscv64 +#define helper_vmsleu_vx_b helper_vmsleu_vx_b_riscv64 +#define helper_vmsleu_vx_d helper_vmsleu_vx_d_riscv64 +#define helper_vmsleu_vx_h helper_vmsleu_vx_h_riscv64 +#define helper_vmsleu_vx_w helper_vmsleu_vx_w_riscv64 +#define helper_vmslt_vv_b helper_vmslt_vv_b_riscv64 +#define helper_vmslt_vv_d helper_vmslt_vv_d_riscv64 +#define helper_vmslt_vv_h helper_vmslt_vv_h_riscv64 +#define helper_vmslt_vv_w helper_vmslt_vv_w_riscv64 +#define helper_vmslt_vx_b helper_vmslt_vx_b_riscv64 +#define helper_vmslt_vx_d helper_vmslt_vx_d_riscv64 +#define helper_vmslt_vx_h helper_vmslt_vx_h_riscv64 +#define helper_vmslt_vx_w helper_vmslt_vx_w_riscv64 +#define helper_vmsltu_vv_b helper_vmsltu_vv_b_riscv64 +#define helper_vmsltu_vv_d helper_vmsltu_vv_d_riscv64 +#define helper_vmsltu_vv_h helper_vmsltu_vv_h_riscv64 +#define helper_vmsltu_vv_w helper_vmsltu_vv_w_riscv64 +#define helper_vmsltu_vx_b helper_vmsltu_vx_b_riscv64 +#define helper_vmsltu_vx_d helper_vmsltu_vx_d_riscv64 +#define helper_vmsltu_vx_h helper_vmsltu_vx_h_riscv64 +#define helper_vmsltu_vx_w helper_vmsltu_vx_w_riscv64 +#define helper_vmsne_vv_b helper_vmsne_vv_b_riscv64 +#define helper_vmsne_vv_d helper_vmsne_vv_d_riscv64 +#define helper_vmsne_vv_h helper_vmsne_vv_h_riscv64 +#define helper_vmsne_vv_w helper_vmsne_vv_w_riscv64 +#define helper_vmsne_vx_b helper_vmsne_vx_b_riscv64 +#define helper_vmsne_vx_d helper_vmsne_vx_d_riscv64 +#define helper_vmsne_vx_h helper_vmsne_vx_h_riscv64 +#define helper_vmsne_vx_w helper_vmsne_vx_w_riscv64 +#define helper_vmsof_m helper_vmsof_m_riscv64 +#define helper_vmul_vv_b helper_vmul_vv_b_riscv64 +#define helper_vmul_vv_d helper_vmul_vv_d_riscv64 +#define helper_vmul_vv_h helper_vmul_vv_h_riscv64 +#define helper_vmul_vv_w helper_vmul_vv_w_riscv64 +#define helper_vmul_vx_b helper_vmul_vx_b_riscv64 +#define helper_vmul_vx_d helper_vmul_vx_d_riscv64 +#define helper_vmul_vx_h helper_vmul_vx_h_riscv64 +#define helper_vmul_vx_w helper_vmul_vx_w_riscv64 +#define helper_vmulh_vv_b helper_vmulh_vv_b_riscv64 +#define helper_vmulh_vv_d helper_vmulh_vv_d_riscv64 +#define helper_vmulh_vv_h helper_vmulh_vv_h_riscv64 +#define helper_vmulh_vv_w helper_vmulh_vv_w_riscv64 +#define helper_vmulh_vx_b helper_vmulh_vx_b_riscv64 +#define helper_vmulh_vx_d helper_vmulh_vx_d_riscv64 +#define 
helper_vmulh_vx_h helper_vmulh_vx_h_riscv64 +#define helper_vmulh_vx_w helper_vmulh_vx_w_riscv64 +#define helper_vmulhsu_vv_b helper_vmulhsu_vv_b_riscv64 +#define helper_vmulhsu_vv_d helper_vmulhsu_vv_d_riscv64 +#define helper_vmulhsu_vv_h helper_vmulhsu_vv_h_riscv64 +#define helper_vmulhsu_vv_w helper_vmulhsu_vv_w_riscv64 +#define helper_vmulhsu_vx_b helper_vmulhsu_vx_b_riscv64 +#define helper_vmulhsu_vx_d helper_vmulhsu_vx_d_riscv64 +#define helper_vmulhsu_vx_h helper_vmulhsu_vx_h_riscv64 +#define helper_vmulhsu_vx_w helper_vmulhsu_vx_w_riscv64 +#define helper_vmulhu_vv_b helper_vmulhu_vv_b_riscv64 +#define helper_vmulhu_vv_d helper_vmulhu_vv_d_riscv64 +#define helper_vmulhu_vv_h helper_vmulhu_vv_h_riscv64 +#define helper_vmulhu_vv_w helper_vmulhu_vv_w_riscv64 +#define helper_vmulhu_vx_b helper_vmulhu_vx_b_riscv64 +#define helper_vmulhu_vx_d helper_vmulhu_vx_d_riscv64 +#define helper_vmulhu_vx_h helper_vmulhu_vx_h_riscv64 +#define helper_vmulhu_vx_w helper_vmulhu_vx_w_riscv64 +#define helper_vmv_v_v_b helper_vmv_v_v_b_riscv64 +#define helper_vmv_v_v_d helper_vmv_v_v_d_riscv64 +#define helper_vmv_v_v_h helper_vmv_v_v_h_riscv64 +#define helper_vmv_v_v_w helper_vmv_v_v_w_riscv64 +#define helper_vmv_v_x_b helper_vmv_v_x_b_riscv64 +#define helper_vmv_v_x_d helper_vmv_v_x_d_riscv64 +#define helper_vmv_v_x_h helper_vmv_v_x_h_riscv64 +#define helper_vmv_v_x_w helper_vmv_v_x_w_riscv64 +#define helper_vmxnor_mm helper_vmxnor_mm_riscv64 +#define helper_vmxor_mm helper_vmxor_mm_riscv64 +#define helper_vnclip_vv_b helper_vnclip_vv_b_riscv64 +#define helper_vnclip_vv_h helper_vnclip_vv_h_riscv64 +#define helper_vnclip_vv_w helper_vnclip_vv_w_riscv64 +#define helper_vnclip_vx_b helper_vnclip_vx_b_riscv64 +#define helper_vnclip_vx_h helper_vnclip_vx_h_riscv64 +#define helper_vnclip_vx_w helper_vnclip_vx_w_riscv64 +#define helper_vnclipu_vv_b helper_vnclipu_vv_b_riscv64 +#define helper_vnclipu_vv_h helper_vnclipu_vv_h_riscv64 +#define helper_vnclipu_vv_w helper_vnclipu_vv_w_riscv64 +#define helper_vnclipu_vx_b helper_vnclipu_vx_b_riscv64 +#define helper_vnclipu_vx_h helper_vnclipu_vx_h_riscv64 +#define helper_vnclipu_vx_w helper_vnclipu_vx_w_riscv64 +#define helper_vnmsac_vv_b helper_vnmsac_vv_b_riscv64 +#define helper_vnmsac_vv_d helper_vnmsac_vv_d_riscv64 +#define helper_vnmsac_vv_h helper_vnmsac_vv_h_riscv64 +#define helper_vnmsac_vv_w helper_vnmsac_vv_w_riscv64 +#define helper_vnmsac_vx_b helper_vnmsac_vx_b_riscv64 +#define helper_vnmsac_vx_d helper_vnmsac_vx_d_riscv64 +#define helper_vnmsac_vx_h helper_vnmsac_vx_h_riscv64 +#define helper_vnmsac_vx_w helper_vnmsac_vx_w_riscv64 +#define helper_vnmsub_vv_b helper_vnmsub_vv_b_riscv64 +#define helper_vnmsub_vv_d helper_vnmsub_vv_d_riscv64 +#define helper_vnmsub_vv_h helper_vnmsub_vv_h_riscv64 +#define helper_vnmsub_vv_w helper_vnmsub_vv_w_riscv64 +#define helper_vnmsub_vx_b helper_vnmsub_vx_b_riscv64 +#define helper_vnmsub_vx_d helper_vnmsub_vx_d_riscv64 +#define helper_vnmsub_vx_h helper_vnmsub_vx_h_riscv64 +#define helper_vnmsub_vx_w helper_vnmsub_vx_w_riscv64 +#define helper_vnsra_vv_b helper_vnsra_vv_b_riscv64 +#define helper_vnsra_vv_h helper_vnsra_vv_h_riscv64 +#define helper_vnsra_vv_w helper_vnsra_vv_w_riscv64 +#define helper_vnsra_vx_b helper_vnsra_vx_b_riscv64 +#define helper_vnsra_vx_h helper_vnsra_vx_h_riscv64 +#define helper_vnsra_vx_w helper_vnsra_vx_w_riscv64 +#define helper_vnsrl_vv_b helper_vnsrl_vv_b_riscv64 +#define helper_vnsrl_vv_h helper_vnsrl_vv_h_riscv64 +#define helper_vnsrl_vv_w helper_vnsrl_vv_w_riscv64 +#define 
helper_vnsrl_vx_b helper_vnsrl_vx_b_riscv64 +#define helper_vnsrl_vx_h helper_vnsrl_vx_h_riscv64 +#define helper_vnsrl_vx_w helper_vnsrl_vx_w_riscv64 +#define helper_vor_vv_b helper_vor_vv_b_riscv64 +#define helper_vor_vv_d helper_vor_vv_d_riscv64 +#define helper_vor_vv_h helper_vor_vv_h_riscv64 +#define helper_vor_vv_w helper_vor_vv_w_riscv64 +#define helper_vor_vx_b helper_vor_vx_b_riscv64 +#define helper_vor_vx_d helper_vor_vx_d_riscv64 +#define helper_vor_vx_h helper_vor_vx_h_riscv64 +#define helper_vor_vx_w helper_vor_vx_w_riscv64 +#define helper_vredand_vs_b helper_vredand_vs_b_riscv64 +#define helper_vredand_vs_d helper_vredand_vs_d_riscv64 +#define helper_vredand_vs_h helper_vredand_vs_h_riscv64 +#define helper_vredand_vs_w helper_vredand_vs_w_riscv64 +#define helper_vredmax_vs_b helper_vredmax_vs_b_riscv64 +#define helper_vredmax_vs_d helper_vredmax_vs_d_riscv64 +#define helper_vredmax_vs_h helper_vredmax_vs_h_riscv64 +#define helper_vredmax_vs_w helper_vredmax_vs_w_riscv64 +#define helper_vredmaxu_vs_b helper_vredmaxu_vs_b_riscv64 +#define helper_vredmaxu_vs_d helper_vredmaxu_vs_d_riscv64 +#define helper_vredmaxu_vs_h helper_vredmaxu_vs_h_riscv64 +#define helper_vredmaxu_vs_w helper_vredmaxu_vs_w_riscv64 +#define helper_vredmin_vs_b helper_vredmin_vs_b_riscv64 +#define helper_vredmin_vs_d helper_vredmin_vs_d_riscv64 +#define helper_vredmin_vs_h helper_vredmin_vs_h_riscv64 +#define helper_vredmin_vs_w helper_vredmin_vs_w_riscv64 +#define helper_vredminu_vs_b helper_vredminu_vs_b_riscv64 +#define helper_vredminu_vs_d helper_vredminu_vs_d_riscv64 +#define helper_vredminu_vs_h helper_vredminu_vs_h_riscv64 +#define helper_vredminu_vs_w helper_vredminu_vs_w_riscv64 +#define helper_vredor_vs_b helper_vredor_vs_b_riscv64 +#define helper_vredor_vs_d helper_vredor_vs_d_riscv64 +#define helper_vredor_vs_h helper_vredor_vs_h_riscv64 +#define helper_vredor_vs_w helper_vredor_vs_w_riscv64 +#define helper_vredsum_vs_b helper_vredsum_vs_b_riscv64 +#define helper_vredsum_vs_d helper_vredsum_vs_d_riscv64 +#define helper_vredsum_vs_h helper_vredsum_vs_h_riscv64 +#define helper_vredsum_vs_w helper_vredsum_vs_w_riscv64 +#define helper_vredxor_vs_b helper_vredxor_vs_b_riscv64 +#define helper_vredxor_vs_d helper_vredxor_vs_d_riscv64 +#define helper_vredxor_vs_h helper_vredxor_vs_h_riscv64 +#define helper_vredxor_vs_w helper_vredxor_vs_w_riscv64 +#define helper_vrem_vv_b helper_vrem_vv_b_riscv64 +#define helper_vrem_vv_d helper_vrem_vv_d_riscv64 +#define helper_vrem_vv_h helper_vrem_vv_h_riscv64 +#define helper_vrem_vv_w helper_vrem_vv_w_riscv64 +#define helper_vrem_vx_b helper_vrem_vx_b_riscv64 +#define helper_vrem_vx_d helper_vrem_vx_d_riscv64 +#define helper_vrem_vx_h helper_vrem_vx_h_riscv64 +#define helper_vrem_vx_w helper_vrem_vx_w_riscv64 +#define helper_vremu_vv_b helper_vremu_vv_b_riscv64 +#define helper_vremu_vv_d helper_vremu_vv_d_riscv64 +#define helper_vremu_vv_h helper_vremu_vv_h_riscv64 +#define helper_vremu_vv_w helper_vremu_vv_w_riscv64 +#define helper_vremu_vx_b helper_vremu_vx_b_riscv64 +#define helper_vremu_vx_d helper_vremu_vx_d_riscv64 +#define helper_vremu_vx_h helper_vremu_vx_h_riscv64 +#define helper_vremu_vx_w helper_vremu_vx_w_riscv64 +#define helper_vrgather_vv_b helper_vrgather_vv_b_riscv64 +#define helper_vrgather_vv_d helper_vrgather_vv_d_riscv64 +#define helper_vrgather_vv_h helper_vrgather_vv_h_riscv64 +#define helper_vrgather_vv_w helper_vrgather_vv_w_riscv64 +#define helper_vrgather_vx_b helper_vrgather_vx_b_riscv64 +#define helper_vrgather_vx_d 
helper_vrgather_vx_d_riscv64 +#define helper_vrgather_vx_h helper_vrgather_vx_h_riscv64 +#define helper_vrgather_vx_w helper_vrgather_vx_w_riscv64 +#define helper_vrsub_vx_b helper_vrsub_vx_b_riscv64 +#define helper_vrsub_vx_d helper_vrsub_vx_d_riscv64 +#define helper_vrsub_vx_h helper_vrsub_vx_h_riscv64 +#define helper_vrsub_vx_w helper_vrsub_vx_w_riscv64 +#define helper_vsadd_vv_b helper_vsadd_vv_b_riscv64 +#define helper_vsadd_vv_d helper_vsadd_vv_d_riscv64 +#define helper_vsadd_vv_h helper_vsadd_vv_h_riscv64 +#define helper_vsadd_vv_w helper_vsadd_vv_w_riscv64 +#define helper_vsadd_vx_b helper_vsadd_vx_b_riscv64 +#define helper_vsadd_vx_d helper_vsadd_vx_d_riscv64 +#define helper_vsadd_vx_h helper_vsadd_vx_h_riscv64 +#define helper_vsadd_vx_w helper_vsadd_vx_w_riscv64 +#define helper_vsaddu_vv_b helper_vsaddu_vv_b_riscv64 +#define helper_vsaddu_vv_d helper_vsaddu_vv_d_riscv64 +#define helper_vsaddu_vv_h helper_vsaddu_vv_h_riscv64 +#define helper_vsaddu_vv_w helper_vsaddu_vv_w_riscv64 +#define helper_vsaddu_vx_b helper_vsaddu_vx_b_riscv64 +#define helper_vsaddu_vx_d helper_vsaddu_vx_d_riscv64 +#define helper_vsaddu_vx_h helper_vsaddu_vx_h_riscv64 +#define helper_vsaddu_vx_w helper_vsaddu_vx_w_riscv64 +#define helper_vsb_v_b helper_vsb_v_b_riscv64 +#define helper_vsb_v_b_mask helper_vsb_v_b_mask_riscv64 +#define helper_vsb_v_d helper_vsb_v_d_riscv64 +#define helper_vsb_v_d_mask helper_vsb_v_d_mask_riscv64 +#define helper_vsb_v_h helper_vsb_v_h_riscv64 +#define helper_vsb_v_h_mask helper_vsb_v_h_mask_riscv64 +#define helper_vsb_v_w helper_vsb_v_w_riscv64 +#define helper_vsb_v_w_mask helper_vsb_v_w_mask_riscv64 +#define helper_vsbc_vvm_b helper_vsbc_vvm_b_riscv64 +#define helper_vsbc_vvm_d helper_vsbc_vvm_d_riscv64 +#define helper_vsbc_vvm_h helper_vsbc_vvm_h_riscv64 +#define helper_vsbc_vvm_w helper_vsbc_vvm_w_riscv64 +#define helper_vsbc_vxm_b helper_vsbc_vxm_b_riscv64 +#define helper_vsbc_vxm_d helper_vsbc_vxm_d_riscv64 +#define helper_vsbc_vxm_h helper_vsbc_vxm_h_riscv64 +#define helper_vsbc_vxm_w helper_vsbc_vxm_w_riscv64 +#define helper_vse_v_b helper_vse_v_b_riscv64 +#define helper_vse_v_b_mask helper_vse_v_b_mask_riscv64 +#define helper_vse_v_d helper_vse_v_d_riscv64 +#define helper_vse_v_d_mask helper_vse_v_d_mask_riscv64 +#define helper_vse_v_h helper_vse_v_h_riscv64 +#define helper_vse_v_h_mask helper_vse_v_h_mask_riscv64 +#define helper_vse_v_w helper_vse_v_w_riscv64 +#define helper_vse_v_w_mask helper_vse_v_w_mask_riscv64 +#define helper_vsetvl helper_vsetvl_riscv64 +#define helper_vsh_v_d helper_vsh_v_d_riscv64 +#define helper_vsh_v_d_mask helper_vsh_v_d_mask_riscv64 +#define helper_vsh_v_h helper_vsh_v_h_riscv64 +#define helper_vsh_v_h_mask helper_vsh_v_h_mask_riscv64 +#define helper_vsh_v_w helper_vsh_v_w_riscv64 +#define helper_vsh_v_w_mask helper_vsh_v_w_mask_riscv64 +#define helper_vslide1down_vx_b helper_vslide1down_vx_b_riscv64 +#define helper_vslide1down_vx_d helper_vslide1down_vx_d_riscv64 +#define helper_vslide1down_vx_h helper_vslide1down_vx_h_riscv64 +#define helper_vslide1down_vx_w helper_vslide1down_vx_w_riscv64 +#define helper_vslide1up_vx_b helper_vslide1up_vx_b_riscv64 +#define helper_vslide1up_vx_d helper_vslide1up_vx_d_riscv64 +#define helper_vslide1up_vx_h helper_vslide1up_vx_h_riscv64 +#define helper_vslide1up_vx_w helper_vslide1up_vx_w_riscv64 +#define helper_vslidedown_vx_b helper_vslidedown_vx_b_riscv64 +#define helper_vslidedown_vx_d helper_vslidedown_vx_d_riscv64 +#define helper_vslidedown_vx_h helper_vslidedown_vx_h_riscv64 +#define 
helper_vslidedown_vx_w helper_vslidedown_vx_w_riscv64 +#define helper_vslideup_vx_b helper_vslideup_vx_b_riscv64 +#define helper_vslideup_vx_d helper_vslideup_vx_d_riscv64 +#define helper_vslideup_vx_h helper_vslideup_vx_h_riscv64 +#define helper_vslideup_vx_w helper_vslideup_vx_w_riscv64 +#define helper_vsll_vv_b helper_vsll_vv_b_riscv64 +#define helper_vsll_vv_d helper_vsll_vv_d_riscv64 +#define helper_vsll_vv_h helper_vsll_vv_h_riscv64 +#define helper_vsll_vv_w helper_vsll_vv_w_riscv64 +#define helper_vsll_vx_b helper_vsll_vx_b_riscv64 +#define helper_vsll_vx_d helper_vsll_vx_d_riscv64 +#define helper_vsll_vx_h helper_vsll_vx_h_riscv64 +#define helper_vsll_vx_w helper_vsll_vx_w_riscv64 +#define helper_vsmul_vv_b helper_vsmul_vv_b_riscv64 +#define helper_vsmul_vv_d helper_vsmul_vv_d_riscv64 +#define helper_vsmul_vv_h helper_vsmul_vv_h_riscv64 +#define helper_vsmul_vv_w helper_vsmul_vv_w_riscv64 +#define helper_vsmul_vx_b helper_vsmul_vx_b_riscv64 +#define helper_vsmul_vx_d helper_vsmul_vx_d_riscv64 +#define helper_vsmul_vx_h helper_vsmul_vx_h_riscv64 +#define helper_vsmul_vx_w helper_vsmul_vx_w_riscv64 +#define helper_vsra_vv_b helper_vsra_vv_b_riscv64 +#define helper_vsra_vv_d helper_vsra_vv_d_riscv64 +#define helper_vsra_vv_h helper_vsra_vv_h_riscv64 +#define helper_vsra_vv_w helper_vsra_vv_w_riscv64 +#define helper_vsra_vx_b helper_vsra_vx_b_riscv64 +#define helper_vsra_vx_d helper_vsra_vx_d_riscv64 +#define helper_vsra_vx_h helper_vsra_vx_h_riscv64 +#define helper_vsra_vx_w helper_vsra_vx_w_riscv64 +#define helper_vsrl_vv_b helper_vsrl_vv_b_riscv64 +#define helper_vsrl_vv_d helper_vsrl_vv_d_riscv64 +#define helper_vsrl_vv_h helper_vsrl_vv_h_riscv64 +#define helper_vsrl_vv_w helper_vsrl_vv_w_riscv64 +#define helper_vsrl_vx_b helper_vsrl_vx_b_riscv64 +#define helper_vsrl_vx_d helper_vsrl_vx_d_riscv64 +#define helper_vsrl_vx_h helper_vsrl_vx_h_riscv64 +#define helper_vsrl_vx_w helper_vsrl_vx_w_riscv64 +#define helper_vssb_v_b helper_vssb_v_b_riscv64 +#define helper_vssb_v_d helper_vssb_v_d_riscv64 +#define helper_vssb_v_h helper_vssb_v_h_riscv64 +#define helper_vssb_v_w helper_vssb_v_w_riscv64 +#define helper_vsse_v_b helper_vsse_v_b_riscv64 +#define helper_vsse_v_d helper_vsse_v_d_riscv64 +#define helper_vsse_v_h helper_vsse_v_h_riscv64 +#define helper_vsse_v_w helper_vsse_v_w_riscv64 +#define helper_vssh_v_d helper_vssh_v_d_riscv64 +#define helper_vssh_v_h helper_vssh_v_h_riscv64 +#define helper_vssh_v_w helper_vssh_v_w_riscv64 +#define helper_vssra_vv_b helper_vssra_vv_b_riscv64 +#define helper_vssra_vv_d helper_vssra_vv_d_riscv64 +#define helper_vssra_vv_h helper_vssra_vv_h_riscv64 +#define helper_vssra_vv_w helper_vssra_vv_w_riscv64 +#define helper_vssra_vx_b helper_vssra_vx_b_riscv64 +#define helper_vssra_vx_d helper_vssra_vx_d_riscv64 +#define helper_vssra_vx_h helper_vssra_vx_h_riscv64 +#define helper_vssra_vx_w helper_vssra_vx_w_riscv64 +#define helper_vssrl_vv_b helper_vssrl_vv_b_riscv64 +#define helper_vssrl_vv_d helper_vssrl_vv_d_riscv64 +#define helper_vssrl_vv_h helper_vssrl_vv_h_riscv64 +#define helper_vssrl_vv_w helper_vssrl_vv_w_riscv64 +#define helper_vssrl_vx_b helper_vssrl_vx_b_riscv64 +#define helper_vssrl_vx_d helper_vssrl_vx_d_riscv64 +#define helper_vssrl_vx_h helper_vssrl_vx_h_riscv64 +#define helper_vssrl_vx_w helper_vssrl_vx_w_riscv64 +#define helper_vssub_vv_b helper_vssub_vv_b_riscv64 +#define helper_vssub_vv_d helper_vssub_vv_d_riscv64 +#define helper_vssub_vv_h helper_vssub_vv_h_riscv64 +#define helper_vssub_vv_w helper_vssub_vv_w_riscv64 +#define 
helper_vssub_vx_b helper_vssub_vx_b_riscv64 +#define helper_vssub_vx_d helper_vssub_vx_d_riscv64 +#define helper_vssub_vx_h helper_vssub_vx_h_riscv64 +#define helper_vssub_vx_w helper_vssub_vx_w_riscv64 +#define helper_vssubu_vv_b helper_vssubu_vv_b_riscv64 +#define helper_vssubu_vv_d helper_vssubu_vv_d_riscv64 +#define helper_vssubu_vv_h helper_vssubu_vv_h_riscv64 +#define helper_vssubu_vv_w helper_vssubu_vv_w_riscv64 +#define helper_vssubu_vx_b helper_vssubu_vx_b_riscv64 +#define helper_vssubu_vx_d helper_vssubu_vx_d_riscv64 +#define helper_vssubu_vx_h helper_vssubu_vx_h_riscv64 +#define helper_vssubu_vx_w helper_vssubu_vx_w_riscv64 +#define helper_vssw_v_d helper_vssw_v_d_riscv64 +#define helper_vssw_v_w helper_vssw_v_w_riscv64 +#define helper_vsub_vv_b helper_vsub_vv_b_riscv64 +#define helper_vsub_vv_d helper_vsub_vv_d_riscv64 +#define helper_vsub_vv_h helper_vsub_vv_h_riscv64 +#define helper_vsub_vv_w helper_vsub_vv_w_riscv64 +#define helper_vsub_vx_b helper_vsub_vx_b_riscv64 +#define helper_vsub_vx_d helper_vsub_vx_d_riscv64 +#define helper_vsub_vx_h helper_vsub_vx_h_riscv64 +#define helper_vsub_vx_w helper_vsub_vx_w_riscv64 +#define helper_vsw_v_d helper_vsw_v_d_riscv64 +#define helper_vsw_v_d_mask helper_vsw_v_d_mask_riscv64 +#define helper_vsw_v_w helper_vsw_v_w_riscv64 +#define helper_vsw_v_w_mask helper_vsw_v_w_mask_riscv64 +#define helper_vsxb_v_b helper_vsxb_v_b_riscv64 +#define helper_vsxb_v_d helper_vsxb_v_d_riscv64 +#define helper_vsxb_v_h helper_vsxb_v_h_riscv64 +#define helper_vsxb_v_w helper_vsxb_v_w_riscv64 +#define helper_vsxe_v_b helper_vsxe_v_b_riscv64 +#define helper_vsxe_v_d helper_vsxe_v_d_riscv64 +#define helper_vsxe_v_h helper_vsxe_v_h_riscv64 +#define helper_vsxe_v_w helper_vsxe_v_w_riscv64 +#define helper_vsxh_v_d helper_vsxh_v_d_riscv64 +#define helper_vsxh_v_h helper_vsxh_v_h_riscv64 +#define helper_vsxh_v_w helper_vsxh_v_w_riscv64 +#define helper_vsxw_v_d helper_vsxw_v_d_riscv64 +#define helper_vsxw_v_w helper_vsxw_v_w_riscv64 +#define helper_vwadd_vv_b helper_vwadd_vv_b_riscv64 +#define helper_vwadd_vv_h helper_vwadd_vv_h_riscv64 +#define helper_vwadd_vv_w helper_vwadd_vv_w_riscv64 +#define helper_vwadd_vx_b helper_vwadd_vx_b_riscv64 +#define helper_vwadd_vx_h helper_vwadd_vx_h_riscv64 +#define helper_vwadd_vx_w helper_vwadd_vx_w_riscv64 +#define helper_vwadd_wv_b helper_vwadd_wv_b_riscv64 +#define helper_vwadd_wv_h helper_vwadd_wv_h_riscv64 +#define helper_vwadd_wv_w helper_vwadd_wv_w_riscv64 +#define helper_vwadd_wx_b helper_vwadd_wx_b_riscv64 +#define helper_vwadd_wx_h helper_vwadd_wx_h_riscv64 +#define helper_vwadd_wx_w helper_vwadd_wx_w_riscv64 +#define helper_vwaddu_vv_b helper_vwaddu_vv_b_riscv64 +#define helper_vwaddu_vv_h helper_vwaddu_vv_h_riscv64 +#define helper_vwaddu_vv_w helper_vwaddu_vv_w_riscv64 +#define helper_vwaddu_vx_b helper_vwaddu_vx_b_riscv64 +#define helper_vwaddu_vx_h helper_vwaddu_vx_h_riscv64 +#define helper_vwaddu_vx_w helper_vwaddu_vx_w_riscv64 +#define helper_vwaddu_wv_b helper_vwaddu_wv_b_riscv64 +#define helper_vwaddu_wv_h helper_vwaddu_wv_h_riscv64 +#define helper_vwaddu_wv_w helper_vwaddu_wv_w_riscv64 +#define helper_vwaddu_wx_b helper_vwaddu_wx_b_riscv64 +#define helper_vwaddu_wx_h helper_vwaddu_wx_h_riscv64 +#define helper_vwaddu_wx_w helper_vwaddu_wx_w_riscv64 +#define helper_vwmacc_vv_b helper_vwmacc_vv_b_riscv64 +#define helper_vwmacc_vv_h helper_vwmacc_vv_h_riscv64 +#define helper_vwmacc_vv_w helper_vwmacc_vv_w_riscv64 +#define helper_vwmacc_vx_b helper_vwmacc_vx_b_riscv64 +#define helper_vwmacc_vx_h 
helper_vwmacc_vx_h_riscv64 +#define helper_vwmacc_vx_w helper_vwmacc_vx_w_riscv64 +#define helper_vwmaccsu_vv_b helper_vwmaccsu_vv_b_riscv64 +#define helper_vwmaccsu_vv_h helper_vwmaccsu_vv_h_riscv64 +#define helper_vwmaccsu_vv_w helper_vwmaccsu_vv_w_riscv64 +#define helper_vwmaccsu_vx_b helper_vwmaccsu_vx_b_riscv64 +#define helper_vwmaccsu_vx_h helper_vwmaccsu_vx_h_riscv64 +#define helper_vwmaccsu_vx_w helper_vwmaccsu_vx_w_riscv64 +#define helper_vwmaccu_vv_b helper_vwmaccu_vv_b_riscv64 +#define helper_vwmaccu_vv_h helper_vwmaccu_vv_h_riscv64 +#define helper_vwmaccu_vv_w helper_vwmaccu_vv_w_riscv64 +#define helper_vwmaccu_vx_b helper_vwmaccu_vx_b_riscv64 +#define helper_vwmaccu_vx_h helper_vwmaccu_vx_h_riscv64 +#define helper_vwmaccu_vx_w helper_vwmaccu_vx_w_riscv64 +#define helper_vwmaccus_vx_b helper_vwmaccus_vx_b_riscv64 +#define helper_vwmaccus_vx_h helper_vwmaccus_vx_h_riscv64 +#define helper_vwmaccus_vx_w helper_vwmaccus_vx_w_riscv64 +#define helper_vwmul_vv_b helper_vwmul_vv_b_riscv64 +#define helper_vwmul_vv_h helper_vwmul_vv_h_riscv64 +#define helper_vwmul_vv_w helper_vwmul_vv_w_riscv64 +#define helper_vwmul_vx_b helper_vwmul_vx_b_riscv64 +#define helper_vwmul_vx_h helper_vwmul_vx_h_riscv64 +#define helper_vwmul_vx_w helper_vwmul_vx_w_riscv64 +#define helper_vwmulsu_vv_b helper_vwmulsu_vv_b_riscv64 +#define helper_vwmulsu_vv_h helper_vwmulsu_vv_h_riscv64 +#define helper_vwmulsu_vv_w helper_vwmulsu_vv_w_riscv64 +#define helper_vwmulsu_vx_b helper_vwmulsu_vx_b_riscv64 +#define helper_vwmulsu_vx_h helper_vwmulsu_vx_h_riscv64 +#define helper_vwmulsu_vx_w helper_vwmulsu_vx_w_riscv64 +#define helper_vwmulu_vv_b helper_vwmulu_vv_b_riscv64 +#define helper_vwmulu_vv_h helper_vwmulu_vv_h_riscv64 +#define helper_vwmulu_vv_w helper_vwmulu_vv_w_riscv64 +#define helper_vwmulu_vx_b helper_vwmulu_vx_b_riscv64 +#define helper_vwmulu_vx_h helper_vwmulu_vx_h_riscv64 +#define helper_vwmulu_vx_w helper_vwmulu_vx_w_riscv64 +#define helper_vwredsum_vs_b helper_vwredsum_vs_b_riscv64 +#define helper_vwredsum_vs_h helper_vwredsum_vs_h_riscv64 +#define helper_vwredsum_vs_w helper_vwredsum_vs_w_riscv64 +#define helper_vwredsumu_vs_b helper_vwredsumu_vs_b_riscv64 +#define helper_vwredsumu_vs_h helper_vwredsumu_vs_h_riscv64 +#define helper_vwredsumu_vs_w helper_vwredsumu_vs_w_riscv64 +#define helper_vwsmacc_vv_b helper_vwsmacc_vv_b_riscv64 +#define helper_vwsmacc_vv_h helper_vwsmacc_vv_h_riscv64 +#define helper_vwsmacc_vv_w helper_vwsmacc_vv_w_riscv64 +#define helper_vwsmacc_vx_b helper_vwsmacc_vx_b_riscv64 +#define helper_vwsmacc_vx_h helper_vwsmacc_vx_h_riscv64 +#define helper_vwsmacc_vx_w helper_vwsmacc_vx_w_riscv64 +#define helper_vwsmaccsu_vv_b helper_vwsmaccsu_vv_b_riscv64 +#define helper_vwsmaccsu_vv_h helper_vwsmaccsu_vv_h_riscv64 +#define helper_vwsmaccsu_vv_w helper_vwsmaccsu_vv_w_riscv64 +#define helper_vwsmaccsu_vx_b helper_vwsmaccsu_vx_b_riscv64 +#define helper_vwsmaccsu_vx_h helper_vwsmaccsu_vx_h_riscv64 +#define helper_vwsmaccsu_vx_w helper_vwsmaccsu_vx_w_riscv64 +#define helper_vwsmaccu_vv_b helper_vwsmaccu_vv_b_riscv64 +#define helper_vwsmaccu_vv_h helper_vwsmaccu_vv_h_riscv64 +#define helper_vwsmaccu_vv_w helper_vwsmaccu_vv_w_riscv64 +#define helper_vwsmaccu_vx_b helper_vwsmaccu_vx_b_riscv64 +#define helper_vwsmaccu_vx_h helper_vwsmaccu_vx_h_riscv64 +#define helper_vwsmaccu_vx_w helper_vwsmaccu_vx_w_riscv64 +#define helper_vwsmaccus_vx_b helper_vwsmaccus_vx_b_riscv64 +#define helper_vwsmaccus_vx_h helper_vwsmaccus_vx_h_riscv64 +#define helper_vwsmaccus_vx_w helper_vwsmaccus_vx_w_riscv64 
+#define helper_vwsub_vv_b helper_vwsub_vv_b_riscv64 +#define helper_vwsub_vv_h helper_vwsub_vv_h_riscv64 +#define helper_vwsub_vv_w helper_vwsub_vv_w_riscv64 +#define helper_vwsub_vx_b helper_vwsub_vx_b_riscv64 +#define helper_vwsub_vx_h helper_vwsub_vx_h_riscv64 +#define helper_vwsub_vx_w helper_vwsub_vx_w_riscv64 +#define helper_vwsub_wv_b helper_vwsub_wv_b_riscv64 +#define helper_vwsub_wv_h helper_vwsub_wv_h_riscv64 +#define helper_vwsub_wv_w helper_vwsub_wv_w_riscv64 +#define helper_vwsub_wx_b helper_vwsub_wx_b_riscv64 +#define helper_vwsub_wx_h helper_vwsub_wx_h_riscv64 +#define helper_vwsub_wx_w helper_vwsub_wx_w_riscv64 +#define helper_vwsubu_vv_b helper_vwsubu_vv_b_riscv64 +#define helper_vwsubu_vv_h helper_vwsubu_vv_h_riscv64 +#define helper_vwsubu_vv_w helper_vwsubu_vv_w_riscv64 +#define helper_vwsubu_vx_b helper_vwsubu_vx_b_riscv64 +#define helper_vwsubu_vx_h helper_vwsubu_vx_h_riscv64 +#define helper_vwsubu_vx_w helper_vwsubu_vx_w_riscv64 +#define helper_vwsubu_wv_b helper_vwsubu_wv_b_riscv64 +#define helper_vwsubu_wv_h helper_vwsubu_wv_h_riscv64 +#define helper_vwsubu_wv_w helper_vwsubu_wv_w_riscv64 +#define helper_vwsubu_wx_b helper_vwsubu_wx_b_riscv64 +#define helper_vwsubu_wx_h helper_vwsubu_wx_h_riscv64 +#define helper_vwsubu_wx_w helper_vwsubu_wx_w_riscv64 +#define helper_vxor_vv_b helper_vxor_vv_b_riscv64 +#define helper_vxor_vv_d helper_vxor_vv_d_riscv64 +#define helper_vxor_vv_h helper_vxor_vv_h_riscv64 +#define helper_vxor_vv_w helper_vxor_vv_w_riscv64 +#define helper_vxor_vx_b helper_vxor_vx_b_riscv64 +#define helper_vxor_vx_d helper_vxor_vx_d_riscv64 +#define helper_vxor_vx_h helper_vxor_vx_h_riscv64 +#define helper_vxor_vx_w helper_vxor_vx_w_riscv64 #endif diff --git a/qemu/s390x.h b/qemu/s390x.h index 1906872bc6..d777300ed6 100644 --- a/qemu/s390x.h +++ b/qemu/s390x.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_s390x #define tcg_gen_shr_i64 tcg_gen_shr_i64_s390x #define tcg_gen_st_i64 tcg_gen_st_i64_s390x +#define tcg_gen_add_i64 tcg_gen_add_i64_s390x +#define tcg_gen_sub_i64 tcg_gen_sub_i64_s390x #define tcg_gen_xor_i64 tcg_gen_xor_i64_s390x +#define tcg_gen_neg_i64 tcg_gen_neg_i64_s390x #define cpu_icount_to_ns cpu_icount_to_ns_s390x #define cpu_is_stopped cpu_is_stopped_s390x #define cpu_get_ticks cpu_get_ticks_s390x @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_s390x #define floatx80_mul floatx80_mul_s390x #define floatx80_div floatx80_div_s390x +#define floatx80_modrem floatx80_modrem_s390x +#define floatx80_mod floatx80_mod_s390x #define floatx80_rem floatx80_rem_s390x #define floatx80_sqrt floatx80_sqrt_s390x #define floatx80_eq floatx80_eq_s390x @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_s390x #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_s390x #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_s390x +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_s390x #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_s390x #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_s390x #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_s390x @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_s390x #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_s390x #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_s390x +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_s390x +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_s390x #define tcg_gen_gvec_sari tcg_gen_gvec_sari_s390x +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_s390x +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_s390x #define tcg_gen_gvec_shls 
tcg_gen_gvec_shls_s390x #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_s390x #define tcg_gen_gvec_sars tcg_gen_gvec_sars_s390x +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_s390x #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_s390x #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_s390x #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_s390x +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_s390x +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_s390x #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_s390x #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_s390x #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_s390x @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_s390x #define tcg_gen_shri_vec tcg_gen_shri_vec_s390x #define tcg_gen_sari_vec tcg_gen_sari_vec_s390x +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_s390x +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_s390x #define tcg_gen_cmp_vec tcg_gen_cmp_vec_s390x #define tcg_gen_add_vec tcg_gen_add_vec_s390x #define tcg_gen_sub_vec tcg_gen_sub_vec_s390x @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_s390x #define tcg_gen_shrv_vec tcg_gen_shrv_vec_s390x #define tcg_gen_sarv_vec tcg_gen_sarv_vec_s390x +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_s390x +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_s390x #define tcg_gen_shls_vec tcg_gen_shls_vec_s390x #define tcg_gen_shrs_vec tcg_gen_shrs_vec_s390x #define tcg_gen_sars_vec tcg_gen_sars_vec_s390x +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_s390x #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_s390x #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_s390x #define tb_htable_lookup tb_htable_lookup_s390x @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_s390x #define cpu_loop_exit_atomic cpu_loop_exit_atomic_s390x #define tlb_init tlb_init_s390x +#define tlb_destroy tlb_destroy_s390x #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_s390x #define tlb_flush tlb_flush_s390x #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_s390x @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_s390x #define get_page_addr_code_hostp get_page_addr_code_hostp_s390x #define get_page_addr_code get_page_addr_code_s390x +#define probe_access_flags probe_access_flags_s390x #define probe_access probe_access_s390x #define tlb_vaddr_to_host tlb_vaddr_to_host_s390x #define helper_ret_ldub_mmu helper_ret_ldub_mmu_s390x @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_s390x #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_s390x #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_s390x -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_s390x -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_s390x -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_s390x -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_s390x +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_s390x +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_s390x +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_s390x +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_s390x +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_s390x +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_s390x +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_s390x +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_s390x #define cpu_ldub_data_ra cpu_ldub_data_ra_s390x #define cpu_ldsb_data_ra cpu_ldsb_data_ra_s390x -#define cpu_lduw_data_ra cpu_lduw_data_ra_s390x -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_s390x -#define cpu_ldl_data_ra cpu_ldl_data_ra_s390x -#define cpu_ldq_data_ra cpu_ldq_data_ra_s390x +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_s390x +#define 
cpu_lduw_le_data_ra cpu_lduw_le_data_ra_s390x +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_s390x +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_s390x +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_s390x +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_s390x +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_s390x +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_s390x #define cpu_ldub_data cpu_ldub_data_s390x #define cpu_ldsb_data cpu_ldsb_data_s390x -#define cpu_lduw_data cpu_lduw_data_s390x -#define cpu_ldsw_data cpu_ldsw_data_s390x -#define cpu_ldl_data cpu_ldl_data_s390x -#define cpu_ldq_data cpu_ldq_data_s390x +#define cpu_lduw_be_data cpu_lduw_be_data_s390x +#define cpu_lduw_le_data cpu_lduw_le_data_s390x +#define cpu_ldsw_be_data cpu_ldsw_be_data_s390x +#define cpu_ldsw_le_data cpu_ldsw_le_data_s390x +#define cpu_ldl_be_data cpu_ldl_be_data_s390x +#define cpu_ldl_le_data cpu_ldl_le_data_s390x +#define cpu_ldq_le_data cpu_ldq_le_data_s390x +#define cpu_ldq_be_data cpu_ldq_be_data_s390x #define helper_ret_stb_mmu helper_ret_stb_mmu_s390x #define helper_le_stw_mmu helper_le_stw_mmu_s390x #define helper_be_stw_mmu helper_be_stw_mmu_s390x @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_s390x #define helper_be_stq_mmu helper_be_stq_mmu_s390x #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_s390x -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_s390x -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_s390x -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_s390x +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_s390x +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_s390x +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_s390x +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_s390x +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_s390x +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_s390x #define cpu_stb_data_ra cpu_stb_data_ra_s390x -#define cpu_stw_data_ra cpu_stw_data_ra_s390x -#define cpu_stl_data_ra cpu_stl_data_ra_s390x -#define cpu_stq_data_ra cpu_stq_data_ra_s390x +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_s390x +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_s390x +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_s390x +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_s390x +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_s390x +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_s390x #define cpu_stb_data cpu_stb_data_s390x -#define cpu_stw_data cpu_stw_data_s390x -#define cpu_stl_data cpu_stl_data_s390x -#define cpu_stq_data cpu_stq_data_s390x +#define cpu_stw_be_data cpu_stw_be_data_s390x +#define cpu_stw_le_data cpu_stw_le_data_s390x +#define cpu_stl_be_data cpu_stl_be_data_s390x +#define cpu_stl_le_data cpu_stl_le_data_s390x +#define cpu_stq_be_data cpu_stq_be_data_s390x +#define cpu_stq_le_data cpu_stq_le_data_s390x #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_s390x #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_s390x #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_s390x @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_s390x #define cpu_ldl_code cpu_ldl_code_s390x #define cpu_ldq_code cpu_ldq_code_s390x +#define cpu_interrupt_handler cpu_interrupt_handler_s390x #define helper_div_i32 helper_div_i32_s390x #define helper_rem_i32 helper_rem_i32_s390x #define helper_divu_i32 helper_divu_i32_s390x @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_s390x #define helper_gvec_sar32i helper_gvec_sar32i_s390x #define helper_gvec_sar64i helper_gvec_sar64i_s390x +#define helper_gvec_rotl8i helper_gvec_rotl8i_s390x +#define 
helper_gvec_rotl16i helper_gvec_rotl16i_s390x +#define helper_gvec_rotl32i helper_gvec_rotl32i_s390x +#define helper_gvec_rotl64i helper_gvec_rotl64i_s390x #define helper_gvec_shl8v helper_gvec_shl8v_s390x #define helper_gvec_shl16v helper_gvec_shl16v_s390x #define helper_gvec_shl32v helper_gvec_shl32v_s390x @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_s390x #define helper_gvec_sar32v helper_gvec_sar32v_s390x #define helper_gvec_sar64v helper_gvec_sar64v_s390x +#define helper_gvec_rotl8v helper_gvec_rotl8v_s390x +#define helper_gvec_rotl16v helper_gvec_rotl16v_s390x +#define helper_gvec_rotl32v helper_gvec_rotl32v_s390x +#define helper_gvec_rotl64v helper_gvec_rotl64v_s390x +#define helper_gvec_rotr8v helper_gvec_rotr8v_s390x +#define helper_gvec_rotr16v helper_gvec_rotr16v_s390x +#define helper_gvec_rotr32v helper_gvec_rotr32v_s390x +#define helper_gvec_rotr64v helper_gvec_rotr64v_s390x #define helper_gvec_eq8 helper_gvec_eq8_s390x #define helper_gvec_ne8 helper_gvec_ne8_s390x #define helper_gvec_lt8 helper_gvec_lt8_s390x diff --git a/qemu/sparc.h b/qemu/sparc.h index 32be40ab68..aeee045594 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_sparc #define tcg_gen_shr_i64 tcg_gen_shr_i64_sparc #define tcg_gen_st_i64 tcg_gen_st_i64_sparc +#define tcg_gen_add_i64 tcg_gen_add_i64_sparc +#define tcg_gen_sub_i64 tcg_gen_sub_i64_sparc #define tcg_gen_xor_i64 tcg_gen_xor_i64_sparc +#define tcg_gen_neg_i64 tcg_gen_neg_i64_sparc #define cpu_icount_to_ns cpu_icount_to_ns_sparc #define cpu_is_stopped cpu_is_stopped_sparc #define cpu_get_ticks cpu_get_ticks_sparc @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_sparc #define floatx80_mul floatx80_mul_sparc #define floatx80_div floatx80_div_sparc +#define floatx80_modrem floatx80_modrem_sparc +#define floatx80_mod floatx80_mod_sparc #define floatx80_rem floatx80_rem_sparc #define floatx80_sqrt floatx80_sqrt_sparc #define floatx80_eq floatx80_eq_sparc @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_sparc #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_sparc #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_sparc +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_sparc #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_sparc #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_sparc #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_sparc @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_sparc #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_sparc #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_sparc +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_sparc +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_sparc #define tcg_gen_gvec_sari tcg_gen_gvec_sari_sparc +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_sparc +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_sparc #define tcg_gen_gvec_shls tcg_gen_gvec_shls_sparc #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_sparc #define tcg_gen_gvec_sars tcg_gen_gvec_sars_sparc +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_sparc #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_sparc #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_sparc #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_sparc +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_sparc +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_sparc #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_sparc #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_sparc #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_sparc @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec 
tcg_gen_shli_vec_sparc #define tcg_gen_shri_vec tcg_gen_shri_vec_sparc #define tcg_gen_sari_vec tcg_gen_sari_vec_sparc +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_sparc +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_sparc #define tcg_gen_cmp_vec tcg_gen_cmp_vec_sparc #define tcg_gen_add_vec tcg_gen_add_vec_sparc #define tcg_gen_sub_vec tcg_gen_sub_vec_sparc @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_sparc #define tcg_gen_shrv_vec tcg_gen_shrv_vec_sparc #define tcg_gen_sarv_vec tcg_gen_sarv_vec_sparc +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_sparc +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_sparc #define tcg_gen_shls_vec tcg_gen_shls_vec_sparc #define tcg_gen_shrs_vec tcg_gen_shrs_vec_sparc #define tcg_gen_sars_vec tcg_gen_sars_vec_sparc +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_sparc #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_sparc #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_sparc #define tb_htable_lookup tb_htable_lookup_sparc @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_sparc #define cpu_loop_exit_atomic cpu_loop_exit_atomic_sparc #define tlb_init tlb_init_sparc +#define tlb_destroy tlb_destroy_sparc #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_sparc #define tlb_flush tlb_flush_sparc #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_sparc @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_sparc #define get_page_addr_code_hostp get_page_addr_code_hostp_sparc #define get_page_addr_code get_page_addr_code_sparc +#define probe_access_flags probe_access_flags_sparc #define probe_access probe_access_sparc #define tlb_vaddr_to_host tlb_vaddr_to_host_sparc #define helper_ret_ldub_mmu helper_ret_ldub_mmu_sparc @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_sparc #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_sparc #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_sparc -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_sparc -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_sparc -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_sparc -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_sparc +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_sparc +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_sparc +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_sparc +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_sparc +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_sparc +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_sparc +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_sparc +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_sparc #define cpu_ldub_data_ra cpu_ldub_data_ra_sparc #define cpu_ldsb_data_ra cpu_ldsb_data_ra_sparc -#define cpu_lduw_data_ra cpu_lduw_data_ra_sparc -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_sparc -#define cpu_ldl_data_ra cpu_ldl_data_ra_sparc -#define cpu_ldq_data_ra cpu_ldq_data_ra_sparc +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_sparc +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_sparc +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_sparc +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_sparc +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_sparc +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_sparc +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_sparc +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_sparc #define cpu_ldub_data cpu_ldub_data_sparc #define cpu_ldsb_data cpu_ldsb_data_sparc -#define cpu_lduw_data cpu_lduw_data_sparc -#define cpu_ldsw_data cpu_ldsw_data_sparc -#define cpu_ldl_data cpu_ldl_data_sparc -#define cpu_ldq_data cpu_ldq_data_sparc +#define 
cpu_lduw_be_data cpu_lduw_be_data_sparc +#define cpu_lduw_le_data cpu_lduw_le_data_sparc +#define cpu_ldsw_be_data cpu_ldsw_be_data_sparc +#define cpu_ldsw_le_data cpu_ldsw_le_data_sparc +#define cpu_ldl_be_data cpu_ldl_be_data_sparc +#define cpu_ldl_le_data cpu_ldl_le_data_sparc +#define cpu_ldq_le_data cpu_ldq_le_data_sparc +#define cpu_ldq_be_data cpu_ldq_be_data_sparc #define helper_ret_stb_mmu helper_ret_stb_mmu_sparc #define helper_le_stw_mmu helper_le_stw_mmu_sparc #define helper_be_stw_mmu helper_be_stw_mmu_sparc @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_sparc #define helper_be_stq_mmu helper_be_stq_mmu_sparc #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_sparc -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_sparc -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_sparc -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_sparc +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_sparc +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_sparc +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_sparc +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_sparc +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_sparc +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_sparc #define cpu_stb_data_ra cpu_stb_data_ra_sparc -#define cpu_stw_data_ra cpu_stw_data_ra_sparc -#define cpu_stl_data_ra cpu_stl_data_ra_sparc -#define cpu_stq_data_ra cpu_stq_data_ra_sparc +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_sparc +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_sparc +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_sparc +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_sparc +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_sparc +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_sparc #define cpu_stb_data cpu_stb_data_sparc -#define cpu_stw_data cpu_stw_data_sparc -#define cpu_stl_data cpu_stl_data_sparc -#define cpu_stq_data cpu_stq_data_sparc +#define cpu_stw_be_data cpu_stw_be_data_sparc +#define cpu_stw_le_data cpu_stw_le_data_sparc +#define cpu_stl_be_data cpu_stl_be_data_sparc +#define cpu_stl_le_data cpu_stl_le_data_sparc +#define cpu_stq_be_data cpu_stq_be_data_sparc +#define cpu_stq_le_data cpu_stq_le_data_sparc #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_sparc #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_sparc #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_sparc @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_sparc #define cpu_ldl_code cpu_ldl_code_sparc #define cpu_ldq_code cpu_ldq_code_sparc +#define cpu_interrupt_handler cpu_interrupt_handler_sparc #define helper_div_i32 helper_div_i32_sparc #define helper_rem_i32 helper_rem_i32_sparc #define helper_divu_i32 helper_divu_i32_sparc @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_sparc #define helper_gvec_sar32i helper_gvec_sar32i_sparc #define helper_gvec_sar64i helper_gvec_sar64i_sparc +#define helper_gvec_rotl8i helper_gvec_rotl8i_sparc +#define helper_gvec_rotl16i helper_gvec_rotl16i_sparc +#define helper_gvec_rotl32i helper_gvec_rotl32i_sparc +#define helper_gvec_rotl64i helper_gvec_rotl64i_sparc #define helper_gvec_shl8v helper_gvec_shl8v_sparc #define helper_gvec_shl16v helper_gvec_shl16v_sparc #define helper_gvec_shl32v helper_gvec_shl32v_sparc @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_sparc #define helper_gvec_sar32v helper_gvec_sar32v_sparc #define helper_gvec_sar64v helper_gvec_sar64v_sparc +#define helper_gvec_rotl8v helper_gvec_rotl8v_sparc +#define helper_gvec_rotl16v helper_gvec_rotl16v_sparc +#define helper_gvec_rotl32v 
helper_gvec_rotl32v_sparc +#define helper_gvec_rotl64v helper_gvec_rotl64v_sparc +#define helper_gvec_rotr8v helper_gvec_rotr8v_sparc +#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc +#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc +#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc #define helper_gvec_eq8 helper_gvec_eq8_sparc #define helper_gvec_ne8 helper_gvec_ne8_sparc #define helper_gvec_lt8 helper_gvec_lt8_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index c9f6f2fcbd..f12e6380cf 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_sparc64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_sparc64 #define tcg_gen_st_i64 tcg_gen_st_i64_sparc64 +#define tcg_gen_add_i64 tcg_gen_add_i64_sparc64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_sparc64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_sparc64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_sparc64 #define cpu_icount_to_ns cpu_icount_to_ns_sparc64 #define cpu_is_stopped cpu_is_stopped_sparc64 #define cpu_get_ticks cpu_get_ticks_sparc64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_sparc64 #define floatx80_mul floatx80_mul_sparc64 #define floatx80_div floatx80_div_sparc64 +#define floatx80_modrem floatx80_modrem_sparc64 +#define floatx80_mod floatx80_mod_sparc64 #define floatx80_rem floatx80_rem_sparc64 #define floatx80_sqrt floatx80_sqrt_sparc64 #define floatx80_eq floatx80_eq_sparc64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_sparc64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_sparc64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_sparc64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_sparc64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_sparc64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_sparc64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_sparc64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_sparc64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_sparc64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_sparc64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_sparc64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_sparc64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_sparc64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_sparc64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_sparc64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_sparc64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_sparc64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_sparc64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_sparc64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_sparc64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_sparc64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_sparc64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_sparc64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_sparc64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_sparc64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_sparc64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_sparc64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_sparc64 #define tcg_gen_shri_vec tcg_gen_shri_vec_sparc64 #define tcg_gen_sari_vec tcg_gen_sari_vec_sparc64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_sparc64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_sparc64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_sparc64 #define tcg_gen_add_vec tcg_gen_add_vec_sparc64 #define tcg_gen_sub_vec tcg_gen_sub_vec_sparc64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_sparc64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_sparc64 #define tcg_gen_sarv_vec 
tcg_gen_sarv_vec_sparc64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_sparc64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_sparc64 #define tcg_gen_shls_vec tcg_gen_shls_vec_sparc64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_sparc64 #define tcg_gen_sars_vec tcg_gen_sars_vec_sparc64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_sparc64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_sparc64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_sparc64 #define tb_htable_lookup tb_htable_lookup_sparc64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_sparc64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_sparc64 #define tlb_init tlb_init_sparc64 +#define tlb_destroy tlb_destroy_sparc64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_sparc64 #define tlb_flush tlb_flush_sparc64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_sparc64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_sparc64 #define get_page_addr_code_hostp get_page_addr_code_hostp_sparc64 #define get_page_addr_code get_page_addr_code_sparc64 +#define probe_access_flags probe_access_flags_sparc64 #define probe_access probe_access_sparc64 #define tlb_vaddr_to_host tlb_vaddr_to_host_sparc64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_sparc64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_sparc64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_sparc64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_sparc64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_sparc64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_sparc64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_sparc64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_sparc64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_sparc64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_sparc64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_sparc64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_sparc64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_sparc64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_sparc64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_sparc64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_sparc64 #define cpu_ldub_data_ra cpu_ldub_data_ra_sparc64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_sparc64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_sparc64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_sparc64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_sparc64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_sparc64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_sparc64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_sparc64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_sparc64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_sparc64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_sparc64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_sparc64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_sparc64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_sparc64 #define cpu_ldub_data cpu_ldub_data_sparc64 #define cpu_ldsb_data cpu_ldsb_data_sparc64 -#define cpu_lduw_data cpu_lduw_data_sparc64 -#define cpu_ldsw_data cpu_ldsw_data_sparc64 -#define cpu_ldl_data cpu_ldl_data_sparc64 -#define cpu_ldq_data cpu_ldq_data_sparc64 +#define cpu_lduw_be_data cpu_lduw_be_data_sparc64 +#define cpu_lduw_le_data cpu_lduw_le_data_sparc64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_sparc64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_sparc64 +#define cpu_ldl_be_data cpu_ldl_be_data_sparc64 +#define cpu_ldl_le_data cpu_ldl_le_data_sparc64 +#define cpu_ldq_le_data cpu_ldq_le_data_sparc64 +#define cpu_ldq_be_data cpu_ldq_be_data_sparc64 
#define helper_ret_stb_mmu helper_ret_stb_mmu_sparc64 #define helper_le_stw_mmu helper_le_stw_mmu_sparc64 #define helper_be_stw_mmu helper_be_stw_mmu_sparc64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_sparc64 #define helper_be_stq_mmu helper_be_stq_mmu_sparc64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_sparc64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_sparc64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_sparc64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_sparc64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_sparc64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_sparc64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_sparc64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_sparc64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_sparc64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_sparc64 #define cpu_stb_data_ra cpu_stb_data_ra_sparc64 -#define cpu_stw_data_ra cpu_stw_data_ra_sparc64 -#define cpu_stl_data_ra cpu_stl_data_ra_sparc64 -#define cpu_stq_data_ra cpu_stq_data_ra_sparc64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_sparc64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_sparc64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_sparc64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_sparc64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_sparc64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_sparc64 #define cpu_stb_data cpu_stb_data_sparc64 -#define cpu_stw_data cpu_stw_data_sparc64 -#define cpu_stl_data cpu_stl_data_sparc64 -#define cpu_stq_data cpu_stq_data_sparc64 +#define cpu_stw_be_data cpu_stw_be_data_sparc64 +#define cpu_stw_le_data cpu_stw_le_data_sparc64 +#define cpu_stl_be_data cpu_stl_be_data_sparc64 +#define cpu_stl_le_data cpu_stl_le_data_sparc64 +#define cpu_stq_be_data cpu_stq_be_data_sparc64 +#define cpu_stq_le_data cpu_stq_le_data_sparc64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_sparc64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_sparc64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_sparc64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_sparc64 #define cpu_ldl_code cpu_ldl_code_sparc64 #define cpu_ldq_code cpu_ldq_code_sparc64 +#define cpu_interrupt_handler cpu_interrupt_handler_sparc64 #define helper_div_i32 helper_div_i32_sparc64 #define helper_rem_i32 helper_rem_i32_sparc64 #define helper_divu_i32 helper_divu_i32_sparc64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_sparc64 #define helper_gvec_sar32i helper_gvec_sar32i_sparc64 #define helper_gvec_sar64i helper_gvec_sar64i_sparc64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_sparc64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_sparc64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_sparc64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_sparc64 #define helper_gvec_shl8v helper_gvec_shl8v_sparc64 #define helper_gvec_shl16v helper_gvec_shl16v_sparc64 #define helper_gvec_shl32v helper_gvec_shl32v_sparc64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_sparc64 #define helper_gvec_sar32v helper_gvec_sar32v_sparc64 #define helper_gvec_sar64v helper_gvec_sar64v_sparc64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_sparc64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_sparc64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_sparc64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_sparc64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_sparc64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc64 
+#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc64 #define helper_gvec_eq8 helper_gvec_eq8_sparc64 #define helper_gvec_ne8 helper_gvec_ne8_sparc64 #define helper_gvec_lt8 helper_gvec_lt8_sparc64 diff --git a/qemu/target/arm/backup.c b/qemu/target/arm/backup.c new file mode 100644 index 0000000000..5c7a651f71 --- /dev/null +++ b/qemu/target/arm/backup.c @@ -0,0 +1,5431 @@ +/* + * ARM SVE Operations + * + * Copyright (c) 2018 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" +#include "fpu/softfloat.h" +#include "tcg/tcg.h" + + +/* Note that vector data is stored in host-endian 64-bit chunks, + so addressing units smaller than that needs a host-endian fixup. */ +#ifdef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H1_2(x) ((x) ^ 6) +#define H1_4(x) ((x) ^ 4) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#else +#define H1(x) (x) +#define H1_2(x) (x) +#define H1_4(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#endif + +/* Return a value for NZCV as per the ARM PredTest pseudofunction. + * + * The return value has bit 31 set if N is set, bit 1 set if Z is clear, + * and bit 0 set if C is set. Compare the definitions of these variables + * within CPUARMState. + */ + +/* For no G bits set, NZCV = C. */ +#define PREDTEST_INIT 1 + +/* This is an iterative function, called for each Pd and Pg word + * moving forward. + */ +static uint32_t iter_predtest_fwd(uint64_t d, uint64_t g, uint32_t flags) +{ + if (likely(g)) { + /* Compute N from first D & G. + Use bit 2 to signal first G bit seen. */ + if (!(flags & 4)) { +#ifdef _MSC_VER + flags |= ((d & (g & (0 - g))) != 0) << 31; +#else + flags |= ((d & (g & -g)) != 0) << 31; +#endif + flags |= 4; + } + + /* Accumulate Z from each D & G. */ + flags |= ((d & g) != 0) << 1; + + /* Compute C from last !(D & G). Replace previous. */ + flags = deposit32(flags, 0, 1, (d & pow2floor(g)) == 0); + } + return flags; +} + +/* This is an iterative function, called for each Pd and Pg word + * moving backward. + */ +static uint32_t iter_predtest_bwd(uint64_t d, uint64_t g, uint32_t flags) +{ + if (likely(g)) { + /* Compute C from first (i.e last) !(D & G). + Use bit 2 to signal first G bit seen. */ + if (!(flags & 4)) { + flags += 4 - 1; /* add bit 2, subtract C from PREDTEST_INIT */ + flags |= (d & pow2floor(g)) == 0; + } + + /* Accumulate Z from each D & G. */ + flags |= ((d & g) != 0) << 1; + + /* Compute N from last (i.e first) D & G. Replace previous. */ +#ifdef _MSC_VER + flags = deposit32(flags, 31, 1, (d & (g & (0 - g))) != 0); +#else + flags = deposit32(flags, 31, 1, (d & (g & -g)) != 0); +#endif + } + return flags; +} + +/* The same for a single word predicate. 
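+ * As a quick worked check (illustrative): HELPER(sve_predtest1)(1, 1) reduces to
+ * iter_predtest_fwd(1, 1, PREDTEST_INIT); bit 31 is set because the first active
+ * element of D is true, bit 1 is set because some active element is true (Z clear),
+ * and bit 0 is cleared because the last active element is also true, giving
+ * 0x80000006 (N=1, Z=0, C=0).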
*/ +uint32_t HELPER(sve_predtest1)(uint64_t d, uint64_t g) +{ + return iter_predtest_fwd(d, g, PREDTEST_INIT); +} + +/* The same for a multi-word predicate. */ +uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words) +{ + uint32_t flags = PREDTEST_INIT; + uint64_t *d = vd, *g = vg; + uintptr_t i = 0; + + do { + flags = iter_predtest_fwd(d[i], g[i], flags); + } while (++i < words); + + return flags; +} + +/* Expand active predicate bits to bytes, for byte elements. + * for (i = 0; i < 256; ++i) { + * unsigned long m = 0; + * for (j = 0; j < 8; j++) { + * if ((i >> j) & 1) { + * m |= 0xfful << (j << 3); + * } + * } + * printf("0x%016lx,\n", m); + * } + */ +static inline uint64_t expand_pred_b(uint8_t byte) +{ + static const uint64_t word[256] = { + 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00, + 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff, + 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000, + 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff, + 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00, + 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff, + 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000, + 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff, + 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00, + 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff, + 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000, + 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff, + 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00, + 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff, + 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000, + 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff, + 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00, + 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff, + 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000, + 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff, + 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00, + 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff, + 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000, + 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff, + 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00, + 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff, + 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000, + 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff, + 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, + 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff, + 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000, + 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff, + 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00, + 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff, + 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000, + 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff, + 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00, + 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff, + 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000, + 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff, + 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00, + 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff, + 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000, + 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff, + 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00, 
+ 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff, + 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000, + 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff, + 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00, + 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff, + 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000, + 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff, + 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00, + 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff, + 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000, + 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff, + 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, + 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff, + 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000, + 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff, + 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00, + 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff, + 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000, + 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff, + 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00, + 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff, + 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000, + 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff, + 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00, + 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff, + 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000, + 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff, + 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00, + 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff, + 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000, + 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff, + 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00, + 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff, + 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000, + 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff, + 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00, + 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff, + 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000, + 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff, + 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00, + 0xffffffffffffffff, + }; + return word[byte]; +} + +/* Similarly for half-word elements. + * for (i = 0; i < 256; ++i) { + * unsigned long m = 0; + * if (i & 0xaa) { + * continue; + * } + * for (j = 0; j < 8; j += 2) { + * if ((i >> j) & 1) { + * m |= 0xfffful << (j << 3); + * } + * } + * printf("[0x%x] = 0x%016lx,\n", i, m); + * } + */ +static inline uint64_t expand_pred_h(uint8_t byte) +{ + static const uint64_t word[] = { + [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000, + [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000, + [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000, + [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000, + [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000, + [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000, + [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000, + [0x55] = 0xffffffffffffffff, + }; + return word[byte & 0x55]; +} + +/* Similarly for single word elements. 
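+ * For example, expand_pred_s(0x01) selects only the low 32-bit lane
+ * (0x00000000ffffffff) while expand_pred_s(0x11) selects both lanes
+ * (0xffffffffffffffff); only bits 0 and 4 of the predicate byte matter here.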
*/ +static inline uint64_t expand_pred_s(uint8_t byte) +{ + static const uint64_t word[] = { + [0x01] = 0x00000000ffffffffull, + [0x10] = 0xffffffff00000000ull, + [0x11] = 0xffffffffffffffffull, + }; + return word[byte & 0x11]; +} + +/* Swap 16-bit words within a 32-bit word. */ +static inline uint32_t hswap32(uint32_t h) +{ + return rol32(h, 16); +} + +/* Swap 16-bit words within a 64-bit word. */ +static inline uint64_t hswap64(uint64_t h) +{ + uint64_t m = 0x0000ffff0000ffffull; + h = rol64(h, 32); + return ((h & m) << 16) | ((h >> 16) & m); +} + +/* Swap 32-bit words within a 64-bit word. */ +static inline uint64_t wswap64(uint64_t h) +{ + return rol64(h, 32); +} + +#define LOGICAL_PPPP(NAME, FUNC) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + uintptr_t opr_sz = simd_oprsz(desc); \ + uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \ + uintptr_t i; \ + for (i = 0; i < opr_sz / 8; ++i) { \ + d[i] = FUNC(n[i], m[i], g[i]); \ + } \ +} + +#define DO_AND(N, M, G) (((N) & (M)) & (G)) +#define DO_BIC(N, M, G) (((N) & ~(M)) & (G)) +#define DO_EOR(N, M, G) (((N) ^ (M)) & (G)) +#define DO_ORR(N, M, G) (((N) | (M)) & (G)) +#define DO_ORN(N, M, G) (((N) | ~(M)) & (G)) +#define DO_NOR(N, M, G) (~((N) | (M)) & (G)) +#define DO_NAND(N, M, G) (~((N) & (M)) & (G)) +#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G))) + +LOGICAL_PPPP(sve_and_pppp, DO_AND) +LOGICAL_PPPP(sve_bic_pppp, DO_BIC) +LOGICAL_PPPP(sve_eor_pppp, DO_EOR) +LOGICAL_PPPP(sve_sel_pppp, DO_SEL) +LOGICAL_PPPP(sve_orr_pppp, DO_ORR) +LOGICAL_PPPP(sve_orn_pppp, DO_ORN) +LOGICAL_PPPP(sve_nor_pppp, DO_NOR) +LOGICAL_PPPP(sve_nand_pppp, DO_NAND) + +#undef DO_AND +#undef DO_BIC +#undef DO_EOR +#undef DO_ORR +#undef DO_ORN +#undef DO_NOR +#undef DO_NAND +#undef DO_SEL +#undef LOGICAL_PPPP + +/* Fully general three-operand expander, controlled by a predicate. + * This is complicated by the host-endian storage of the register file. + */ +/* ??? I don't expect the compiler could ever vectorize this itself. + * With some tables we can convert bit masks to byte masks, and with + * extra care wrt byte/word ordering we could use gcc generic vectors + * and do 16 bytes at a time. + */ +#define DO_ZPZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZZ_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i], mm = m[i]; \ + d[i] = OP(nn, mm); \ + } \ + } \ +} + +#define DO_AND(N, M) (N & M) +#define DO_EOR(N, M) (N ^ M) +#define DO_ORR(N, M) (N | M) +#define DO_BIC(N, M) (N & ~M) +#define DO_ADD(N, M) (N + M) +#define DO_SUB(N, M) (N - M) +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) +#define DO_ABD(N, M) ((N) >= (M) ? 
(N) - (M) : (M) - (N)) +#define DO_MUL(N, M) (N * M) + + +/* + * We must avoid the C undefined behaviour cases: division by + * zero and signed division of INT_MIN by -1. Both of these + * have architecturally defined required results for Arm. + * We special case all signed divisions by -1 to avoid having + * to deduce the minimum integer for the type involved. + */ +#define DO_SDIV(N, M) (unlikely(M == 0) ? 0 : unlikely(M == -1) ? -N : N / M) +#define DO_UDIV(N, M) (unlikely(M == 0) ? 0 : N / M) + +DO_ZPZZ(sve_and_zpzz_b, uint8_t, H1, DO_AND) +DO_ZPZZ(sve_and_zpzz_h, uint16_t, H1_2, DO_AND) +DO_ZPZZ(sve_and_zpzz_s, uint32_t, H1_4, DO_AND) +DO_ZPZZ_D(sve_and_zpzz_d, uint64_t, DO_AND) + +DO_ZPZZ(sve_orr_zpzz_b, uint8_t, H1, DO_ORR) +DO_ZPZZ(sve_orr_zpzz_h, uint16_t, H1_2, DO_ORR) +DO_ZPZZ(sve_orr_zpzz_s, uint32_t, H1_4, DO_ORR) +DO_ZPZZ_D(sve_orr_zpzz_d, uint64_t, DO_ORR) + +DO_ZPZZ(sve_eor_zpzz_b, uint8_t, H1, DO_EOR) +DO_ZPZZ(sve_eor_zpzz_h, uint16_t, H1_2, DO_EOR) +DO_ZPZZ(sve_eor_zpzz_s, uint32_t, H1_4, DO_EOR) +DO_ZPZZ_D(sve_eor_zpzz_d, uint64_t, DO_EOR) + +DO_ZPZZ(sve_bic_zpzz_b, uint8_t, H1, DO_BIC) +DO_ZPZZ(sve_bic_zpzz_h, uint16_t, H1_2, DO_BIC) +DO_ZPZZ(sve_bic_zpzz_s, uint32_t, H1_4, DO_BIC) +DO_ZPZZ_D(sve_bic_zpzz_d, uint64_t, DO_BIC) + +DO_ZPZZ(sve_add_zpzz_b, uint8_t, H1, DO_ADD) +DO_ZPZZ(sve_add_zpzz_h, uint16_t, H1_2, DO_ADD) +DO_ZPZZ(sve_add_zpzz_s, uint32_t, H1_4, DO_ADD) +DO_ZPZZ_D(sve_add_zpzz_d, uint64_t, DO_ADD) + +DO_ZPZZ(sve_sub_zpzz_b, uint8_t, H1, DO_SUB) +DO_ZPZZ(sve_sub_zpzz_h, uint16_t, H1_2, DO_SUB) +DO_ZPZZ(sve_sub_zpzz_s, uint32_t, H1_4, DO_SUB) +DO_ZPZZ_D(sve_sub_zpzz_d, uint64_t, DO_SUB) + +DO_ZPZZ(sve_smax_zpzz_b, int8_t, H1, DO_MAX) +DO_ZPZZ(sve_smax_zpzz_h, int16_t, H1_2, DO_MAX) +DO_ZPZZ(sve_smax_zpzz_s, int32_t, H1_4, DO_MAX) +DO_ZPZZ_D(sve_smax_zpzz_d, int64_t, DO_MAX) + +DO_ZPZZ(sve_umax_zpzz_b, uint8_t, H1, DO_MAX) +DO_ZPZZ(sve_umax_zpzz_h, uint16_t, H1_2, DO_MAX) +DO_ZPZZ(sve_umax_zpzz_s, uint32_t, H1_4, DO_MAX) +DO_ZPZZ_D(sve_umax_zpzz_d, uint64_t, DO_MAX) + +DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN) +DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN) + +DO_ZPZZ(sve_umin_zpzz_b, uint8_t, H1, DO_MIN) +DO_ZPZZ(sve_umin_zpzz_h, uint16_t, H1_2, DO_MIN) +DO_ZPZZ(sve_umin_zpzz_s, uint32_t, H1_4, DO_MIN) +DO_ZPZZ_D(sve_umin_zpzz_d, uint64_t, DO_MIN) + +DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD) +DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD) + +DO_ZPZZ(sve_uabd_zpzz_b, uint8_t, H1, DO_ABD) +DO_ZPZZ(sve_uabd_zpzz_h, uint16_t, H1_2, DO_ABD) +DO_ZPZZ(sve_uabd_zpzz_s, uint32_t, H1_4, DO_ABD) +DO_ZPZZ_D(sve_uabd_zpzz_d, uint64_t, DO_ABD) + +/* Because the computation type is at least twice as large as required, + these work for both signed and unsigned source types. 
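+ A quick worked case: with input bytes 0xff and 0x02, the int8_t instantiation
+ passes -1 and 2, the product is -2 and the returned high byte is 0xff (SMULH),
+ while the uint8_t instantiation passes 255 and 2, the product is 510 and the
+ high byte is 0x01 (UMULH).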
*/ +static inline uint8_t do_mulh_b(int32_t n, int32_t m) +{ + return (n * m) >> 8; +} + +static inline uint16_t do_mulh_h(int32_t n, int32_t m) +{ + return (n * m) >> 16; +} + +static inline uint32_t do_mulh_s(int64_t n, int64_t m) +{ + return (n * m) >> 32; +} + +static inline uint64_t do_smulh_d(uint64_t n, uint64_t m) +{ + uint64_t lo, hi; + muls64(&lo, &hi, n, m); + return hi; +} + +static inline uint64_t do_umulh_d(uint64_t n, uint64_t m) +{ + uint64_t lo, hi; + mulu64(&lo, &hi, n, m); + return hi; +} + +DO_ZPZZ(sve_mul_zpzz_b, uint8_t, H1, DO_MUL) +DO_ZPZZ(sve_mul_zpzz_h, uint16_t, H1_2, DO_MUL) +DO_ZPZZ(sve_mul_zpzz_s, uint32_t, H1_4, DO_MUL) +DO_ZPZZ_D(sve_mul_zpzz_d, uint64_t, DO_MUL) + +DO_ZPZZ(sve_smulh_zpzz_b, int8_t, H1, do_mulh_b) +DO_ZPZZ(sve_smulh_zpzz_h, int16_t, H1_2, do_mulh_h) +DO_ZPZZ(sve_smulh_zpzz_s, int32_t, H1_4, do_mulh_s) +DO_ZPZZ_D(sve_smulh_zpzz_d, uint64_t, do_smulh_d) + +DO_ZPZZ(sve_umulh_zpzz_b, uint8_t, H1, do_mulh_b) +DO_ZPZZ(sve_umulh_zpzz_h, uint16_t, H1_2, do_mulh_h) +DO_ZPZZ(sve_umulh_zpzz_s, uint32_t, H1_4, do_mulh_s) +DO_ZPZZ_D(sve_umulh_zpzz_d, uint64_t, do_umulh_d) + +DO_ZPZZ(sve_sdiv_zpzz_s, int32_t, H1_4, DO_SDIV) +DO_ZPZZ_D(sve_sdiv_zpzz_d, int64_t, DO_SDIV) + +DO_ZPZZ(sve_udiv_zpzz_s, uint32_t, H1_4, DO_UDIV) +DO_ZPZZ_D(sve_udiv_zpzz_d, uint64_t, DO_UDIV) + +/* Note that all bits of the shift are significant + and not modulo the element size. */ +#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1)) +#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0) +#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0) + +DO_ZPZZ(sve_asr_zpzz_b, int8_t, H1, DO_ASR) +DO_ZPZZ(sve_lsr_zpzz_b, uint8_t, H1_2, DO_LSR) +DO_ZPZZ(sve_lsl_zpzz_b, uint8_t, H1_4, DO_LSL) + +DO_ZPZZ(sve_asr_zpzz_h, int16_t, H1, DO_ASR) +DO_ZPZZ(sve_lsr_zpzz_h, uint16_t, H1_2, DO_LSR) +DO_ZPZZ(sve_lsl_zpzz_h, uint16_t, H1_4, DO_LSL) + +DO_ZPZZ(sve_asr_zpzz_s, int32_t, H1, DO_ASR) +DO_ZPZZ(sve_lsr_zpzz_s, uint32_t, H1_2, DO_LSR) +DO_ZPZZ(sve_lsl_zpzz_s, uint32_t, H1_4, DO_LSL) + +DO_ZPZZ_D(sve_asr_zpzz_d, int64_t, DO_ASR) +DO_ZPZZ_D(sve_lsr_zpzz_d, uint64_t, DO_LSR) +DO_ZPZZ_D(sve_lsl_zpzz_d, uint64_t, DO_LSL) + +#undef DO_ZPZZ +#undef DO_ZPZZ_D + +/* Three-operand expander, controlled by a predicate, in which the + * third operand is "wide". That is, for D = N op M, the same 64-bit + * value of M is used with all of the narrower values of N. + */ +#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint8_t pg = *(uint8_t *)((char *)vg + H1(i >> 3)); \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 7); \ + } \ +} + +DO_ZPZW(sve_asr_zpzw_b, int8_t, uint64_t, H1, DO_ASR) +DO_ZPZW(sve_lsr_zpzw_b, uint8_t, uint64_t, H1, DO_LSR) +DO_ZPZW(sve_lsl_zpzw_b, uint8_t, uint64_t, H1, DO_LSL) + +DO_ZPZW(sve_asr_zpzw_h, int16_t, uint64_t, H1_2, DO_ASR) +DO_ZPZW(sve_lsr_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSR) +DO_ZPZW(sve_lsl_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSL) + +DO_ZPZW(sve_asr_zpzw_s, int32_t, uint64_t, H1_4, DO_ASR) +DO_ZPZW(sve_lsr_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSR) +DO_ZPZW(sve_lsl_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSL) + +#undef DO_ZPZW + +/* Fully general two-operand expander, controlled by a predicate. 
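+ * For instance, DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT) below expands to a loop
+ * that consumes the predicate 16 bits at a time and writes (nn == 0) into each
+ * active byte lane, leaving inactive lanes unchanged.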
+ */ +#define DO_ZPZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZ_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn); \ + } \ + } \ +} + +#define DO_CLS_B(N) (clrsb32(N) - 24) +#define DO_CLS_H(N) (clrsb32(N) - 16) + +DO_ZPZ(sve_cls_b, int8_t, H1, DO_CLS_B) +DO_ZPZ(sve_cls_h, int16_t, H1_2, DO_CLS_H) +DO_ZPZ(sve_cls_s, int32_t, H1_4, clrsb32) +DO_ZPZ_D(sve_cls_d, int64_t, clrsb64) + +#define DO_CLZ_B(N) (clz32(N) - 24) +#define DO_CLZ_H(N) (clz32(N) - 16) + +DO_ZPZ(sve_clz_b, uint8_t, H1, DO_CLZ_B) +DO_ZPZ(sve_clz_h, uint16_t, H1_2, DO_CLZ_H) +DO_ZPZ(sve_clz_s, uint32_t, H1_4, clz32) +DO_ZPZ_D(sve_clz_d, uint64_t, clz64) + +DO_ZPZ(sve_cnt_zpz_b, uint8_t, H1, ctpop8) +DO_ZPZ(sve_cnt_zpz_h, uint16_t, H1_2, ctpop16) +DO_ZPZ(sve_cnt_zpz_s, uint32_t, H1_4, ctpop32) +DO_ZPZ_D(sve_cnt_zpz_d, uint64_t, ctpop64) + +#define DO_CNOT(N) (N == 0) + +DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT) +DO_ZPZ(sve_cnot_h, uint16_t, H1_2, DO_CNOT) +DO_ZPZ(sve_cnot_s, uint32_t, H1_4, DO_CNOT) +DO_ZPZ_D(sve_cnot_d, uint64_t, DO_CNOT) + +#ifdef _MSC_VER +#define DO_FABS16(N) (N & ((uint16_t)-1 >> 1)) +#define DO_FABS32(N) (N & ((uint32_t)-1 >> 1)) +#define DO_FABS64(N) (N & ((uint64_t)-1 >> 1)) + +DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS16) +DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS32) +DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS64) +#else +#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1)) + +DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) +DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) +DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) +#endif + +#ifdef _MSC_VER +#define DO_FNEG16(N) (N ^ ~((uint16_t)-1 >> 1)) +#define DO_FNEG32(N) (N ^ ~((uint32_t)-1 >> 1)) +#define DO_FNEG64(N) (N ^ ~((uint64_t)-1 >> 1)) + +DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG16) +DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG32) +DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG64) +#else +#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) + +DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) +DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) +DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) +#endif + +#define DO_NOT(N) (~N) + +DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) +DO_ZPZ(sve_not_zpz_h, uint16_t, H1_2, DO_NOT) +DO_ZPZ(sve_not_zpz_s, uint32_t, H1_4, DO_NOT) +DO_ZPZ_D(sve_not_zpz_d, uint64_t, DO_NOT) + +#define DO_SXTB(N) ((int8_t)N) +#define DO_SXTH(N) ((int16_t)N) +#define DO_SXTS(N) ((int32_t)N) +#define DO_UXTB(N) ((uint8_t)N) +#define DO_UXTH(N) ((uint16_t)N) +#define DO_UXTS(N) ((uint32_t)N) + +DO_ZPZ(sve_sxtb_h, uint16_t, H1_2, DO_SXTB) +DO_ZPZ(sve_sxtb_s, uint32_t, H1_4, DO_SXTB) +DO_ZPZ(sve_sxth_s, uint32_t, H1_4, DO_SXTH) +DO_ZPZ_D(sve_sxtb_d, uint64_t, DO_SXTB) +DO_ZPZ_D(sve_sxth_d, uint64_t, DO_SXTH) +DO_ZPZ_D(sve_sxtw_d, uint64_t, DO_SXTS) + +DO_ZPZ(sve_uxtb_h, uint16_t, H1_2, DO_UXTB) +DO_ZPZ(sve_uxtb_s, uint32_t, H1_4, DO_UXTB) +DO_ZPZ(sve_uxth_s, uint32_t, H1_4, DO_UXTH) +DO_ZPZ_D(sve_uxtb_d, uint64_t, DO_UXTB) 
+DO_ZPZ_D(sve_uxth_d, uint64_t, DO_UXTH) +DO_ZPZ_D(sve_uxtw_d, uint64_t, DO_UXTS) + +#ifdef _MSC_VER +#define DO_ABS(N) (N < 0 ? (0 - N) : N) +#else +#define DO_ABS(N) (N < 0 ? -N : N) +#endif + +DO_ZPZ(sve_abs_b, int8_t, H1, DO_ABS) +DO_ZPZ(sve_abs_h, int16_t, H1_2, DO_ABS) +DO_ZPZ(sve_abs_s, int32_t, H1_4, DO_ABS) +DO_ZPZ_D(sve_abs_d, int64_t, DO_ABS) + +#ifdef _MSC_VER +#define DO_NEG(N) (0 - N) +#else +#define DO_NEG(N) (-N) +#endif + +DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG) +DO_ZPZ(sve_neg_h, uint16_t, H1_2, DO_NEG) +DO_ZPZ(sve_neg_s, uint32_t, H1_4, DO_NEG) +DO_ZPZ_D(sve_neg_d, uint64_t, DO_NEG) + +DO_ZPZ(sve_revb_h, uint16_t, H1_2, bswap16) +DO_ZPZ(sve_revb_s, uint32_t, H1_4, bswap32) +DO_ZPZ_D(sve_revb_d, uint64_t, bswap64) + +DO_ZPZ(sve_revh_s, uint32_t, H1_4, hswap32) +DO_ZPZ_D(sve_revh_d, uint64_t, hswap64) + +DO_ZPZ_D(sve_revw_d, uint64_t, wswap64) + +DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8) +DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16) +DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32) +DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64) + +/* Three-operand expander, unpredicated, in which the third operand is "wide". + */ +#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + i += sizeof(TYPE); \ + } while (i & 7); \ + } \ +} + +DO_ZZW(sve_asr_zzw_b, int8_t, uint64_t, H1, DO_ASR) +DO_ZZW(sve_lsr_zzw_b, uint8_t, uint64_t, H1, DO_LSR) +DO_ZZW(sve_lsl_zzw_b, uint8_t, uint64_t, H1, DO_LSL) + +DO_ZZW(sve_asr_zzw_h, int16_t, uint64_t, H1_2, DO_ASR) +DO_ZZW(sve_lsr_zzw_h, uint16_t, uint64_t, H1_2, DO_LSR) +DO_ZZW(sve_lsl_zzw_h, uint16_t, uint64_t, H1_2, DO_LSL) + +DO_ZZW(sve_asr_zzw_s, int32_t, uint64_t, H1_4, DO_ASR) +DO_ZZW(sve_lsr_zzw_s, uint32_t, uint64_t, H1_4, DO_LSR) +DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL) + +#undef DO_ZZW + +#undef DO_CLS_B +#undef DO_CLS_H +#undef DO_CLZ_B +#undef DO_CLZ_H +#undef DO_CNOT +#undef DO_FABS +#undef DO_FNEG +#undef DO_ABS +#undef DO_NEG +#undef DO_ZPZ +#undef DO_ZPZ_D + +/* Two-operand reduction expander, controlled by a predicate. + * The difference between TYPERED and TYPERET has to do with + * sign-extension. E.g. for SMAX, TYPERED must be signed, + * but TYPERET must be unsigned so that e.g. a 32-bit value + * is not sign-extended to the ABI uint64_t return type. + */ +/* ??? If we were to vectorize this by hand the reduction ordering + * would change. For integer operands, this is perfectly fine. 
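+ * As a concrete case of the TYPERET note above: sve_smaxv_s accumulates in
+ * int32_t but returns through uint32_t, so a result of -1 reaches the uint64_t
+ * ABI return value as 0x00000000ffffffff rather than being sign-extended to
+ * 0xffffffffffffffff.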
+ */ +#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \ +uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPERED ret = INIT; \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPEELT nn = *(TYPEELT *)((char *)vn + H(i)); \ + ret = OP(ret, nn); \ + } \ + i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \ + } while (i & 15); \ + } \ + return (TYPERET)ret; \ +} + +#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \ +uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPEE *n = vn; \ + uint8_t *pg = vg; \ + TYPER ret = INIT; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPEE nn = n[i]; \ + ret = OP(ret, nn); \ + } \ + } \ + return ret; \ +} + +DO_VPZ(sve_orv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_ORR) +DO_VPZ(sve_orv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_ORR) +DO_VPZ(sve_orv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_ORR) +DO_VPZ_D(sve_orv_d, uint64_t, uint64_t, 0, DO_ORR) + +DO_VPZ(sve_eorv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_EOR) +DO_VPZ(sve_eorv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_EOR) +DO_VPZ(sve_eorv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_EOR) +DO_VPZ_D(sve_eorv_d, uint64_t, uint64_t, 0, DO_EOR) + +DO_VPZ(sve_andv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_AND) +DO_VPZ(sve_andv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_AND) +DO_VPZ(sve_andv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_AND) +DO_VPZ_D(sve_andv_d, uint64_t, uint64_t, -1, DO_AND) + +DO_VPZ(sve_saddv_b, int8_t, uint64_t, uint64_t, H1, 0, DO_ADD) +DO_VPZ(sve_saddv_h, int16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD) +DO_VPZ(sve_saddv_s, int32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD) + +DO_VPZ(sve_uaddv_b, uint8_t, uint64_t, uint64_t, H1, 0, DO_ADD) +DO_VPZ(sve_uaddv_h, uint16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD) +DO_VPZ(sve_uaddv_s, uint32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD) +DO_VPZ_D(sve_uaddv_d, uint64_t, uint64_t, 0, DO_ADD) + +DO_VPZ(sve_smaxv_b, int8_t, int8_t, uint8_t, H1, INT8_MIN, DO_MAX) +DO_VPZ(sve_smaxv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MIN, DO_MAX) +DO_VPZ(sve_smaxv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MIN, DO_MAX) +DO_VPZ_D(sve_smaxv_d, int64_t, int64_t, INT64_MIN, DO_MAX) + +DO_VPZ(sve_umaxv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_MAX) +DO_VPZ(sve_umaxv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_MAX) +DO_VPZ(sve_umaxv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_MAX) +DO_VPZ_D(sve_umaxv_d, uint64_t, uint64_t, 0, DO_MAX) + +DO_VPZ(sve_sminv_b, int8_t, int8_t, uint8_t, H1, INT8_MAX, DO_MIN) +DO_VPZ(sve_sminv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MAX, DO_MIN) +DO_VPZ(sve_sminv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MAX, DO_MIN) +DO_VPZ_D(sve_sminv_d, int64_t, int64_t, INT64_MAX, DO_MIN) + +DO_VPZ(sve_uminv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_MIN) +DO_VPZ(sve_uminv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_MIN) +DO_VPZ(sve_uminv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_MIN) +DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN) + +#undef DO_VPZ +#undef DO_VPZ_D + +/* Two vector operand, one scalar operand, unpredicated. 
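+ * For example, the DO_SUBR instantiations below compute a reversed subtract,
+ * d[i] = imm - n[i], while the max/min forms apply the scalar unchanged to every
+ * element with no predicate involved.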
*/ +#define DO_ZZI(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \ + TYPE s = s64, *d = vd, *n = vn; \ + for (i = 0; i < opr_sz; ++i) { \ + d[i] = OP(n[i], s); \ + } \ +} + +#define DO_SUBR(X, Y) (Y - X) + +DO_ZZI(sve_subri_b, uint8_t, DO_SUBR) +DO_ZZI(sve_subri_h, uint16_t, DO_SUBR) +DO_ZZI(sve_subri_s, uint32_t, DO_SUBR) +DO_ZZI(sve_subri_d, uint64_t, DO_SUBR) + +DO_ZZI(sve_smaxi_b, int8_t, DO_MAX) +DO_ZZI(sve_smaxi_h, int16_t, DO_MAX) +DO_ZZI(sve_smaxi_s, int32_t, DO_MAX) +DO_ZZI(sve_smaxi_d, int64_t, DO_MAX) + +DO_ZZI(sve_smini_b, int8_t, DO_MIN) +DO_ZZI(sve_smini_h, int16_t, DO_MIN) +DO_ZZI(sve_smini_s, int32_t, DO_MIN) +DO_ZZI(sve_smini_d, int64_t, DO_MIN) + +DO_ZZI(sve_umaxi_b, uint8_t, DO_MAX) +DO_ZZI(sve_umaxi_h, uint16_t, DO_MAX) +DO_ZZI(sve_umaxi_s, uint32_t, DO_MAX) +DO_ZZI(sve_umaxi_d, uint64_t, DO_MAX) + +DO_ZZI(sve_umini_b, uint8_t, DO_MIN) +DO_ZZI(sve_umini_h, uint16_t, DO_MIN) +DO_ZZI(sve_umini_s, uint32_t, DO_MIN) +DO_ZZI(sve_umini_d, uint64_t, DO_MIN) + +#undef DO_ZZI + +#undef DO_AND +#undef DO_ORR +#undef DO_EOR +#undef DO_BIC +#undef DO_ADD +#undef DO_SUB +#undef DO_MAX +#undef DO_MIN +#undef DO_ABD +#undef DO_MUL +#undef DO_DIV +#undef DO_ASR +#undef DO_LSR +#undef DO_LSL +#undef DO_SUBR + +/* Similar to the ARM LastActiveElement pseudocode function, except the + result is multiplied by the element size. This includes the not found + indication; e.g. not found for esz=3 is -8. */ +static intptr_t last_active_element(uint64_t *g, intptr_t words, intptr_t esz) +{ + uint64_t mask = pred_esz_masks[esz]; + intptr_t i = words; + + do { + uint64_t this_g = g[--i] & mask; + if (this_g) { + return i * 64 + (63 - clz64(this_g)); + } + } while (i > 0); + return (intptr_t)-1 << esz; +} + +uint32_t HELPER(sve_pfirst)(void *vd, void *vg, uint32_t words) +{ + uint32_t flags = PREDTEST_INIT; + uint64_t *d = vd, *g = vg; + intptr_t i = 0; + + do { + uint64_t this_d = d[i]; + uint64_t this_g = g[i]; + + if (this_g) { + if (!(flags & 4)) { + /* Set in D the first bit of G. */ +#ifdef _MSC_VER + this_d |= this_g & (0 - this_g); +#else + this_d |= this_g & -this_g; +#endif + d[i] = this_d; + } + flags = iter_predtest_fwd(this_d, this_g, flags); + } + } while (++i < words); + + return flags; +} + +uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc) +{ + intptr_t words = extract32(pred_desc, 0, SIMD_OPRSZ_BITS); + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + uint32_t flags = PREDTEST_INIT; + uint64_t *d = vd, *g = vg, esz_mask; + intptr_t i, next; + + next = last_active_element(vd, words, esz) + (1ULL << esz); + esz_mask = pred_esz_masks[esz]; + + /* Similar to the pseudocode for pnext, but scaled by ESZ + so that we find the correct bit. */ + if (next < words * 64) { + uint64_t mask = -1; + + if (next & 63) { + mask = ~((1ull << (next & 63)) - 1); + next &= -64; + } + do { + uint64_t this_g = g[next / 64] & esz_mask & mask; + if (this_g != 0) { + next = (next & -64) + ctz64(this_g); + break; + } + next += 64; + mask = -1; + } while (next < words * 64); + } + + i = 0; + do { + uint64_t this_d = 0; + if (i == next / 64) { + this_d = 1ull << (next & 63); + } + d[i] = this_d; + flags = iter_predtest_fwd(this_d, g[i] & esz_mask, flags); + } while (++i < words); + + return flags; +} + +/* Store zero into every active element of Zd. We will use this for two + * and three-operand predicated instructions for which logic dictates a + * zero result. 
In particular, logical shift by element size, which is + * otherwise undefined on the host. + * + * For element sizes smaller than uint64_t, we use tables to expand + * the N bits of the controlling predicate to a byte mask, and clear + * those bytes. + */ +void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] &= ~expand_pred_b(pg[H1(i)]); + } +} + +void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] &= ~expand_pred_h(pg[H1(i)]); + } +} + +void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] &= ~expand_pred_s(pg[H1(i)]); + } +} + +void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + if (pg[H1(i)] & 1) { + d[i] = 0; + } + } +} + +/* Copy Zn into Zd, and store zero into inactive elements. */ +void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] & expand_pred_b(pg[H1(i)]); + } +} + +void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] & expand_pred_h(pg[H1(i)]); + } +} + +void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] & expand_pred_s(pg[H1(i)]); + } +} + +void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { +#ifdef _MSC_VER + d[i] = n[i] & ((uint64_t)0 - (uint64_t)(pg[H1(i)] & 1)); +#else + d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1); +#endif + } +} + +/* Three-operand expander, immediate operand, controlled by a predicate. + */ +#define DO_ZPZI(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE imm = simd_data(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, imm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZI_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + TYPE imm = simd_data(desc); \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn, imm); \ + } \ + } \ +} + +#define DO_SHR(N, M) (N >> M) +#define DO_SHL(N, M) (N << M) + +/* Arithmetic shift right for division. This rounds negative numbers + toward zero as per signed division. Therefore before shifting, + when N is negative, add 2**M-1. 
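+ A worked case: for N = -7 and M = 2 the bias is (1 << 2) - 1 = 3, so the shift
+ operates on -4 and yields -1, matching -7 / 4 rounded toward zero, whereas a
+ plain arithmetic shift would give -2.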
*/ +#ifdef _MSC_VER + #define DO_ASRD(N, M) ((N + (N < 0 ? (1 << M) - 1 : 0)) >> M) +#else + #define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) +#endif + +DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR) +DO_ZPZI(sve_asr_zpzi_h, int16_t, H1_2, DO_SHR) +DO_ZPZI(sve_asr_zpzi_s, int32_t, H1_4, DO_SHR) +DO_ZPZI_D(sve_asr_zpzi_d, int64_t, DO_SHR) + +DO_ZPZI(sve_lsr_zpzi_b, uint8_t, H1, DO_SHR) +DO_ZPZI(sve_lsr_zpzi_h, uint16_t, H1_2, DO_SHR) +DO_ZPZI(sve_lsr_zpzi_s, uint32_t, H1_4, DO_SHR) +DO_ZPZI_D(sve_lsr_zpzi_d, uint64_t, DO_SHR) + +DO_ZPZI(sve_lsl_zpzi_b, uint8_t, H1, DO_SHL) +DO_ZPZI(sve_lsl_zpzi_h, uint16_t, H1_2, DO_SHL) +DO_ZPZI(sve_lsl_zpzi_s, uint32_t, H1_4, DO_SHL) +DO_ZPZI_D(sve_lsl_zpzi_d, uint64_t, DO_SHL) + +DO_ZPZI(sve_asrd_b, int8_t, H1, DO_ASRD) +DO_ZPZI(sve_asrd_h, int16_t, H1_2, DO_ASRD) +DO_ZPZI(sve_asrd_s, int32_t, H1_4, DO_ASRD) +DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD) + +#undef DO_SHR +#undef DO_SHL +#undef DO_ASRD +#undef DO_ZPZI +#undef DO_ZPZI_D + +/* Fully general four-operand expander, controlled by a predicate. + */ +#define DO_ZPZZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ + void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + TYPE aa = *(TYPE *)((char *)va + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(aa, nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZZZ_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ + void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *a = va, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE aa = a[i], nn = n[i], mm = m[i]; \ + d[i] = OP(aa, nn, mm); \ + } \ + } \ +} + +#define DO_MLA(A, N, M) (A + N * M) +#define DO_MLS(A, N, M) (A - N * M) + +DO_ZPZZZ(sve_mla_b, uint8_t, H1, DO_MLA) +DO_ZPZZZ(sve_mls_b, uint8_t, H1, DO_MLS) + +DO_ZPZZZ(sve_mla_h, uint16_t, H1_2, DO_MLA) +DO_ZPZZZ(sve_mls_h, uint16_t, H1_2, DO_MLS) + +DO_ZPZZZ(sve_mla_s, uint32_t, H1_4, DO_MLA) +DO_ZPZZZ(sve_mls_s, uint32_t, H1_4, DO_MLS) + +DO_ZPZZZ_D(sve_mla_d, uint64_t, DO_MLA) +DO_ZPZZZ_D(sve_mls_d, uint64_t, DO_MLS) + +#undef DO_MLA +#undef DO_MLS +#undef DO_ZPZZZ +#undef DO_ZPZZZ_D + +void HELPER(sve_index_b)(void *vd, uint32_t start, + uint32_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint8_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[H1(i)] = start + i * incr; + } +} + +void HELPER(sve_index_h)(void *vd, uint32_t start, + uint32_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 2; + uint16_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[H2(i)] = start + i * incr; + } +} + +void HELPER(sve_index_s)(void *vd, uint32_t start, + uint32_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[H4(i)] = start + i * incr; + } +} + +void HELPER(sve_index_d)(void *vd, uint64_t start, + uint64_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[i] = start + i * incr; + } +} + +void HELPER(sve_adr_p32)(void *vd, void *vn, void *vm, uint32_t 
desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t sh = simd_data(desc); + uint32_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + (m[i] << sh); + } +} + +void HELPER(sve_adr_p64)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t sh = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + (m[i] << sh); + } +} + +void HELPER(sve_adr_s32)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t sh = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + ((uint64_t)(int32_t)m[i] << sh); + } +} + +void HELPER(sve_adr_u32)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t sh = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + ((uint64_t)(uint32_t)m[i] << sh); + } +} + +void HELPER(sve_fexpa_h)(void *vd, void *vn, uint32_t desc) +{ + /* These constants are cut-and-paste directly from the ARM pseudocode. */ + static const uint16_t coeff[] = { + 0x0000, 0x0016, 0x002d, 0x0045, 0x005d, 0x0075, 0x008e, 0x00a8, + 0x00c2, 0x00dc, 0x00f8, 0x0114, 0x0130, 0x014d, 0x016b, 0x0189, + 0x01a8, 0x01c8, 0x01e8, 0x0209, 0x022b, 0x024e, 0x0271, 0x0295, + 0x02ba, 0x02e0, 0x0306, 0x032e, 0x0356, 0x037f, 0x03a9, 0x03d4, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / 2; + uint16_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; i++) { + uint16_t nn = n[i]; + intptr_t idx = extract32(nn, 0, 5); + uint16_t exp = extract32(nn, 5, 5); + d[i] = coeff[idx] | (exp << 10); + } +} + +void HELPER(sve_fexpa_s)(void *vd, void *vn, uint32_t desc) +{ + /* These constants are cut-and-paste directly from the ARM pseudocode. */ + static const uint32_t coeff[] = { + 0x000000, 0x0164d2, 0x02cd87, 0x043a29, + 0x05aac3, 0x071f62, 0x08980f, 0x0a14d5, + 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, + 0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d, + 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, + 0x1ef532, 0x20b051, 0x227043, 0x243516, + 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, + 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, + 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b, + 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, + 0x45672a, 0x478d75, 0x49b9be, 0x4bec15, + 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, + 0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5, + 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, + 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, + 0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; i++) { + uint32_t nn = n[i]; + intptr_t idx = extract32(nn, 0, 6); + uint32_t exp = extract32(nn, 6, 8); + d[i] = coeff[idx] | (exp << 23); + } +} + +void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) +{ + /* These constants are cut-and-paste directly from the ARM pseudocode. 
*/ + static const uint64_t coeff[] = { + 0x0000000000000ull, 0x02C9A3E778061ull, 0x059B0D3158574ull, + 0x0874518759BC8ull, 0x0B5586CF9890Full, 0x0E3EC32D3D1A2ull, + 0x11301D0125B51ull, 0x1429AAEA92DE0ull, 0x172B83C7D517Bull, + 0x1A35BEB6FCB75ull, 0x1D4873168B9AAull, 0x2063B88628CD6ull, + 0x2387A6E756238ull, 0x26B4565E27CDDull, 0x29E9DF51FDEE1ull, + 0x2D285A6E4030Bull, 0x306FE0A31B715ull, 0x33C08B26416FFull, + 0x371A7373AA9CBull, 0x3A7DB34E59FF7ull, 0x3DEA64C123422ull, + 0x4160A21F72E2Aull, 0x44E086061892Dull, 0x486A2B5C13CD0ull, + 0x4BFDAD5362A27ull, 0x4F9B2769D2CA7ull, 0x5342B569D4F82ull, + 0x56F4736B527DAull, 0x5AB07DD485429ull, 0x5E76F15AD2148ull, + 0x6247EB03A5585ull, 0x6623882552225ull, 0x6A09E667F3BCDull, + 0x6DFB23C651A2Full, 0x71F75E8EC5F74ull, 0x75FEB564267C9ull, + 0x7A11473EB0187ull, 0x7E2F336CF4E62ull, 0x82589994CCE13ull, + 0x868D99B4492EDull, 0x8ACE5422AA0DBull, 0x8F1AE99157736ull, + 0x93737B0CDC5E5ull, 0x97D829FDE4E50ull, 0x9C49182A3F090ull, + 0xA0C667B5DE565ull, 0xA5503B23E255Dull, 0xA9E6B5579FDBFull, + 0xAE89F995AD3ADull, 0xB33A2B84F15FBull, 0xB7F76F2FB5E47ull, + 0xBCC1E904BC1D2ull, 0xC199BDD85529Cull, 0xC67F12E57D14Bull, + 0xCB720DCEF9069ull, 0xD072D4A07897Cull, 0xD5818DCFBA487ull, + 0xDA9E603DB3285ull, 0xDFC97337B9B5Full, 0xE502EE78B3FF6ull, + 0xEA4AFA2A490DAull, 0xEFA1BEE615A27ull, 0xF50765B6E4540ull, + 0xFA7C1819E90D8ull, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; i++) { + uint64_t nn = n[i]; + intptr_t idx = extract32(nn, 0, 6); + uint64_t exp = extract32(nn, 6, 11); + d[i] = coeff[idx] | (exp << 52); + } +} + +void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 2; + uint16_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + uint16_t nn = n[i]; + uint16_t mm = m[i]; + if (mm & 1) { + nn = float16_one; + } + d[i] = nn ^ (mm & 2) << 14; + } +} + +void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + uint32_t nn = n[i]; + uint32_t mm = m[i]; + if (mm & 1) { + nn = float32_one; + } + d[i] = nn ^ (mm & 2) << 30; + } +} + +void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t mm = m[i]; + if (mm & 1) { + nn = float64_one; + } + d[i] = nn ^ (mm & 2) << 62; + } +} + +/* + * Signed saturating addition with scalar operand. 
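+ * For example, in the byte form below a lane holding 112 (0x70) with b = 100
+ * computes 212, which exceeds INT8_MAX and is clamped to 127; similarly -128
+ * with b = -100 is clamped to INT8_MIN.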
+ */ + +void HELPER(sve_sqaddi_b)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int8_t)) { + int r = *(int8_t *)((char *)a + i) + b; + if (r > INT8_MAX) { + r = INT8_MAX; + } else if (r < INT8_MIN) { + r = INT8_MIN; + } + *(int8_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_sqaddi_h)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int16_t)) { + int r = *(int16_t *)((char *)a + i) + b; + if (r > INT16_MAX) { + r = INT16_MAX; + } else if (r < INT16_MIN) { + r = INT16_MIN; + } + *(int16_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_sqaddi_s)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int32_t)) { + int64_t r = *(int32_t *)((char *)a + i) + b; + if (r > INT32_MAX) { + r = INT32_MAX; + } else if (r < INT32_MIN) { + r = INT32_MIN; + } + *(int32_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_sqaddi_d)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int64_t)) { + int64_t ai = *(int64_t *)((char *)a + i); + int64_t r = ai + b; + if (((r ^ ai) & ~(ai ^ b)) < 0) { + /* Signed overflow. */ + r = (r < 0 ? INT64_MAX : INT64_MIN); + } + *(int64_t *)((char *)d + i) = r; + } +} + +/* + * Unsigned saturating addition with scalar operand. + */ + +void HELPER(sve_uqaddi_b)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + int r = *(uint8_t *)((char *)a + i) + b; + if (r > UINT8_MAX) { + r = UINT8_MAX; + } else if (r < 0) { + r = 0; + } + *(uint8_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqaddi_h)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + int r = *(uint16_t *)((char *)a + i) + b; + if (r > UINT16_MAX) { + r = UINT16_MAX; + } else if (r < 0) { + r = 0; + } + *(uint16_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqaddi_s)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + int64_t r = *(uint32_t *)((char *)a + i) + b; + if (r > UINT32_MAX) { + r = UINT32_MAX; + } else if (r < 0) { + r = 0; + } + *(uint32_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqaddi_d)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint64_t r = *(uint64_t *)((char *)a + i) + b; + if (r < b) { + r = UINT64_MAX; + } + *(uint64_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint64_t ai = *(uint64_t *)((char *)a + i); + *(uint64_t *)((char *)d + i) = (ai < b ? 0 : ai - b); + } +} + +/* Two operand predicated copy immediate with merge. All valid immediates + * can fit within 17 signed bits in the simd_data field. 
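+ * In the byte form below, dup_const(MO_8, mm) replicates the immediate across
+ * the 64-bit chunk and expand_pred_b() turns each predicate byte into a lane
+ * mask, so active lanes take the immediate and inactive lanes keep the
+ * corresponding bits of Zn.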
+ */ +void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + mm = dup_const(MO_8, mm); + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t pp = expand_pred_b(pg[H1(i)]); + d[i] = (mm & pp) | (nn & ~pp); + } +} + +void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + mm = dup_const(MO_16, mm); + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t pp = expand_pred_h(pg[H1(i)]); + d[i] = (mm & pp) | (nn & ~pp); + } +} + +void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + mm = dup_const(MO_32, mm); + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t pp = expand_pred_s(pg[H1(i)]); + d[i] = (mm & pp) | (nn & ~pp); + } +} + +void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + d[i] = (pg[H1(i)] & 1 ? mm : nn); + } +} + +void HELPER(sve_cpy_z_b)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + val = dup_const(MO_8, val); + for (i = 0; i < opr_sz; i += 1) { + d[i] = val & expand_pred_b(pg[H1(i)]); + } +} + +void HELPER(sve_cpy_z_h)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + val = dup_const(MO_16, val); + for (i = 0; i < opr_sz; i += 1) { + d[i] = val & expand_pred_h(pg[H1(i)]); + } +} + +void HELPER(sve_cpy_z_s)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + val = dup_const(MO_32, val); + for (i = 0; i < opr_sz; i += 1) { + d[i] = val & expand_pred_s(pg[H1(i)]); + } +} + +void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + d[i] = (pg[H1(i)] & 1 ? val : 0); + } +} + +/* Big-endian hosts need to frob the byte indices. If the copy + * happens to be 8-byte aligned, then no frobbing necessary. 
+ */ +static void swap_memmove(void *vd, void *vs, size_t n) +{ + uintptr_t d = (uintptr_t)vd; + uintptr_t s = (uintptr_t)vs; + uintptr_t o = (d | s | n) & 7; + size_t i; + +#ifndef HOST_WORDS_BIGENDIAN + o = 0; +#endif + switch (o) { + case 0: + memmove(vd, vs, n); + break; + + case 4: + if (d < s || d >= s + n) { + for (i = 0; i < n; i += 4) { + *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); + } + } else { + for (i = n; i > 0; ) { + i -= 4; + *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); + } + } + break; + + case 2: + case 6: + if (d < s || d >= s + n) { + for (i = 0; i < n; i += 2) { + *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); + } + } else { + for (i = n; i > 0; ) { + i -= 2; + *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); + } + } + break; + + default: + if (d < s || d >= s + n) { + for (i = 0; i < n; i++) { + *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); + } + } else { + for (i = n; i > 0; ) { + i -= 1; + *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); + } + } + break; + } +} + +/* Similarly for memset of 0. */ +static void swap_memzero(void *vd, size_t n) +{ + uintptr_t d = (uintptr_t)vd; + uintptr_t o = (d | n) & 7; + size_t i; + + /* Usually, the first bit of a predicate is set, so N is 0. */ + if (likely(n == 0)) { + return; + } + +#ifndef HOST_WORDS_BIGENDIAN + o = 0; +#endif + switch (o) { + case 0: + memset(vd, 0, n); + break; + + case 4: + for (i = 0; i < n; i += 4) { + *(uint32_t *)H1_4(d + i) = 0; + } + break; + + case 2: + case 6: + for (i = 0; i < n; i += 2) { + *(uint16_t *)H1_2(d + i) = 0; + } + break; + + default: + for (i = 0; i < n; i++) { + *(uint8_t *)H1(d + i) = 0; + } + break; + } +} + +void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t opr_sz = simd_oprsz(desc); + size_t n_ofs = simd_data(desc); + size_t n_siz = opr_sz - n_ofs; + + if (vd != vm) { + swap_memmove(vd, (char *)vn + n_ofs, n_siz); + swap_memmove((char *)vd + n_siz, vm, n_ofs); + } else if (vd != vn) { + swap_memmove((char *)vd + n_siz, vd, n_ofs); + swap_memmove(vd, (char *)vn + n_ofs, n_siz); + } else { + /* vd == vn == vm. Need temp space. 
*/ + ARMVectorReg tmp; + swap_memmove(&tmp, vm, n_ofs); + swap_memmove(vd, (char *)vd + n_ofs, n_siz); + memcpy((char *)vd + n_siz, &tmp, n_ofs); + } +} + +#define DO_INSR(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + swap_memmove((char *)vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \ + *(TYPE *)((char *)vd + H(0)) = val; \ +} + +DO_INSR(sve_insr_b, uint8_t, H1) +DO_INSR(sve_insr_h, uint16_t, H1_2) +DO_INSR(sve_insr_s, uint32_t, H1_4) +DO_INSR(sve_insr_d, uint64_t, ) + +#undef DO_INSR + +void HELPER(sve_rev_b)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = bswap64(b); + *(uint64_t *)((char *)vd + j) = bswap64(f); + } +} + +void HELPER(sve_rev_h)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = hswap64(b); + *(uint64_t *)((char *)vd + j) = hswap64(f); + } +} + +void HELPER(sve_rev_s)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = rol64(b, 32); + *(uint64_t *)((char *)vd + j) = rol64(f, 32); + } +} + +void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = b; + *(uint64_t *)((char *)vd + j) = f; + } +} + +#define DO_TBL(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + uintptr_t elem = opr_sz / sizeof(TYPE); \ + TYPE *d = vd, *n = vn, *m = vm; \ + ARMVectorReg tmp; \ + if (unlikely(vd == vn)) { \ + n = memcpy(&tmp, vn, opr_sz); \ + } \ + for (i = 0; i < elem; i++) { \ + TYPE j = m[H(i)]; \ + d[H(i)] = j < elem ? n[H(j)] : 0; \ + } \ +} + +DO_TBL(sve_tbl_b, uint8_t, H1) +DO_TBL(sve_tbl_h, uint16_t, H2) +DO_TBL(sve_tbl_s, uint32_t, H4) +DO_TBL(sve_tbl_d, uint64_t, ) + +#undef TBL + +#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPED *d = vd; \ + TYPES *n = vn; \ + ARMVectorReg tmp; \ + if (unlikely((char *)vn - (char *)vd < opr_sz)) { \ + n = memcpy(&tmp, n, opr_sz / 2); \ + } \ + for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \ + d[HD(i)] = n[HS(i)]; \ + } \ +} + +DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1) +DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2) +DO_UNPK(sve_sunpk_d, int64_t, int32_t, , H4) + +DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1) +DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2) +DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4) + +#undef DO_UNPK + +/* Mask of bits included in the even numbered predicates of width esz. + * We also use this for expand_bits/compress_bits, and so extend the + * same pattern out to 16-bit units. 
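+ * For instance, with esz = 1 each halfword element owns two predicate bits, so
+ * the even-numbered elements occupy bit pairs {0,1}, {4,5}, ... and the mask is
+ * 0x3333...; esz = 0 gives the alternating 0x5555... pattern.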
+ */ +static const uint64_t even_bit_esz_masks[5] = { + 0x5555555555555555ull, + 0x3333333333333333ull, + 0x0f0f0f0f0f0f0f0full, + 0x00ff00ff00ff00ffull, + 0x0000ffff0000ffffull, +}; + +/* Zero-extend units of 2**N bits to units of 2**(N+1) bits. + * For N==0, this corresponds to the operation that in qemu/bitops.h + * we call half_shuffle64; this algorithm is from Hacker's Delight, + * section 7-2 Shuffling Bits. + */ +static uint64_t expand_bits(uint64_t x, int n) +{ + int i; + + x &= 0xffffffffu; + for (i = 4; i >= n; i--) { + int sh = 1 << i; + x = ((x << sh) | x) & even_bit_esz_masks[i]; + } + return x; +} + +/* Compress units of 2**(N+1) bits to units of 2**N bits. + * For N==0, this corresponds to the operation that in qemu/bitops.h + * we call half_unshuffle64; this algorithm is from Hacker's Delight, + * section 7-2 Shuffling Bits, where it is called an inverse half shuffle. + */ +static uint64_t compress_bits(uint64_t x, int n) +{ + int i; + + for (i = n; i <= 4; i++) { + int sh = 1 << i; + x &= even_bit_esz_masks[i]; + x = (x >> sh) | x; + } + return x & 0xffffffffu; +} + +void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1); + uint64_t *d = vd; + intptr_t i; + + if (oprsz <= 8) { + uint64_t nn = *(uint64_t *)vn; + uint64_t mm = *(uint64_t *)vm; + int half = 4 * oprsz; + + nn = extract64(nn, high * half, half); + mm = extract64(mm, high * half, half); + nn = expand_bits(nn, esz); + mm = expand_bits(mm, esz); + d[0] = nn + (mm << (1 << esz)); + } else { + ARMPredicateReg tmp_n, tmp_m; + + /* We produce output faster than we consume input. + Therefore we must be mindful of possible overlap. 
*/ + if (((char *)vn - (char *)vd) < (uintptr_t)oprsz) { + vn = memcpy(&tmp_n, vn, oprsz); + } + if (((char *)vm - (char *)vd) < (uintptr_t)oprsz) { + vm = memcpy(&tmp_m, vm, oprsz); + } + if (high) { + high = oprsz >> 1; + } + + if ((high & 3) == 0) { + uint32_t *n = vn, *m = vm; + high >>= 2; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + uint64_t nn = n[H4(high + i)]; + uint64_t mm = m[H4(high + i)]; + + nn = expand_bits(nn, esz); + mm = expand_bits(mm, esz); + d[i] = nn + (mm << (1 << esz)); + } + } else { + uint8_t *n = vn, *m = vm; + uint16_t *d16 = vd; + + for (i = 0; i < oprsz / 2; i++) { + uint16_t nn = n[H1(high + i)]; + uint16_t mm = m[H1(high + i)]; + + nn = expand_bits(nn, esz); + mm = expand_bits(mm, esz); + d16[H2(i)] = nn + (mm << (1 << esz)); + } + } + } +} + +void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + int odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1) << esz; + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t l, h; + intptr_t i; + + if (oprsz <= 8) { + l = compress_bits(n[0] >> odd, esz); + h = compress_bits(m[0] >> odd, esz); + d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz); + } else { + ARMPredicateReg tmp_m; + intptr_t oprsz_16 = oprsz / 16; + + if (((char *)vm - (char *)vd) < (uintptr_t)oprsz) { + m = memcpy(&tmp_m, vm, oprsz); + } + + for (i = 0; i < oprsz_16; i++) { + l = n[2 * i + 0]; + h = n[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + d[i] = l + (h << 32); + } + + /* For VL which is not a power of 2, the results from M do not + align nicely with the uint64_t for D. Put the aligned results + from M into TMP_M and then copy it into place afterward. */ + if (oprsz & 15) { + d[i] = compress_bits(n[2 * i] >> odd, esz); + + for (i = 0; i < oprsz_16; i++) { + l = m[2 * i + 0]; + h = m[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + tmp_m.p[i] = l + (h << 32); + } + tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz); + + swap_memmove((char *)vd + oprsz / 2, &tmp_m, oprsz / 2); + } else { + for (i = 0; i < oprsz_16; i++) { + l = m[2 * i + 0]; + h = m[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + d[oprsz_16 + i] = l + (h << 32); + } + } + } +} + +void HELPER(sve_trn_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + uintptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + bool odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1); + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t mask; + int shr, shl; + intptr_t i; + + shl = 1 << esz; + shr = 0; + mask = even_bit_esz_masks[esz]; + if (odd) { + mask <<= shl; + shr = shl; + shl = 0; + } + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + uint64_t nn = (n[i] & mask) >> shr; + uint64_t mm = (m[i] & mask) << shl; + d[i] = nn + mm; + } +} + +/* Reverse units of 2**N bits. 
*/ +static uint64_t reverse_bits_64(uint64_t x, int n) +{ + int i, sh; + + x = bswap64(x); + for (i = 2, sh = 4; i >= n; i--, sh >>= 1) { + uint64_t mask = even_bit_esz_masks[i]; + x = ((x & mask) << sh) | ((x >> sh) & mask); + } + return x; +} + +static uint8_t reverse_bits_8(uint8_t x, int n) +{ + static const uint8_t mask[3] = { 0x55, 0x33, 0x0f }; + int i, sh; + + for (i = 2, sh = 4; i >= n; i--, sh >>= 1) { + x = ((x & mask[i]) << sh) | ((x >> sh) & mask[i]); + } + return x; +} + +void HELPER(sve_rev_p)(void *vd, void *vn, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t i, oprsz_2 = oprsz / 2; + + if (oprsz <= 8) { + uint64_t l = *(uint64_t *)vn; + l = reverse_bits_64(l << (64 - 8 * oprsz), esz); + *(uint64_t *)vd = l; + } else if ((oprsz & 15) == 0) { + for (i = 0; i < oprsz_2; i += 8) { + intptr_t ih = oprsz - 8 - i; + uint64_t l = reverse_bits_64(*(uint64_t *)((char *)vn + i), esz); + uint64_t h = reverse_bits_64(*(uint64_t *)((char *)vn + ih), esz); + *(uint64_t *)((char *)vd + i) = h; + *(uint64_t *)((char *)vd + ih) = l; + } + } else { + for (i = 0; i < oprsz_2; i += 1) { + intptr_t il = H1(i); + intptr_t ih = H1(oprsz - 1 - i); + uint8_t l = reverse_bits_8(*(uint8_t *)((char *)vn + il), esz); + uint8_t h = reverse_bits_8(*(uint8_t *)((char *)vn + ih), esz); + *(uint8_t *)((char *)vd + il) = h; + *(uint8_t *)((char *)vd + ih) = l; + } + } +} + +void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1); + uint64_t *d = vd; + intptr_t i; + + if (oprsz <= 8) { + uint64_t nn = *(uint64_t *)vn; + int half = 4 * oprsz; + + nn = extract64(nn, high * half, half); + nn = expand_bits(nn, 0); + d[0] = nn; + } else { + ARMPredicateReg tmp_n; + + /* We produce output faster than we consume input. + Therefore we must be mindful of possible overlap. */ + if (((char *)vn - (char *)vd) < (uintptr_t)oprsz) { + vn = memcpy(&tmp_n, vn, oprsz); + } + if (high) { + high = oprsz >> 1; + } + + if ((high & 3) == 0) { + uint32_t *n = vn; + high >>= 2; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + uint64_t nn = n[H4(high + i)]; + d[i] = expand_bits(nn, 0); + } + } else { + uint16_t *d16 = vd; + uint8_t *n = vn; + + for (i = 0; i < oprsz / 2; i++) { + uint16_t nn = n[H1(high + i)]; + d16[H2(i)] = expand_bits(nn, 0); + } + } + } +} + +#define DO_ZIP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t i, oprsz_2 = oprsz / 2; \ + ARMVectorReg tmp_n, tmp_m; \ + /* We produce output faster than we consume input. \ + Therefore we must be mindful of possible overlap. 
*/ \ + if (unlikely(((char *)vn - (char *)vd) < (uintptr_t)oprsz)) { \ + vn = memcpy(&tmp_n, vn, oprsz_2); \ + } \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz_2); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(2 * i + 0)) = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)((char *)vm + H(i)); \ + } \ +} + +DO_ZIP(sve_zip_b, uint8_t, H1) +DO_ZIP(sve_zip_h, uint16_t, H1_2) +DO_ZIP(sve_zip_s, uint32_t, H1_4) +DO_ZIP(sve_zip_d, uint64_t, ) + +#define DO_UZP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t oprsz_2 = oprsz / 2; \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + ARMVectorReg tmp_m; \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(i)) = *(TYPE *)((char *)vn + H(2 * i + odd_ofs)); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(oprsz_2 + i)) = *(TYPE *)((char *)vm + H(2 * i + odd_ofs)); \ + } \ +} + +DO_UZP(sve_uzp_b, uint8_t, H1) +DO_UZP(sve_uzp_h, uint16_t, H1_2) +DO_UZP(sve_uzp_s, uint32_t, H1_4) +DO_UZP(sve_uzp_d, uint64_t, ) + +#define DO_TRN(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \ + TYPE ae = *(TYPE *)((char *)vn + H(i + odd_ofs)); \ + TYPE be = *(TYPE *)((char *)vm + H(i + odd_ofs)); \ + *(TYPE *)((char *)vd + H(i + 0)) = ae; \ + *(TYPE *)((char *)vd + H(i + sizeof(TYPE))) = be; \ + } \ +} + +DO_TRN(sve_trn_b, uint8_t, H1) +DO_TRN(sve_trn_h, uint16_t, H1_2) +DO_TRN(sve_trn_s, uint32_t, H1_4) +DO_TRN(sve_trn_d, uint64_t, ) + +#undef DO_ZIP +#undef DO_UZP +#undef DO_TRN + +void HELPER(sve_compact_s)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = j = 0; i < opr_sz; i++) { + if (pg[H1(i / 2)] & (i & 1 ? 0x10 : 0x01)) { + d[H4(j)] = n[H4(i)]; + j++; + } + } + for (; j < opr_sz; j++) { + d[H4(j)] = 0; + } +} + +void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = j = 0; i < opr_sz; i++) { + if (pg[H1(i)] & 1) { + d[j] = n[i]; + j++; + } + } + for (; j < opr_sz; j++) { + d[j] = 0; + } +} + +/* Similar to the ARM LastActiveElement pseudocode function, except the + * result is multiplied by the element size. This includes the not found + * indication; e.g. not found for esz=3 is -8. + */ +int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + + return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz); +} + +void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) +{ + intptr_t opr_sz = simd_oprsz(desc) / 8; + int esz = simd_data(desc); + uint64_t pg, first_g, last_g, len, mask = pred_esz_masks[esz]; + intptr_t i, first_i, last_i; + ARMVectorReg tmp; + + first_i = last_i = 0; + first_g = last_g = 0; + + /* Find the extent of the active elements within VG. 
*/ + for (i = QEMU_ALIGN_UP(opr_sz, 8) - 8; i >= 0; i -= 8) { + pg = *(uint64_t *)((char *)vg + i) & mask; + if (pg) { + if (last_g == 0) { + last_g = pg; + last_i = i; + } + first_g = pg; + first_i = i; + } + } + + len = 0; + if (first_g != 0) { + first_i = first_i * 8 + ctz64(first_g); + last_i = last_i * 8 + 63 - clz64(last_g); + len = last_i - first_i + (1ULL << esz); + if (vd == vm) { + vm = memcpy(&tmp, vm, opr_sz * 8); + } + swap_memmove(vd, (char *)vn + first_i, len); + } + swap_memmove((char *)vd + len, vm, opr_sz * 8 - len); +} + +void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + uint64_t pp = expand_pred_b(pg[H1(i)]); + d[i] = (nn & pp) | (mm & ~pp); + } +} + +void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + uint64_t pp = expand_pred_h(pg[H1(i)]); + d[i] = (nn & pp) | (mm & ~pp); + } +} + +void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + uint64_t pp = expand_pred_s(pg[H1(i)]); + d[i] = (nn & pp) | (mm & ~pp); + } +} + +void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + d[i] = (pg[H1(i)] & 1 ? nn : mm); + } +} + +/* Two operand comparison controlled by a predicate. + * ??? It is very tempting to want to be able to expand this inline + * with x86 instructions, e.g. + * + * vcmpeqw zm, zn, %ymm0 + * vpmovmskb %ymm0, %eax + * and $0x5555, %eax + * and pg, %eax + * + * or even aarch64, e.g. + * + * // mask = 4000 1000 0400 0100 0040 0010 0004 0001 + * cmeq v0.8h, zn, zm + * and v0.8h, v0.8h, mask + * addv h0, v0.8h + * and v0.8b, pg + * + * However, coming up with an abstraction that allows vector inputs and + * a scalar output, and also handles the byte-ordering of sub-uint64_t + * scalar outputs, is tricky. 
+ */ +#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \ +uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ +} + +#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) +#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) +#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) + +DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) +DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==) +DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==) +DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==) + +DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=) +DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=) +DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=) +DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=) + +DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >) +DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >) +DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >) +DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >) + +DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=) +DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=) +DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=) +DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=) + +DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >) +DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >) +DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >) +DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >) + +DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=) +DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=) +DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=) +DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=) + +#undef DO_CMP_PPZZ_B +#undef DO_CMP_PPZZ_H +#undef DO_CMP_PPZZ_S +#undef DO_CMP_PPZZ_D +#undef DO_CMP_PPZZ + +/* Similar, but the second source is "wide". 
*/ +#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \ +uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + TYPEW mm = *(TYPEW *)((char *)vm + i - 8); \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 7); \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ +} + +#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull) +#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull) + +DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==) +DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, int16_t, uint64_t, ==) +DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, int32_t, uint64_t, ==) + +DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=) +DO_CMP_PPZW_H(sve_cmpne_ppzw_h, int16_t, uint64_t, !=) +DO_CMP_PPZW_S(sve_cmpne_ppzw_s, int32_t, uint64_t, !=) + +DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >) +DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >) +DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >) + +DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=) +DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=) +DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=) + +DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >) +DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >) +DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >) + +DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=) +DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=) +DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=) + +DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <) +DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <) +DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <) + +DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=) +DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=) +DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=) + +DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <) +DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <) +DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <) + +DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=) +DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=) +DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=) + +#undef DO_CMP_PPZW_B +#undef DO_CMP_PPZW_H +#undef DO_CMP_PPZW_S +#undef DO_CMP_PPZW + +/* Similar, but the second source is immediate. 
*/ +#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \ +uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + TYPE mm = simd_data(desc); \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ +} + +#define DO_CMP_PPZI_B(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZI_H(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) +#define DO_CMP_PPZI_S(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) +#define DO_CMP_PPZI_D(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) + +DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) +DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==) +DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==) +DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==) + +DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=) +DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=) +DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=) +DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=) + +DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >) +DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >) +DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >) +DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >) + +DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=) +DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=) +DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=) +DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=) + +DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >) +DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >) +DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >) +DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >) + +DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=) +DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=) +DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=) +DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=) + +DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <) +DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <) +DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <) +DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <) + +DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=) +DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=) +DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=) +DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=) + +DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <) +DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <) +DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <) +DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <) + +DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=) +DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=) +DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=) +DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=) + +#undef DO_CMP_PPZI_B +#undef DO_CMP_PPZI_H +#undef DO_CMP_PPZI_S +#undef DO_CMP_PPZI_D +#undef DO_CMP_PPZI + +/* Similar to the ARM LastActive pseudocode function. */ +static bool last_active_pred(void *vd, void *vg, intptr_t oprsz) +{ + intptr_t i; + + for (i = QEMU_ALIGN_UP(oprsz, 8) - 8; i >= 0; i -= 8) { + uint64_t pg = *(uint64_t *)((char *)vg + i); + if (pg) { + return (pow2floor(pg) & *(uint64_t *)((char *)vd + i)) != 0; + } + } + return 0; +} + +/* Compute a mask into RETB that is true for all G, up to and including + * (if after) or excluding (if !after) the first G & N. + * Return true if BRK found. 
+ */ +static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, + bool brk, bool after) +{ + uint64_t b; + + if (brk) { + b = 0; + } else if ((g & n) == 0) { + /* For all G, no N are set; break not found. */ + b = g; + } else { + /* Break somewhere in N. Locate it. */ + b = g & n; /* guard true, pred true */ +#ifdef _MSC_VER + b = b & (0 - b); /* first such */ +#else + b = b & -b; /* first such */ +#endif + if (after) { + b = b | (b - 1); /* break after same */ + } else { + b = b - 1; /* break before same */ + } + brk = true; + } + + *retb = b; + return brk; +} + +/* Compute a zeroing BRK. */ +static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = this_b & this_g; + } +} + +/* Likewise, but also compute flags. */ +static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + uint32_t flags = PREDTEST_INIT; + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_d, this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = this_d = this_b & this_g; + flags = iter_predtest_fwd(this_d, this_g, flags); + } + return flags; +} + +/* Compute a merging BRK. */ +static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = (this_b & this_g) | (d[i] & ~this_g); + } +} + +/* Likewise, but also compute flags. */ +static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + uint32_t flags = PREDTEST_INIT; + bool brk = false; + intptr_t i; + + for (i = 0; i < oprsz / 8; ++i) { + uint64_t this_b, this_d = d[i], this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = this_d = (this_b & this_g) | (this_d & ~this_g); + flags = iter_predtest_fwd(this_d, this_g, flags); + } + return flags; +} + +static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz) +{ + /* It is quicker to zero the whole predicate than loop on OPRSZ. + * The compiler should turn this into 4 64-bit integer stores. 
+ */ + memset(d, 0, sizeof(ARMPredicateReg)); + return PREDTEST_INIT; +} + +void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + compute_brk_z(vd, vm, vg, oprsz, true); + } else { + do_zero(vd, oprsz); + } +} + +uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + return compute_brks_z(vd, vm, vg, oprsz, true); + } else { + return do_zero(vd, oprsz); + } +} + +void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + compute_brk_z(vd, vm, vg, oprsz, false); + } else { + do_zero(vd, oprsz); + } +} + +uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + return compute_brks_z(vd, vm, vg, oprsz, false); + } else { + return do_zero(vd, oprsz); + } +} + +void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_z(vd, vn, vg, oprsz, true); +} + +uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_z(vd, vn, vg, oprsz, true); +} + +void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_z(vd, vn, vg, oprsz, false); +} + +uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_z(vd, vn, vg, oprsz, false); +} + +void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_m(vd, vn, vg, oprsz, true); +} + +uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_m(vd, vn, vg, oprsz, true); +} + +void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_m(vd, vn, vg, oprsz, false); +} + +uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_m(vd, vn, vg, oprsz, false); +} + +void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + + if (!last_active_pred(vn, vg, oprsz)) { + do_zero(vd, oprsz); + } +} + +/* As if PredTest(Ones(PL), D, esz). 
*/ +static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz, + uint64_t esz_mask) +{ + uint32_t flags = PREDTEST_INIT; + intptr_t i; + + for (i = 0; i < oprsz / 8; i++) { + flags = iter_predtest_fwd(d->p[i], esz_mask, flags); + } + if (oprsz & 7) { + uint64_t mask = ~(0xffffffffffffffffULL << (8 * (oprsz & 7))); + flags = iter_predtest_fwd(d->p[i], esz_mask & mask, flags); + } + return flags; +} + +uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + + if (last_active_pred(vn, vg, oprsz)) { + return predtest_ones(vd, oprsz, -1); + } else { + return do_zero(vd, oprsz); + } +} + +uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz]; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t t = n[i] & g[i] & mask; + sum += ctpop64(t); + } + return sum; +} + +uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + uint64_t esz_mask = pred_esz_masks[esz]; + ARMPredicateReg *d = vd; + uint32_t flags; + intptr_t i; + + /* Begin with a zero predicate register. */ + flags = do_zero(d, oprsz); + if (count == 0) { + return flags; + } + + /* Set all of the requested bits. */ + for (i = 0; i < count / 64; ++i) { + d->p[i] = esz_mask; + } + if (count & 63) { + d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask; + } + + return predtest_ones(d, oprsz, esz_mask); +} + +/* Recursive reduction on a function; + * C.f. the ARM ARM function ReducePredicated. + * + * While it would be possible to write this without the DATA temporary, + * it is much simpler to process the predicate register this way. + * The recursion is bounded to depth 7 (128 fp16 elements), so there's + * little to gain with a more complex non-recursive form. + */ +#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ +static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ +{ \ + if (n == 1) { \ + return *data; \ + } else { \ + uintptr_t half = n / 2; \ + TYPE lo = NAME##_reduce(data, status, half); \ + TYPE hi = NAME##_reduce(data + half, status, half); \ + return TYPE##_##FUNC(lo, hi, status); \ + } \ +} \ +uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ +{ \ + uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ + TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + for (i = 0; i < oprsz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)data + i) = (pg & 1 ? nn : IDENT); \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + for (; i < maxsz; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)data + i) = IDENT; \ + } \ + return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ +} + +DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) +DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) +DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) + +/* Identity is floatN_default_nan, without the function call. 
*/ +DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) +DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) +DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) + +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) +DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) + +DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) +DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) +DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) + +DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) +DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) +DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) + +#undef DO_REDUCE + +uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, + void *status, uint32_t desc) +{ + intptr_t i = 0, opr_sz = simd_oprsz(desc); + float16 result = nn; + + do { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + float16 mm = *(float16 *)((char *)vm + H1_2(i)); + result = float16_add(result, mm, status); + } + i += sizeof(float16), pg >>= sizeof(float16); + } while (i & 15); + } while (i < opr_sz); + + return result; +} + +uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, + void *status, uint32_t desc) +{ + intptr_t i = 0, opr_sz = simd_oprsz(desc); + float32 result = nn; + + do { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + float32 mm = *(float32 *)((char *)vm + H1_2(i)); + result = float32_add(result, mm, status); + } + i += sizeof(float32), pg >>= sizeof(float32); + } while (i & 15); + } while (i < opr_sz); + + return result; +} + +uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, + void *status, uint32_t desc) +{ + intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8; + uint64_t *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i++) { + if (pg[H1(i)] & 1) { + nn = float64_add(nn, m[i], status); + } + } + + return nn; +} + +/* Fully general three-operand expander, controlled by a predicate, + * With the extra float_status parameter. 
+ */ +#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ +} + +DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) +DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) +DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) + +DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) +DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) +DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) + +DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) +DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) +DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) + +DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) +DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) +DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) + +DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) +DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) +DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) + +DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) +DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) +DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) + +DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) +DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) +DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) + +DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) +DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) +DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) + +static inline float16 abd_h(float16 a, float16 b, float_status *s) +{ + return float16_abs(float16_sub(a, b, s)); +} + +static inline float32 abd_s(float32 a, float32 b, float_status *s) +{ + return float32_abs(float32_sub(a, b, s)); +} + +static inline float64 abd_d(float64 a, float64 b, float_status *s) +{ + return float64_abs(float64_sub(a, b, s)); +} + +DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) +DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) +DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) + +static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) +{ + int b_int = MIN(MAX(b, INT_MIN), INT_MAX); + return float64_scalbn(a, b_int, s); +} + +DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn) +DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn) +DO_ZPZZ_FP(sve_fscalbn_d, int64_t, , scalbn_d) + +DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh) +DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs) +DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) + +#undef DO_ZPZZ_FP + +/* Three-operand expander, with one scalar operand, controlled by + * a predicate, with the extra float_status parameter. 
+ */ +#define DO_ZPZS_FP(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + TYPE mm = scalar; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ +} + +DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add) +DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add) +DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) + +DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub) +DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub) +DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) + +DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul) +DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul) +DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) + +static inline float16 subr_h(float16 a, float16 b, float_status *s) +{ + return float16_sub(b, a, s); +} + +static inline float32 subr_s(float32 a, float32 b, float_status *s) +{ + return float32_sub(b, a, s); +} + +static inline float64 subr_d(float64 a, float64 b, float_status *s) +{ + return float64_sub(b, a, s); +} + +DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h) +DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s) +DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) + +DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum) +DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum) +DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) + +DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum) +DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum) +DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) + +DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max) +DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max) +DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) + +DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) +DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) +DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) + +/* Fully general two-operand expander, controlled by a predicate, + * With the extra float_status parameter. + */ +#define DO_ZPZ_FP(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ +} + +/* SVE fp16 conversions always use IEEE mode. Like AdvSIMD, they ignore + * FZ16. When converting from fp16, this affects flushing input denormals; + * when converting to fp16, this affects flushing output denormals. 
+ */ +static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) +{ + bool save = get_flush_inputs_to_zero(fpst); + float32 ret; + + set_flush_inputs_to_zero(false, fpst); + ret = float16_to_float32(f, true, fpst); + set_flush_inputs_to_zero(save, fpst); + return ret; +} + +static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) +{ + bool save = get_flush_inputs_to_zero(fpst); + float64 ret; + + set_flush_inputs_to_zero(false, fpst); + ret = float16_to_float64(f, true, fpst); + set_flush_inputs_to_zero(save, fpst); + return ret; +} + +static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) +{ + bool save = get_flush_to_zero(fpst); + float16 ret; + + set_flush_to_zero(false, fpst); + ret = float32_to_float16(f, true, fpst); + set_flush_to_zero(save, fpst); + return ret; +} + +static inline float16 sve_f64_to_f16(float64 f, float_status *fpst) +{ + bool save = get_flush_to_zero(fpst); + float16 ret; + + set_flush_to_zero(false, fpst); + ret = float64_to_float16(f, true, fpst); + set_flush_to_zero(save, fpst); + return ret; +} + +static inline int16_t vfp_float16_to_int16_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_int16_round_to_zero(f, s); +} + +static inline int64_t vfp_float16_to_int64_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_int64_round_to_zero(f, s); +} + +static inline int64_t vfp_float32_to_int64_rtz(float32 f, float_status *s) +{ + if (float32_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float32_to_int64_round_to_zero(f, s); +} + +static inline int64_t vfp_float64_to_int64_rtz(float64 f, float_status *s) +{ + if (float64_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float64_to_int64_round_to_zero(f, s); +} + +static inline uint16_t vfp_float16_to_uint16_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_uint16_round_to_zero(f, s); +} + +static inline uint64_t vfp_float16_to_uint64_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_uint64_round_to_zero(f, s); +} + +static inline uint64_t vfp_float32_to_uint64_rtz(float32 f, float_status *s) +{ + if (float32_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float32_to_uint64_round_to_zero(f, s); +} + +static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s) +{ + if (float64_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float64_to_uint64_round_to_zero(f, s); +} + +DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16) +DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32) +DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) +DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) +DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) +DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) + +DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz) +DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh) +DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs) +DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) +DO_ZPZ_FP(sve_fcvtzs_dd, 
uint64_t, , vfp_float64_to_int64_rtz) + +DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz) +DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh) +DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs) +DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) +DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) + +DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth) +DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints) +DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) + +DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int) +DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int) +DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) + +DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16) +DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32) +DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) + +DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt) +DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt) +DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) + +DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16) +DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16) +DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32) +DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) +DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) +DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) +DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) + +DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16) +DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16) +DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32) +DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) +DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) +DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) +DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) + +#undef DO_ZPZ_FP + +static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint16_t neg1, uint16_t neg3) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 2; + if (likely((pg >> (i & 63)) & 1)) { + float16 e1, e2, e3, r; + + e1 = *(uint16_t *)((char *)vn + H1_2(i)) ^ neg1; + e2 = *(uint16_t *)((char *)vm + H1_2(i)); + e3 = *(uint16_t *)((char *)va + H1_2(i)) ^ neg3; + r = float16_muladd(e1, e2, e3, 0, status); + *(uint16_t *)((char *)vd + H1_2(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); +} + +void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); +} + +void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); +} + +static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint32_t neg1, uint32_t neg3) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { 
+ uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 4; + if (likely((pg >> (i & 63)) & 1)) { + float32 e1, e2, e3, r; + + e1 = *(uint32_t *)((char *)vn + H1_4(i)) ^ neg1; + e2 = *(uint32_t *)((char *)vm + H1_4(i)); + e3 = *(uint32_t *)((char *)va + H1_4(i)) ^ neg3; + r = float32_muladd(e1, e2, e3, 0, status); + *(uint32_t *)((char *)vd + H1_4(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); +} + +void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); +} + +void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); +} + +static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint64_t neg1, uint64_t neg3) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 8; + if (likely((pg >> (i & 63)) & 1)) { + float64 e1, e2, e3, r; + + e1 = *(uint64_t *)((char *)vn + i) ^ neg1; + e2 = *(uint64_t *)((char *)vm + i); + e3 = *(uint64_t *)((char *)va + i) ^ neg3; + r = float64_muladd(e1, e2, e3, 0, status); + *(uint64_t *)((char *)vd + i) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); +} + +void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); +} + +void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); +} + +/* Two operand floating-point comparison controlled by a predicate. + * Unlike the integer version, we are not allowed to optimistically + * compare operands, since the comparison may have side effects wrt + * the FPSR. 
+ */ +#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= OP(TYPE, nn, mm, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ +} + +#define DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZZ_S(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZZ_D(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ + DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ_S(NAME, OP) \ + DO_FPCMP_PPZZ_D(NAME, OP) + +#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 +#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 +#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0 +#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0 +#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 +#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 +#define DO_FCMUO(TYPE, X, Y, ST) \ + TYPE##_compare_quiet(X, Y, ST) == float_relation_unordered +#define DO_FACGE(TYPE, X, Y, ST) \ + TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) <= 0 +#define DO_FACGT(TYPE, X, Y, ST) \ + TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) < 0 + +DO_FPCMP_PPZZ_ALL(sve_fcmge, DO_FCMGE) +DO_FPCMP_PPZZ_ALL(sve_fcmgt, DO_FCMGT) +DO_FPCMP_PPZZ_ALL(sve_fcmeq, DO_FCMEQ) +DO_FPCMP_PPZZ_ALL(sve_fcmne, DO_FCMNE) +DO_FPCMP_PPZZ_ALL(sve_fcmuo, DO_FCMUO) +DO_FPCMP_PPZZ_ALL(sve_facge, DO_FACGE) +DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT) + +#undef DO_FPCMP_PPZZ_ALL +#undef DO_FPCMP_PPZZ_D +#undef DO_FPCMP_PPZZ_S +#undef DO_FPCMP_PPZZ_H +#undef DO_FPCMP_PPZZ + +/* One operand floating-point comparison against zero, controlled + * by a predicate. + */ +#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if ((pg >> (i & 63)) & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= OP(TYPE, nn, 0, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ +} + +#define DO_FPCMP_PPZ0_H(NAME, OP) \ + DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZ0_S(NAME, OP) \ + DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZ0_D(NAME, OP) \ + DO_FPCMP_PPZ0(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZ0_ALL(NAME, OP) \ + DO_FPCMP_PPZ0_H(NAME, OP) \ + DO_FPCMP_PPZ0_S(NAME, OP) \ + DO_FPCMP_PPZ0_D(NAME, OP) + +DO_FPCMP_PPZ0_ALL(sve_fcmge0, DO_FCMGE) +DO_FPCMP_PPZ0_ALL(sve_fcmgt0, DO_FCMGT) +DO_FPCMP_PPZ0_ALL(sve_fcmle0, DO_FCMLE) +DO_FPCMP_PPZ0_ALL(sve_fcmlt0, DO_FCMLT) +DO_FPCMP_PPZ0_ALL(sve_fcmeq0, DO_FCMEQ) +DO_FPCMP_PPZ0_ALL(sve_fcmne0, DO_FCMNE) + +/* FP Trig Multiply-Add. 
*/ + +void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +{ + static const float16 coeff[16] = { + 0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); + intptr_t x = simd_data(desc); + float16 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { + float16 mm = m[i]; + intptr_t xx = x; + if (float16_is_neg(mm)) { + mm = float16_abs(mm); + xx += 8; + } + d[i] = float16_muladd(n[i], mm, coeff[xx], 0, vs); + } +} + +void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +{ + static const float32 coeff[16] = { + 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, + 0x36369d6d, 0x00000000, 0x00000000, 0x00000000, + 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705, + 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); + intptr_t x = simd_data(desc); + float32 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { + float32 mm = m[i]; + intptr_t xx = x; + if (float32_is_neg(mm)) { + mm = float32_abs(mm); + xx += 8; + } + d[i] = float32_muladd(n[i], mm, coeff[xx], 0, vs); + } +} + +void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +{ + static const float64 coeff[16] = { + 0x3ff0000000000000ull, 0xbfc5555555555543ull, + 0x3f8111111110f30cull, 0xbf2a01a019b92fc6ull, + 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull, + 0x3de5d8408868552full, 0x0000000000000000ull, + 0x3ff0000000000000ull, 0xbfe0000000000000ull, + 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull, + 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull, + 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); + intptr_t x = simd_data(desc); + float64 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { + float64 mm = m[i]; + intptr_t xx = x; + if (float64_is_neg(mm)) { + mm = float64_abs(mm); + xx += 8; + } + d[i] = float64_muladd(n[i], mm, coeff[xx], 0, vs); + } +} + +/* + * FP Complex Add + */ + +void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, + void *vs, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + uint64_t *g = vg; + float16 neg_imag = float16_set_sign(0, simd_data(desc)); + float16 neg_real = float16_chs(neg_imag); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float16 e0, e1, e2, e3; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float16); + i -= 2 * sizeof(float16); + + e0 = *(float16 *)((char *)vn + H1_2(i)); + e1 = *(float16 *)((char *)vm + H1_2(j)) ^ neg_real; + e2 = *(float16 *)((char *)vn + H1_2(j)); + e3 = *(float16 *)((char *)vm + H1_2(i)) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + *(float16 *)((char *)vd + H1_2(i)) = float16_add(e0, e1, vs); + } + if (likely((pg >> (j & 63)) & 1)) { + *(float16 *)((char *)vd + H1_2(j)) = float16_add(e2, e3, vs); + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, + void *vs, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + uint64_t *g = vg; + float32 neg_imag = float32_set_sign(0, simd_data(desc)); + float32 neg_real = float32_chs(neg_imag); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float32 e0, e1, e2, e3; + + /* I holds the real index; J holds the imag index. 
*/ + j = i - sizeof(float32); + i -= 2 * sizeof(float32); + + e0 = *(float32 *)((char *)vn + H1_2(i)); + e1 = *(float32 *)((char *)vm + H1_2(j)) ^ neg_real; + e2 = *(float32 *)((char *)vn + H1_2(j)); + e3 = *(float32 *)((char *)vm + H1_2(i)) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + *(float32 *)((char *)vd + H1_2(i)) = float32_add(e0, e1, vs); + } + if (likely((pg >> (j & 63)) & 1)) { + *(float32 *)((char *)vd + H1_2(j)) = float32_add(e2, e3, vs); + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, + void *vs, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + uint64_t *g = vg; + float64 neg_imag = float64_set_sign(0, simd_data(desc)); + float64 neg_real = float64_chs(neg_imag); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float64 e0, e1, e2, e3; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float64); + i -= 2 * sizeof(float64); + + e0 = *(float64 *)((char *)vn + H1_2(i)); + e1 = *(float64 *)((char *)vm + H1_2(j)) ^ neg_real; + e2 = *(float64 *)((char *)vn + H1_2(j)); + e3 = *(float64 *)((char *)vm + H1_2(i)) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + *(float64 *)((char *)vd + H1_2(i)) = float64_add(e0, e1, vs); + } + if (likely((pg >> (j & 63)) & 1)) { + *(float64 *)((char *)vd + H1_2(j)) = float64_add(e2, e3, vs); + } + } while (i & 63); + } while (i != 0); +} + +/* + * FP Complex Multiply + */ + +void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + unsigned rot = simd_data(desc); + bool flip = rot & 1; + float16 neg_imag, neg_real; + uint64_t *g = vg; + + neg_imag = float16_set_sign(0, (rot & 2) != 0); + neg_real = float16_set_sign(0, rot == 1 || rot == 2); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float16 e1, e2, e3, e4, nr, ni, mr, mi, d; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float16); + i -= 2 * sizeof(float16); + + nr = *(float16 *)((char *)vn + H1_2(i)); + ni = *(float16 *)((char *)vn + H1_2(j)); + mr = *(float16 *)((char *)vm + H1_2(i)); + mi = *(float16 *)((char *)vm + H1_2(j)); + + e2 = (flip ? ni : nr); + e1 = (flip ? mi : mr) ^ neg_real; + e4 = e2; + e3 = (flip ? mr : mi) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + d = *(float16 *)((char *)va + H1_2(i)); + d = float16_muladd(e2, e1, d, 0, status); + *(float16 *)((char *)vd + H1_2(i)) = d; + } + if (likely((pg >> (j & 63)) & 1)) { + d = *(float16 *)((char *)va + H1_2(j)); + d = float16_muladd(e4, e3, d, 0, status); + *(float16 *)((char *)vd + H1_2(j)) = d; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + unsigned rot = simd_data(desc); + bool flip = rot & 1; + float32 neg_imag, neg_real; + uint64_t *g = vg; + + neg_imag = float32_set_sign(0, (rot & 2) != 0); + neg_real = float32_set_sign(0, rot == 1 || rot == 2); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float32 e1, e2, e3, e4, nr, ni, mr, mi, d; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float32); + i -= 2 * sizeof(float32); + + nr = *(float32 *)((char *)vn + H1_2(i)); + ni = *(float32 *)((char *)vn + H1_2(j)); + mr = *(float32 *)((char *)vm + H1_2(i)); + mi = *(float32 *)((char *)vm + H1_2(j)); + + e2 = (flip ? ni : nr); + e1 = (flip ? mi : mr) ^ neg_real; + e4 = e2; + e3 = (flip ? 
mr : mi) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + d = *(float32 *)((char *)va + H1_2(i)); + d = float32_muladd(e2, e1, d, 0, status); + *(float32 *)((char *)vd + H1_2(i)) = d; + } + if (likely((pg >> (j & 63)) & 1)) { + d = *(float32 *)((char *)va + H1_2(j)); + d = float32_muladd(e4, e3, d, 0, status); + *(float32 *)((char *)vd + H1_2(j)) = d; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + unsigned rot = simd_data(desc); + bool flip = rot & 1; + float64 neg_imag, neg_real; + uint64_t *g = vg; + + neg_imag = float64_set_sign(0, (rot & 2) != 0); + neg_real = float64_set_sign(0, rot == 1 || rot == 2); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float64 e1, e2, e3, e4, nr, ni, mr, mi, d; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float64); + i -= 2 * sizeof(float64); + + nr = *(float64 *)((char *)vn + H1_2(i)); + ni = *(float64 *)((char *)vn + H1_2(j)); + mr = *(float64 *)((char *)vm + H1_2(i)); + mi = *(float64 *)((char *)vm + H1_2(j)); + + e2 = (flip ? ni : nr); + e1 = (flip ? mi : mr) ^ neg_real; + e4 = e2; + e3 = (flip ? mr : mi) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + d = *(float64 *)((char *)va + H1_2(i)); + d = float64_muladd(e2, e1, d, 0, status); + *(float64 *)((char *)vd + H1_2(i)) = d; + } + if (likely((pg >> (j & 63)) & 1)) { + d = *(float64 *)((char *)va + H1_2(j)); + d = float64_muladd(e4, e3, d, 0, status); + *(float64 *)((char *)vd + H1_2(j)) = d; + } + } while (i & 63); + } while (i != 0); +} + +/* + * Load contiguous data, protected by a governing predicate. + */ + +/* + * Load one element into @vd + @reg_off from @host. + * The controlling predicate is known to be true. + */ +typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); + +/* + * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). + * The controlling predicate is known to be true. + */ +typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, uintptr_t retaddr); + +/* + * Generate the above primitives. 
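+ * For example, DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) below expands into
+ * the pair sve_ld1bb_host() and sve_ld1bb_tlb().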
+ */ + +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ \ + TYPEM val = HOST(host); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ +} + +#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ HOST(host, (TYPEM)*(TYPEE *)(vd + H(reg_off))); } + +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + *(TYPEE *)(vd + H(reg_off)) = \ + (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ +} + +#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + TLB(env, useronly_clean_ptr(addr), \ + (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ +} + + +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ + DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) + +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) +DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) +DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) + +#define DO_ST_PRIM_1(NAME, H, TE, TM) \ + DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ + DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) + +DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) +DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) +DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) +DO_ST_PRIM_1(bd, , uint64_t, uint8_t) + +#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ + DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ + DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ + DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ + DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) + +#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ + DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ + DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ + DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ + DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) + +DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) +DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) +DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) +DO_LD_PRIM_2(hdu, , uint64_t, uint16_t, lduw) +DO_LD_PRIM_2(hds, , uint64_t, int16_t, lduw) + +DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) +DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) +DO_ST_PRIM_2(hd, , uint64_t, uint16_t, stw) + +DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) +DO_LD_PRIM_2(sdu, , uint64_t, uint32_t, ldl) +DO_LD_PRIM_2(sds, , uint64_t, int32_t, ldl) + +DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) +DO_ST_PRIM_2(sd, , uint64_t, uint32_t, stl) + +DO_LD_PRIM_2(dd, , uint64_t, uint64_t, ldq) +DO_ST_PRIM_2(dd, , uint64_t, uint64_t, stq) + +#undef DO_LD_TLB +#undef DO_ST_TLB +#undef DO_LD_HOST +#undef DO_LD_PRIM_1 +#undef DO_ST_PRIM_1 +#undef DO_LD_PRIM_2 +#undef DO_ST_PRIM_2 + +/* + * Skip through a sequence of inactive elements in the guarding predicate @vg, + * beginning at @reg_off bounded by @reg_max. Return the offset of the active + * element >= @reg_off, or @reg_max if there were no active elements at all. 
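+ * For example, with byte elements (esz == 0) and the low predicate bits
+ * 0b1000, a search from reg_off 0 skips the three inactive bytes and returns 3.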
+ */ +static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, + intptr_t reg_max, int esz) +{ + uint64_t pg_mask = pred_esz_masks[esz]; + uint64_t pg = (vg[reg_off >> 6] & pg_mask) >> (reg_off & 63); + + /* In normal usage, the first element is active. */ + if (likely(pg & 1)) { + return reg_off; + } + + if (pg == 0) { + reg_off &= -64; + do { + reg_off += 64; + if (unlikely(reg_off >= reg_max)) { + /* The entire predicate was false. */ + return reg_max; + } + pg = vg[reg_off >> 6] & pg_mask; + } while (pg == 0); + } + reg_off += ctz64(pg); + + /* We should never see an out of range predicate bit set. */ + tcg_debug_assert(reg_off < reg_max); + return reg_off; +} + +/* + * Resolve the guest virtual address to info->host and info->flags. + * If @nofault, return false if the page is invalid, otherwise + * exit via page fault exception. + */ + +typedef struct { + void *host; + int flags; + MemTxAttrs attrs; +} SVEHostPage; + +static bool sve_probe_page(SVEHostPage *info, bool nofault, + CPUARMState *env, target_ulong addr, + int mem_off, MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + int flags; + + addr += mem_off; + + /* + * User-only currently always issues with TBI. See the comment + * above useronly_clean_ptr. Usually we clean this top byte away + * during translation, but we can't do that for e.g. vector + imm + * addressing modes. + * + * We currently always enable TBI for user-only, and do not provide + * a way to turn it off. So clean the pointer unconditionally here, + * rather than look it up here, or pass it down from above. + */ + addr = useronly_clean_ptr(addr); + + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, + &info->host, retaddr); + info->flags = flags; + + if (flags & TLB_INVALID_MASK) { + g_assert(nofault); + return false; + } + + /* Ensure that info->host[] is relative to addr, not addr + mem_off. */ + info->host -= mem_off; + + /* + * Find the iotlbentry for addr and return the transaction attributes. + * This *must* be present in the TLB because we just found the mapping. + */ + { + uintptr_t index = tlb_index(env, mmu_idx, addr); + +# ifdef CONFIG_DEBUG_TCG + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong comparator = (access_type == MMU_DATA_LOAD + ? entry->addr_read + : tlb_addr_write(entry)); + g_assert(tlb_hit(comparator, addr)); +# endif + + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + info->attrs = iotlbentry->attrs; + } + + return true; +} + + +/* + * Analyse contiguous data, protected by a governing predicate. + */ + +typedef enum { + FAULT_NO, + FAULT_FIRST, + FAULT_ALL, +} SVEContFault; + +typedef struct { + /* + * First and last element wholly contained within the two pages. + * mem_off_first[0] and reg_off_first[0] are always set >= 0. + * reg_off_last[0] may be < 0 if the first element crosses pages. + * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] + * are set >= 0 only if there are complete elements on a second page. + * + * The reg_off_* offsets are relative to the internal vector register. + * The mem_off_first offset is relative to the memory address; the + * two offsets are different when a load operation extends, a store + * operation truncates, or for multi-register operations. + */ + int16_t mem_off_first[2]; + int16_t reg_off_first[2]; + int16_t reg_off_last[2]; + + /* + * One element that is misaligned and spans both pages, + * or -1 if there is no such active element. 
+ */ + int16_t mem_off_split; + int16_t reg_off_split; + + /* + * The byte offset at which the entire operation crosses a page boundary. + * Set >= 0 if and only if the entire operation spans two pages. + */ + int16_t page_split; + + /* TLB data for the two pages. */ + SVEHostPage page[2]; +} SVEContLdSt; + +/* + * Common helper for all contiguous one-register predicated loads. + */ +static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + ARMVectorReg scratch; + void *host; + intptr_t split, reg_off, mem_off; + + /* Find the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. + */ + split = max_for_page(env->uc, addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + clear_helper_retaddr(); + /* After having taken any fault, zero leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } + + /* + * Perform the predicated read into a temporary, thus ensuring + * if the load of the last element faults, Vd is not modified. + */ + memset(&scratch, 0, reg_max); + goto start; + while (1) { + reg_off = find_next_active(vg, reg_off, reg_max, esz); + if (reg_off >= reg_max) { + break; + } + mem_off = reg_off >> diffsz; + split = max_for_page(env->uc, addr, mem_off, mem_max); + + start: + if (split - mem_off >= (1ULL << msz)) { + /* At least one whole element on this page. */ + host = tlb_vaddr_to_host(env, addr + mem_off, + MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(&scratch, vg, (char *)host - mem_off, + mem_off, split); + reg_off = mem_off << diffsz; + continue; + } + } + + /* + * Perform one normal read. This may fault, longjmping out to the + * main loop in order to raise an exception. It may succeed, and + * as a side-effect load the TLB entry for the next round. Finally, + * in the extremely unlikely case we're performing this operation + * on I/O memory, it may succeed but not bring in the TLB entry. + * But even then we have still made forward progress. 
+ */ + tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr); + reg_off += 1ULL << esz; + } + + clear_helper_retaddr(); + memcpy(vd, &scratch, reg_max); +} + +#define DO_LD1_1(NAME, ESZ) \ +void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ +} + +#define DO_LD1_2(NAME, ESZ, MSZ) \ +void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +} + +DO_LD1_1(ld1bb, 0) +DO_LD1_1(ld1bhu, 1) +DO_LD1_1(ld1bhs, 1) +DO_LD1_1(ld1bsu, 2) +DO_LD1_1(ld1bss, 2) +DO_LD1_1(ld1bdu, 3) +DO_LD1_1(ld1bds, 3) + +DO_LD1_2(ld1hh, 1, 1) +DO_LD1_2(ld1hsu, 2, 1) +DO_LD1_2(ld1hss, 2, 1) +DO_LD1_2(ld1hdu, 3, 1) +DO_LD1_2(ld1hds, 3, 1) + +DO_LD1_2(ld1ss, 2, 2) +DO_LD1_2(ld1sdu, 3, 2) +DO_LD1_2(ld1sds, 3, 2) + +DO_LD1_2(ld1dd, 3, 3) + +#undef DO_LD1_1 +#undef DO_LD1_2 + +/* + * Common helpers for all contiguous 2,3,4-register predicated loads. + */ +static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[2] = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + } + i += size, pg >>= size; + addr += 2 * size; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); +} + +static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[3] = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + } + i += size, pg >>= size; + addr += 3 * size; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. 
*/ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); +} + +static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[4] = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra); + } + i += size, pg >>= size; + addr += 4 * size; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); + memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz); +} + +#define DO_LDN_1(N) \ +void QEMU_FLATTEN HELPER(sve_ld##N##bb_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \ +} + +#define DO_LDN_2(N, SUFF, SIZE) \ +void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_le_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_le_tlb); \ +} \ +void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_be_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_be_tlb); \ +} + +DO_LDN_1(2) +DO_LDN_1(3) +DO_LDN_1(4) + +DO_LDN_2(2, hh, 2) +DO_LDN_2(3, hh, 2) +DO_LDN_2(4, hh, 2) + +DO_LDN_2(2, ss, 4) +DO_LDN_2(3, ss, 4) +DO_LDN_2(4, ss, 4) + +DO_LDN_2(2, dd, 8) +DO_LDN_2(3, dd, 8) +DO_LDN_2(4, dd, 8) + +#undef DO_LDN_1 +#undef DO_LDN_2 + +/* + * Load contiguous data, first-fault and no-fault. + * + * For user-only, one could argue that we should hold the mmap_lock during + * the operation so that there is no race between page_check_range and the + * load operation. However, unmapping pages out from under a running thread + * is extraordinarily unlikely. This theoretical race condition also affects + * linux-user/ in its get_user/put_user macros. + * + * TODO: Construct some helpers, written in assembly, that interact with + * handle_cpu_signal to produce memory ops which can properly report errors + * without racing. + */ + +/* Fault on byte I. All bits in FFR from I are cleared. The vector + * result from I is CONSTRAINED UNPREDICTABLE; we choose the MERGE + * option, which leaves subsequent data unchanged. + */ +static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) +{ + uint64_t *ffr = env->vfp.pregs[FFR_PRED_NUM].p; + + if (i & 63) { + ffr[i / 64] &= MAKE_64BIT_MASK(0, i & 63); + i = ROUND_UP(i, 64); + } + for (; i < oprsz; i += 64) { + ffr[i / 64] = 0; + } +} + +/* + * Common helper for all contiguous first-fault loads. 
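+ * Only the first active element is permitted to fault; if a later element
+ * cannot be loaded without faulting, the FFR is truncated via record_fault()
+ * and the instruction still completes.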
+ */ +static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + intptr_t split, reg_off, mem_off; + void *host; + + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. + */ + split = max_for_page(env->uc, addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + clear_helper_retaddr(); + /* After any fault, zero any leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } + + /* + * Perform one normal read, which will fault or not. + * But it is likely to bring the page into the tlb. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr); + + /* After any fault, zero any leading predicated false elts. */ + swap_memzero(vd, reg_off); + mem_off += 1ULL << msz; + reg_off += 1ULL << esz; + + /* Try again to read the balance of the page. */ + split = max_for_page(env->uc, addr, mem_off - 1, mem_max); + if (split >= (1ULL << msz)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } + } + + clear_helper_retaddr(); + record_fault(env, reg_off, reg_max); +} + +/* + * Common helper for all contiguous no-fault loads. + */ +static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const int esz, const int msz, + sve_ld1_host_fn *host_fn) +{ + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + const int mmu_idx = cpu_mmu_index(env, false); + intptr_t split, reg_off, mem_off; + void *host; + + /* There will be no fault, so we may modify in advance. */ + memset(vd, 0, reg_max); + + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + return; + } + mem_off = reg_off >> diffsz; + + /* + * If the address is not in the TLB, we have no way to bring the + * entry into the TLB without also risking a fault. Note that + * the corollary is that we never load from an address not in RAM. + * + * This last is out of spec, in a weird corner case. 
+ * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory + * must not actually hit the bus -- it returns UNKNOWN data instead. + * But if you map non-RAM with Normal memory attributes and do a NF + * load then it should access the bus. (Nobody ought actually do this + * in the real world, obviously.) + * + * Then there are the annoying special cases with watchpoints... + * TODO: Add a form of non-faulting loads using cc->tlb_fill(probe=true). + */ + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + split = max_for_page(env->uc, addr, mem_off, mem_max); + if (host && split >= (1ULL << msz)) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } + + record_fault(env, reg_off, reg_max); +} + +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ +void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ +} + +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ +void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ +} \ +void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ +} + +DO_LDFF1_LDNF1_1(bb, 0) +DO_LDFF1_LDNF1_1(bhu, 1) +DO_LDFF1_LDNF1_1(bhs, 1) +DO_LDFF1_LDNF1_1(bsu, 2) +DO_LDFF1_LDNF1_1(bss, 2) +DO_LDFF1_LDNF1_1(bdu, 3) +DO_LDFF1_LDNF1_1(bds, 3) + +DO_LDFF1_LDNF1_2(hh, 1, 1) +DO_LDFF1_LDNF1_2(hsu, 2, 1) +DO_LDFF1_LDNF1_2(hss, 2, 1) +DO_LDFF1_LDNF1_2(hdu, 3, 1) +DO_LDFF1_LDNF1_2(hds, 3, 1) + +DO_LDFF1_LDNF1_2(ss, 2, 2) +DO_LDFF1_LDNF1_2(sdu, 3, 2) +DO_LDFF1_LDNF1_2(sds, 3, 2) + +DO_LDFF1_LDNF1_2(dd, 3, 3) + +#undef DO_LDFF1_LDNF1_1 +#undef DO_LDFF1_LDNF1_2 + +/* + * Store contiguous data, protected by a governing predicate. 
+ */ + +#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + TLB(env, addr, *(TYPEM *)((char *)vd + H(reg_off)), oi, ra); \ +} + +DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu) + +DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu) + +DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu) +DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu) + +DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu) + +DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu) + +DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu) +DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu) + +DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu) + +#undef DO_ST_TLB + +/* + * Common helpers for all contiguous 1,2,3,4-register predicated stores. + */ +static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *vd = &env->vfp.zregs[rd]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, vd, i, addr, oi, ra); + } + i += esize, pg >>= esize; + addr += msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 2 * msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { 
+ tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 3 * msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + void *d4 = &env->vfp.zregs[(rd + 3) & 31]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); + tlb_fn(env, d4, i, addr + 3 * msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 4 * msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +#define DO_STN_1(N, NAME, ESIZE) \ +void QEMU_FLATTEN HELPER(sve_st##N##NAME##_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \ + sve_st1##NAME##_tlb); \ +} + +#define DO_STN_2(N, NAME, ESIZE, MSIZE) \ +void QEMU_FLATTEN HELPER(sve_st##N##NAME##_le_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ + sve_st1##NAME##_le_tlb); \ +} \ +void QEMU_FLATTEN HELPER(sve_st##N##NAME##_be_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ + sve_st1##NAME##_be_tlb); \ +} + +DO_STN_1(1, bb, 1) +DO_STN_1(1, bh, 2) +DO_STN_1(1, bs, 4) +DO_STN_1(1, bd, 8) +DO_STN_1(2, bb, 1) +DO_STN_1(3, bb, 1) +DO_STN_1(4, bb, 1) + +DO_STN_2(1, hh, 2, 2) +DO_STN_2(1, hs, 4, 2) +DO_STN_2(1, hd, 8, 2) +DO_STN_2(2, hh, 2, 2) +DO_STN_2(3, hh, 2, 2) +DO_STN_2(4, hh, 2, 2) + +DO_STN_2(1, ss, 4, 4) +DO_STN_2(1, sd, 8, 4) +DO_STN_2(2, ss, 4, 4) +DO_STN_2(3, ss, 4, 4) +DO_STN_2(4, ss, 4, 4) + +DO_STN_2(1, dd, 8, 8) +DO_STN_2(2, dd, 8, 8) +DO_STN_2(3, dd, 8, 8) +DO_STN_2(4, dd, 8, 8) + +#undef DO_STN_1 +#undef DO_STN_2 + +/* + * Loads with a vector index. + */ + +/* + * Load the element at @reg + @reg_ofs, sign or zero-extend as needed. 
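+ * The zsu/zss/zd forms below take unsigned 32-bit, signed 32-bit and 64-bit
+ * offsets respectively; the _s/_d suffix matches the 32-bit or 64-bit element
+ * size of the offset vector.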
+ */ +typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs); + +static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs) +{ + return *(uint32_t *)((char *)reg + H1_4(reg_ofs)); +} + +static target_ulong off_zss_s(void *reg, intptr_t reg_ofs) +{ + return *(int32_t *)((char *)reg + H1_4(reg_ofs)); +} + +static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs) +{ + return (uint32_t)*(uint64_t *)((char *)reg + reg_ofs); +} + +static target_ulong off_zss_d(void *reg, intptr_t reg_ofs) +{ + return (int32_t)*(uint64_t *)((char *)reg + reg_ofs); +} + +static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) +{ + return *(uint64_t *)((char *)reg + reg_ofs); +} + +static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i); + tlb_fn(env, &scratch, i, base + (off << scale), oi, ra); + } + i += 4, pg >>= 4; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(vd, &scratch, oprsz); +} + +static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc) / 8; + ARMVectorReg scratch = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; i++) { + uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i * 8); + tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra); + } + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. 
*/ + memcpy(vd, &scratch, oprsz * 8); +} + +#define DO_LD1_ZPZ_S(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_ld1##MEM##_tlb); \ +} + +#define DO_LD1_ZPZ_D(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_ld1##MEM##_tlb); \ +} + +DO_LD1_ZPZ_S(bsu, zsu) +DO_LD1_ZPZ_S(bsu, zss) +DO_LD1_ZPZ_D(bdu, zsu) +DO_LD1_ZPZ_D(bdu, zss) +DO_LD1_ZPZ_D(bdu, zd) + +DO_LD1_ZPZ_S(bss, zsu) +DO_LD1_ZPZ_S(bss, zss) +DO_LD1_ZPZ_D(bds, zsu) +DO_LD1_ZPZ_D(bds, zss) +DO_LD1_ZPZ_D(bds, zd) + +DO_LD1_ZPZ_S(hsu_le, zsu) +DO_LD1_ZPZ_S(hsu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zsu) +DO_LD1_ZPZ_D(hdu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zd) + +DO_LD1_ZPZ_S(hsu_be, zsu) +DO_LD1_ZPZ_S(hsu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zsu) +DO_LD1_ZPZ_D(hdu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zd) + +DO_LD1_ZPZ_S(hss_le, zsu) +DO_LD1_ZPZ_S(hss_le, zss) +DO_LD1_ZPZ_D(hds_le, zsu) +DO_LD1_ZPZ_D(hds_le, zss) +DO_LD1_ZPZ_D(hds_le, zd) + +DO_LD1_ZPZ_S(hss_be, zsu) +DO_LD1_ZPZ_S(hss_be, zss) +DO_LD1_ZPZ_D(hds_be, zsu) +DO_LD1_ZPZ_D(hds_be, zss) +DO_LD1_ZPZ_D(hds_be, zd) + +DO_LD1_ZPZ_S(ss_le, zsu) +DO_LD1_ZPZ_S(ss_le, zss) +DO_LD1_ZPZ_D(sdu_le, zsu) +DO_LD1_ZPZ_D(sdu_le, zss) +DO_LD1_ZPZ_D(sdu_le, zd) + +DO_LD1_ZPZ_S(ss_be, zsu) +DO_LD1_ZPZ_S(ss_be, zss) +DO_LD1_ZPZ_D(sdu_be, zsu) +DO_LD1_ZPZ_D(sdu_be, zss) +DO_LD1_ZPZ_D(sdu_be, zd) + +DO_LD1_ZPZ_D(sds_le, zsu) +DO_LD1_ZPZ_D(sds_le, zss) +DO_LD1_ZPZ_D(sds_le, zd) + +DO_LD1_ZPZ_D(sds_be, zsu) +DO_LD1_ZPZ_D(sds_be, zss) +DO_LD1_ZPZ_D(sds_be, zd) + +DO_LD1_ZPZ_D(dd_le, zsu) +DO_LD1_ZPZ_D(dd_le, zss) +DO_LD1_ZPZ_D(dd_le, zd) + +DO_LD1_ZPZ_D(dd_be, zsu) +DO_LD1_ZPZ_D(dd_be, zss) +DO_LD1_ZPZ_D(dd_be, zd) + +#undef DO_LD1_ZPZ_S +#undef DO_LD1_ZPZ_D + +/* First fault loads with a vector index. */ + +/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting. + * The controlling predicate is known to be true. Return true if the + * load was successful. 
+ */ +typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, int mmu_idx); + +#ifdef _MSC_VER +#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ +static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, int mmu_idx) \ +{ \ + struct uc_struct *uc = env->uc; \ + target_ulong next_page = 0ULL - (addr | TARGET_PAGE_MASK); \ + if (likely(next_page - addr >= sizeof(TYPEM))) { \ + void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ + if (likely(host)) { \ + TYPEM val = HOST(host); \ + *(TYPEE *)((char *)vd + H(reg_off)) = val; \ + return true; \ + } \ + } \ + return false; \ +} +#else +#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ +static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, int mmu_idx) \ +{ \ + struct uc_struct *uc = env->uc; \ + target_ulong next_page = -(addr | TARGET_PAGE_MASK); \ + if (likely(next_page - addr >= sizeof(TYPEM))) { \ + void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ + if (likely(host)) { \ + TYPEM val = HOST(host); \ + *(TYPEE *)((char *)vd + H(reg_off)) = val; \ + return true; \ + } \ + } \ + return false; \ +} +#endif + +DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p) +DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p) +DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p) +DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p) + +DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p) +DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p) +DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p) +DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p) +DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p) +DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p) +DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p) +DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p) + +DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p) +DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p) +DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p) +DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p) +DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p) +DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p) + +DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p) +DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p) + +/* + * Common helper for all gather first-faulting loads. + */ +static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, + sve_ld1_nf_fn *nonfault_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t reg_off, reg_max = simd_oprsz(desc); + target_ulong addr; + + /* Skip to the first true predicate. */ + reg_off = find_next_active(vg, 0, reg_max, MO_32); + if (likely(reg_off < reg_max)) { + /* Perform one normal read, which will fault or not. */ + set_helper_retaddr(ra); + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + tlb_fn(env, vd, reg_off, addr, oi, ra); + + /* The rest of the reads will be non-faulting. */ + clear_helper_retaddr(); + } + + /* After any fault, zero the leading predicated false elements. 
*/ + swap_memzero(vd, reg_off); + + while (likely((reg_off += 4) < reg_max)) { + uint64_t pg = *(uint64_t *)((char *)vg + (reg_off >> 6) * 8); + if (likely((pg >> (reg_off & 63)) & 1)) { + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { + record_fault(env, reg_off, reg_max); + break; + } + } else { + *(uint32_t *)((char *)vd + H1_4(reg_off)) = 0; + } + } +} + +static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, + sve_ld1_nf_fn *nonfault_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t reg_off, reg_max = simd_oprsz(desc); + target_ulong addr; + + /* Skip to the first true predicate. */ + reg_off = find_next_active(vg, 0, reg_max, MO_64); + if (likely(reg_off < reg_max)) { + /* Perform one normal read, which will fault or not. */ + set_helper_retaddr(ra); + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + tlb_fn(env, vd, reg_off, addr, oi, ra); + + /* The rest of the reads will be non-faulting. */ + clear_helper_retaddr(); + } + + /* After any fault, zero the leading predicated false elements. */ + swap_memzero(vd, reg_off); + + while (likely((reg_off += 8) < reg_max)) { + uint8_t pg = *(uint8_t *)((char *)vg + H1(reg_off >> 3)); + if (likely(pg & 1)) { + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { + record_fault(env, reg_off, reg_max); + break; + } + } else { + *(uint64_t *)((char *)vd + reg_off) = 0; + } + } +} + +#define DO_LDFF1_ZPZ_S(MEM, OFS) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ +} + +#define DO_LDFF1_ZPZ_D(MEM, OFS) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ +} + +DO_LDFF1_ZPZ_S(bsu, zsu) +DO_LDFF1_ZPZ_S(bsu, zss) +DO_LDFF1_ZPZ_D(bdu, zsu) +DO_LDFF1_ZPZ_D(bdu, zss) +DO_LDFF1_ZPZ_D(bdu, zd) + +DO_LDFF1_ZPZ_S(bss, zsu) +DO_LDFF1_ZPZ_S(bss, zss) +DO_LDFF1_ZPZ_D(bds, zsu) +DO_LDFF1_ZPZ_D(bds, zss) +DO_LDFF1_ZPZ_D(bds, zd) + +DO_LDFF1_ZPZ_S(hsu_le, zsu) +DO_LDFF1_ZPZ_S(hsu_le, zss) +DO_LDFF1_ZPZ_D(hdu_le, zsu) +DO_LDFF1_ZPZ_D(hdu_le, zss) +DO_LDFF1_ZPZ_D(hdu_le, zd) + +DO_LDFF1_ZPZ_S(hsu_be, zsu) +DO_LDFF1_ZPZ_S(hsu_be, zss) +DO_LDFF1_ZPZ_D(hdu_be, zsu) +DO_LDFF1_ZPZ_D(hdu_be, zss) +DO_LDFF1_ZPZ_D(hdu_be, zd) + +DO_LDFF1_ZPZ_S(hss_le, zsu) +DO_LDFF1_ZPZ_S(hss_le, zss) +DO_LDFF1_ZPZ_D(hds_le, zsu) +DO_LDFF1_ZPZ_D(hds_le, zss) +DO_LDFF1_ZPZ_D(hds_le, zd) + +DO_LDFF1_ZPZ_S(hss_be, zsu) +DO_LDFF1_ZPZ_S(hss_be, zss) +DO_LDFF1_ZPZ_D(hds_be, zsu) +DO_LDFF1_ZPZ_D(hds_be, zss) +DO_LDFF1_ZPZ_D(hds_be, zd) + +DO_LDFF1_ZPZ_S(ss_le, zsu) +DO_LDFF1_ZPZ_S(ss_le, zss) +DO_LDFF1_ZPZ_D(sdu_le, zsu) +DO_LDFF1_ZPZ_D(sdu_le, zss) +DO_LDFF1_ZPZ_D(sdu_le, zd) + +DO_LDFF1_ZPZ_S(ss_be, zsu) +DO_LDFF1_ZPZ_S(ss_be, zss) +DO_LDFF1_ZPZ_D(sdu_be, zsu) +DO_LDFF1_ZPZ_D(sdu_be, zss) +DO_LDFF1_ZPZ_D(sdu_be, zd) + +DO_LDFF1_ZPZ_D(sds_le, zsu) +DO_LDFF1_ZPZ_D(sds_le, zss) 
+DO_LDFF1_ZPZ_D(sds_le, zd) + +DO_LDFF1_ZPZ_D(sds_be, zsu) +DO_LDFF1_ZPZ_D(sds_be, zss) +DO_LDFF1_ZPZ_D(sds_be, zd) + +DO_LDFF1_ZPZ_D(dd_le, zsu) +DO_LDFF1_ZPZ_D(dd_le, zss) +DO_LDFF1_ZPZ_D(dd_le, zd) + +DO_LDFF1_ZPZ_D(dd_be, zsu) +DO_LDFF1_ZPZ_D(dd_be, zss) +DO_LDFF1_ZPZ_D(dd_be, zd) + +/* Stores with a vector index. */ + +static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc); + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i); + tlb_fn(env, vd, i, base + (off << scale), oi, ra); + } + i += 4, pg >>= 4; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc) / 8; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; i++) { + uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i * 8); + tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra); + } + } + clear_helper_retaddr(); +} + +#define DO_ST1_ZPZ_S(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_st1##MEM##_tlb); \ +} + +#define DO_ST1_ZPZ_D(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_st1##MEM##_tlb); \ +} + +DO_ST1_ZPZ_S(bs, zsu) +DO_ST1_ZPZ_S(hs_le, zsu) +DO_ST1_ZPZ_S(hs_be, zsu) +DO_ST1_ZPZ_S(ss_le, zsu) +DO_ST1_ZPZ_S(ss_be, zsu) + +DO_ST1_ZPZ_S(bs, zss) +DO_ST1_ZPZ_S(hs_le, zss) +DO_ST1_ZPZ_S(hs_be, zss) +DO_ST1_ZPZ_S(ss_le, zss) +DO_ST1_ZPZ_S(ss_be, zss) + +DO_ST1_ZPZ_D(bd, zsu) +DO_ST1_ZPZ_D(hd_le, zsu) +DO_ST1_ZPZ_D(hd_be, zsu) +DO_ST1_ZPZ_D(sd_le, zsu) +DO_ST1_ZPZ_D(sd_be, zsu) +DO_ST1_ZPZ_D(dd_le, zsu) +DO_ST1_ZPZ_D(dd_be, zsu) + +DO_ST1_ZPZ_D(bd, zss) +DO_ST1_ZPZ_D(hd_le, zss) +DO_ST1_ZPZ_D(hd_be, zss) +DO_ST1_ZPZ_D(sd_le, zss) +DO_ST1_ZPZ_D(sd_be, zss) +DO_ST1_ZPZ_D(dd_le, zss) +DO_ST1_ZPZ_D(dd_be, zss) + +DO_ST1_ZPZ_D(bd, zd) +DO_ST1_ZPZ_D(hd_le, zd) +DO_ST1_ZPZ_D(hd_be, zd) +DO_ST1_ZPZ_D(sd_le, zd) +DO_ST1_ZPZ_D(sd_be, zd) +DO_ST1_ZPZ_D(dd_le, zd) +DO_ST1_ZPZ_D(dd_be, zd) + +#undef DO_ST1_ZPZ_S +#undef DO_ST1_ZPZ_D diff --git a/qemu/target/arm/cpu-param.h b/qemu/target/arm/cpu-param.h index 208858c700..9e7aaea79f 100644 --- a/qemu/target/arm/cpu-param.h +++ b/qemu/target/arm/cpu-param.h @@ -25,6 +25,6 @@ # define TARGET_PAGE_BITS_VARY # define TARGET_PAGE_BITS_MIN 10 -#define NB_MMU_MODES 12 +#define NB_MMU_MODES 11 #endif diff --git a/qemu/target/arm/cpu-qom.h b/qemu/target/arm/cpu-qom.h index 963a628d7c..bf5037d346 100644 --- a/qemu/target/arm/cpu-qom.h +++ b/qemu/target/arm/cpu-qom.h @@ -32,7 +32,14 @@ struct arm_boot_info; #define 
TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU -typedef struct ARMCPUInfo ARMCPUInfo; +typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(struct uc_struct *uc, CPUState *obj); + void (*class_init)(struct uc_struct *uc, CPUClass *oc, void *data); +} ARMCPUInfo; + +void arm_cpu_register(const ARMCPUInfo *info); +void aarch64_cpu_register(const ARMCPUInfo *info); /** * ARMCPUClass: diff --git a/qemu/target/arm/cpu.c b/qemu/target/arm/cpu.c index 7613381980..e0c59cf0f1 100644 --- a/qemu/target/arm/cpu.c +++ b/qemu/target/arm/cpu.c @@ -596,16 +596,6 @@ void arm_cpu_update_vfiq(ARMCPU *cpu) } } -static inline void set_feature(CPUARMState *env, int feature) -{ - env->features |= 1ULL << feature; -} - -static inline void unset_feature(CPUARMState *env, int feature) -{ - env->features &= ~(1ULL << feature); -} - static uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz) { uint32_t Aff1 = idx / clustersz; @@ -2003,6 +1993,7 @@ static void arm_max_initfn(struct uc_struct *uc, CPUState *obj) FIELD_DP32(t, ID_MMFR4, HPDS, 1, t); /* AA32HPD */ FIELD_DP32(t, ID_MMFR4, AC2, 1, t); /* ACTLR2, HACTLR2 */ FIELD_DP32(t, ID_MMFR4, CNP, 1, t); /* TTCNP */ + FIELD_DP32(t, ID_MMFR4, XNX, 1, t); /* TTS2UXN */ cpu->isar.id_mmfr4 = t; } //#endif @@ -2012,12 +2003,6 @@ static void arm_max_initfn(struct uc_struct *uc, CPUState *obj) #endif /* !defined(TARGET_AARCH64) */ -struct ARMCPUInfo { - const char *name; - void (*initfn)(struct uc_struct *uc, CPUState *obj); - void (*class_init)(struct uc_struct *uc, CPUClass *oc, void *data); -}; - #if !defined(TARGET_AARCH64) static struct ARMCPUInfo arm_cpus[] = { { "arm926", arm926_initfn }, diff --git a/qemu/target/arm/cpu.h b/qemu/target/arm/cpu.h index f857850cfc..794c5ab05b 100644 --- a/qemu/target/arm/cpu.h +++ b/qemu/target/arm/cpu.h @@ -480,6 +480,9 @@ typedef struct CPUARMState { uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */ uint64_t vpidr_el2; /* Virtualization Processor ID Register */ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */ + uint64_t tfsr_el[4]; /* tfsre0_el1 is index 0. */ + uint64_t gcr_el1; + uint64_t rgsr_el1; } cp15; struct { @@ -548,6 +551,8 @@ typedef struct CPUARMState { uint64_t esr; } serror; + uint8_t ext_dabt_raised; /* Tracking/verifying injection of ext DABT */ + /* State of our input IRQ/FIQ/VIRQ/VFIQ lines */ uint32_t irq_line_state; @@ -680,6 +685,16 @@ typedef struct CPUARMState { struct uc_struct *uc; } CPUARMState; +static inline void set_feature(CPUARMState *env, int feature) +{ + env->features |= 1ULL << feature; +} + +static inline void unset_feature(CPUARMState *env, int feature) +{ + env->features &= ~(1ULL << feature); +} + /** * ARMELChangeHookFn: * type of a function which can be registered via arm_register_el_change_hook() @@ -757,6 +772,10 @@ struct ARMCPU { /* MemoryRegion to use for secure physical accesses */ MemoryRegion *secure_memory; + /* MemoryRegion to use for allocation tag accesses */ + MemoryRegion *tag_memory; + MemoryRegion *secure_tag_memory; + /* For v8M, pointer to the IDAU interface provided by board/SoC */ void *idau; @@ -858,7 +877,7 @@ struct ARMCPU { uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; } isar; - uint32_t midr; + uint64_t midr; uint32_t revidr; uint32_t reset_fpsid; uint32_t ctr; @@ -1152,7 +1171,7 @@ void pmu_init(ARMCPU *cpu); #define CACHED_CPSR_BITS (CPSR_T | CPSR_AIF | CPSR_GE | CPSR_IT | CPSR_Q \ | CPSR_NZCV) /* Bits writable in user mode. 
*/ -#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE) +#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE | CPSR_E) /* Execution state bits. MRS read as zero, MSR writes ignored. */ #define CPSR_EXEC (CPSR_T | CPSR_IT | CPSR_J | CPSR_IL) @@ -1204,6 +1223,7 @@ void pmu_init(ARMCPU *cpu); #define PSTATE_SS (1U << 21) #define PSTATE_PAN (1U << 22) #define PSTATE_UAO (1U << 23) +#define PSTATE_TCO (1U << 25) #define PSTATE_V (1U << 28) #define PSTATE_C (1U << 29) #define PSTATE_Z (1U << 30) @@ -2327,7 +2347,7 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) * migration or KVM state synchronization. (Typically this is for "registers" * which are actually used as instructions for cache maintenance and so on.) * IO indicates that this register does I/O and therefore its accesses - * need to be surrounded by gen_io_start()/gen_io_end(). In particular, + * need to be marked with gen_io_start() and also end the TB. In particular, * registers which implement clocks or timers require this. * RAISES_EXC is for when the read or write hook might raise an exception; * the generated code will synchronize the CPU state before calling the hook @@ -2349,7 +2369,9 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) #define ARM_CP_NZCV (ARM_CP_SPECIAL | 0x0300) #define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | 0x0400) #define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500) -#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA +#define ARM_CP_DC_GVA (ARM_CP_SPECIAL | 0x0600) +#define ARM_CP_DC_GZVA (ARM_CP_SPECIAL | 0x0700) +#define ARM_LAST_SPECIAL ARM_CP_DC_GZVA #define ARM_CP_FPU 0x1000 #define ARM_CP_SVE 0x2000 #define ARM_CP_NO_GDB 0x4000 @@ -2804,6 +2826,9 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * handling via the TLB. The only way to do a stage 1 translation without * the immediate stage 2 translation is via the ATS or AT system insns, * which can be slow-pathed and always do a page table walk. + * The only use of stage 2 translations is either as part of an s1+2 + * lookup or when loading the descriptors during a stage 1 page table walk, + * and in both those cases we don't use the TLB. * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" * translation regimes, because they map reasonably well to each other * and they can't both be active at the same time. @@ -2819,15 +2844,15 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * NS EL1 EL1&0 stage 1+2 (aka NS PL1) * NS EL1 EL1&0 stage 1+2 +PAN * NS EL0 EL2&0 + * NS EL2 EL2&0 * NS EL2 EL2&0 +PAN * NS EL2 (aka NS PL2) * S EL0 EL1&0 (aka S PL0) * S EL1 EL1&0 (not used if EL3 is 32 bit) * S EL1 EL1&0 +PAN * S EL3 (aka S PL1) - * NS EL1&0 stage 2 * - * for a total of 12 different mmu_idx. + * for a total of 11 different mmu_idx. * * R profile CPUs have an MPU, but can use the same set of MMU indexes * as A profile. They only need to distinguish NS EL0 and NS EL1 (and @@ -2850,8 +2875,8 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * vs A/R profile) would like to use MMU indexes with different semantics, * but since we don't ever need to use all of those in a single CPU we * can avoid setting NB_MMU_MODES to more than 8. The lower bits of - * ARMMMUIdx are the core TLB mmu index, and the higher bits are always - * the same for any particular CPU. + * can avoid having to set NB_MMU_MODES to "total number of A profile MMU + * modes + total number of M profile MMU modes". The lower bits of * Variables of type ARMMUIdx are always full values, and the core * index values are in variables of type 'int'. 
* @@ -2897,8 +2922,6 @@ typedef enum ARMMMUIdx { ARMMMUIdx_SE10_1_PAN = 9 | ARM_MMU_IDX_A, ARMMMUIdx_SE3 = 10 | ARM_MMU_IDX_A, - ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A, - /* * These are not allocated TLBs and are used only for AT system * instructions or for the first stage of an S12 page table walk. @@ -2906,6 +2929,14 @@ typedef enum ARMMMUIdx { ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, + /* + * Not allocated a TLB: used only for second stage of an S12 page + * table walk, or for descriptor loads during first stage of an S1 + * page table walk. Note that if we ever want to have a TLB for this + * then various TLB flush insns which currently are no-ops or flush + * only stage 1 MMU indexes will need to change to flush stage 2. + */ + ARMMMUIdx_Stage2 = 3 | ARM_MMU_IDX_NOTLB, /* * M-profile. @@ -2939,7 +2970,6 @@ typedef enum ARMMMUIdxBit { TO_CORE_BIT(SE10_1), TO_CORE_BIT(SE10_1_PAN), TO_CORE_BIT(SE3), - TO_CORE_BIT(Stage2), TO_CORE_BIT(MUser), TO_CORE_BIT(MPriv), @@ -2959,6 +2989,8 @@ typedef enum ARMMMUIdxBit { typedef enum ARMASIdx { ARMASIdx_NS = 0, ARMASIdx_S = 1, + ARMASIdx_TagNS = 2, + ARMASIdx_TagS = 3, } ARMASIdx; /* Return the Exception Level targeted by debug exceptions. */ @@ -3145,10 +3177,10 @@ typedef ARMCPU ArchCPU; * | | | TBFLAG_A32 | | * | | +-----+----------+ TBFLAG_AM32 | * | TBFLAG_ANY | |TBFLAG_M32| | - * | | +-+----------+--------------| - * | | | TBFLAG_A64 | - * +--------------+---------+---------------------------+ - * 31 20 15 0 + * | +-----------+----------+--------------| + * | | TBFLAG_A64 | + * +--------------+-------------------------------------+ + * 31 20 0 * * Unless otherwise noted, these bits are cached in env->hflags. */ @@ -3215,6 +3247,10 @@ FIELD(TBFLAG_A64, BT, 9, 1) FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */ FIELD(TBFLAG_A64, TBID, 12, 2) FIELD(TBFLAG_A64, UNPRIV, 14, 1) +FIELD(TBFLAG_A64, ATA, 15, 1) +FIELD(TBFLAG_A64, TCMA, 16, 2) +FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1) +FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) /** * cpu_mmu_index: @@ -3300,6 +3336,20 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) /* Shared between translate-sve.c and sve_helper.c. */ extern const uint64_t pred_esz_masks[4]; +/* Helper for the macros below, validating the argument type. */ +static inline MemTxAttrs *typecheck_memtxattrs(MemTxAttrs *x) +{ + return x; +} + +/* + * Lvalue macros for ARM TLB bits that we must cache in the TCG TLB. + * Using these should be a bit more self-documenting than using the + * generic target bits directly. + */ +#define arm_tlb_bti_gp(x) (typecheck_memtxattrs(x)->target_tlb_bit0) +#define arm_tlb_mte_tagged(x) (typecheck_memtxattrs(x)->target_tlb_bit1) + /* * Naming convention for isar_feature functions: * Functions which test 32-bit ID registers should have _aa32_ in @@ -3539,6 +3589,11 @@ static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id) return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0; } +static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; +} + /* * 64-bit feature tests via id registers. 
*/ @@ -3727,6 +3782,18 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; } +static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0; +} + +static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; +} + + + static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && @@ -3754,6 +3821,11 @@ static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; } +static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; +} + /* * Feature tests for "does this exist in either 32-bit or 64-bit?" */ @@ -3782,6 +3854,11 @@ static inline bool isar_feature_any_ccidx(const ARMISARegisters *id) return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id); } +static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id) +{ + return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id); +} + /* * Forward to the above feature tests given an ARMCPU pointer. */ diff --git a/qemu/target/arm/cpu64.c b/qemu/target/arm/cpu64.c index 3c57a52aee..b012e5d185 100644 --- a/qemu/target/arm/cpu64.c +++ b/qemu/target/arm/cpu64.c @@ -28,12 +28,6 @@ void arm_cpu_post_init(CPUState *obj); void arm_cpu_initfn(struct uc_struct *uc, CPUState *obj); ARMCPU *cpu_arm_init(struct uc_struct *uc); - -static inline void set_feature(CPUARMState *env, int feature) -{ - env->features |= 1ULL << feature; -} - static void aarch64_a57_initfn(struct uc_struct *uc, CPUState *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -262,6 +256,7 @@ static void aarch64_max_initfn(struct uc_struct *uc, CPUState *obj) FIELD_DP64(t, ID_AA64MMFR1, VH, 1, t); FIELD_DP64(t, ID_AA64MMFR1, PAN, 2, t); /* ATS1E1 */ FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2, t); /* VMID16 */ + FIELD_DP64(t, ID_AA64MMFR1, XNX, 1, t); /* TTS2UXN */ cpu->isar.id_aa64mmfr1 = t; t = cpu->isar.id_aa64mmfr2; @@ -295,22 +290,18 @@ static void aarch64_max_initfn(struct uc_struct *uc, CPUState *obj) FIELD_DP32(u, ID_MMFR4, HPDS, 1, u); /* AA32HPD */ FIELD_DP32(u, ID_MMFR4, AC2, 1, u); /* ACTLR2, HACTLR2 */ FIELD_DP32(u, ID_MMFR4, CNP, 1, u); /* TTCNP */ + FIELD_DP32(u, ID_MMFR4, XNX, 1, t); /* TTS2UXN */ cpu->isar.id_mmfr4 = u; - u = cpu->isar.id_aa64dfr0; - FIELD_DP64(u, ID_AA64DFR0, PMUVER, 5, u); /* v8.4-PMU */ - cpu->isar.id_aa64dfr0 = u; + t = cpu->isar.id_aa64dfr0; + FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5, t); /* v8.4-PMU */ + cpu->isar.id_aa64dfr0 = t; u = cpu->isar.id_dfr0; FIELD_DP32(u, ID_DFR0, PERFMON, 5, u); /* v8.4-PMU */ cpu->isar.id_dfr0 = u; } -struct ARMCPUInfo { - const char *name; - void (*initfn)(struct uc_struct *uc, CPUState *obj); -}; - static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, diff --git a/qemu/target/arm/crypto_helper.c b/qemu/target/arm/crypto_helper.c index 117be6f89f..137e776059 100644 --- a/qemu/target/arm/crypto_helper.c +++ b/qemu/target/arm/crypto_helper.c @@ -13,7 +13,9 @@ #include "cpu.h" #include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" #include "crypto/aes.h" +#include "vec_internal.h" union CRYPTO_STATE { uint8_t bytes[16]; @@ -22,25 +24,35 @@ union CRYPTO_STATE { }; 
#ifdef HOST_WORDS_BIGENDIAN -#define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8]) -#define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2]) +#define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) +#define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) #else -#define CR_ST_BYTE(state, i) (state.bytes[i]) -#define CR_ST_WORD(state, i) (state.words[i]) +#define CR_ST_BYTE(state, i) ((state).bytes[i]) +#define CR_ST_WORD(state, i) ((state).words[i]) #endif -void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) +/* + * The caller has not been converted to full gvec, and so only + * modifies the low 16 bytes of the vector register. + */ +static void clear_tail_16(void *vd, uint32_t desc) +{ + int opr_sz = simd_oprsz(desc); + int max_sz = simd_maxsz(desc); + + assert(opr_sz == 16); + clear_tail(vd, opr_sz, max_sz); +} + +static void do_crypto_aese(uint64_t *rd, uint64_t *rn, + uint64_t *rm, bool decrypt) { static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; - union CRYPTO_STATE st = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; int i; - assert(decrypt < 2); - /* xor state vector with round key */ rk.l[0] ^= st.l[0]; rk.l[1] ^= st.l[1]; @@ -54,7 +66,18 @@ void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } -void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) +void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aese((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vn + i), (uint64_t*)((char*)vm + i), decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) { static uint32_t const mc[][256] = { { /* MixColumns lookup table */ @@ -190,13 +213,9 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, } }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; int i; - assert(decrypt < 2); - for (i = 0; i < 16; i += 4) { CR_ST_WORD(st, i >> 2) = mc[decrypt][CR_ST_BYTE(st, i)] ^ @@ -209,6 +228,17 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } +void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aesmc((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vm + i), decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + /* * SHA-1 logical functions */ @@ -228,52 +258,78 @@ static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) return (x & y) | ((x | y) & z); } -void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) +void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) +{ + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t d0, d1; + + d0 = d[1] ^ d[0] ^ m[0]; + d1 = n[0] ^ d[1] ^ m[1]; + d[0] = d0; + d[1] = d1; + + clear_tail_16(vd, desc); +} + + +static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, + uint64_t *rm, uint32_t desc, + uint32_t (*fn)(union CRYPTO_STATE *d)) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n 
= { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + int i; - if (op == 3) { /* sha1su0 */ - d.l[0] ^= d.l[1] ^ m.l[0]; - d.l[1] ^= n.l[0] ^ m.l[1]; - } else { - int i; - - for (i = 0; i < 4; i++) { - uint32_t t = 0; - - switch (op) { - case 0: /* sha1c */ - t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 1: /* sha1p */ - t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 2: /* sha1m */ - t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - default: - g_assert_not_reached(); - } - t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) - + CR_ST_WORD(m, i); - - CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); - CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); - CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); - CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); - CR_ST_WORD(d, 0) = t; - } + for (i = 0; i < 4; i++) { + uint32_t t = fn(&d); + + t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) + + CR_ST_WORD(m, i); + + CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); + CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); + CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); + CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); + CR_ST_WORD(d, 0) = t; } rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); +} + +static uint32_t do_sha1c(union CRYPTO_STATE *d) +{ + return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); +} + +static uint32_t do_sha1p(union CRYPTO_STATE *d) +{ + return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); } -void HELPER(crypto_sha1h)(void *vd, void *vm) +void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); +} + +static uint32_t do_sha1m(union CRYPTO_STATE *d) +{ + return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); +} + +void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -284,9 +340,11 @@ void HELPER(crypto_sha1h)(void *vd, void *vm) rd[0] = m.l[0]; rd[1] = m.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha1su1)(void *vd, void *vm) +void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -300,6 +358,8 @@ void HELPER(crypto_sha1su1)(void *vd, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* @@ -327,7 +387,7 @@ static uint32_t s1(uint32_t x) return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); } -void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -358,9 +418,11 @@ void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -383,9 +445,11 @@ void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su0)(void *vd, void *vm) +void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -399,9 +463,11 @@ void HELPER(crypto_sha256su0)(void *vd, void *vm) rd[0] = 
d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -417,6 +483,8 @@ void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* @@ -453,7 +521,7 @@ static uint64_t s1_512(uint64_t x) return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); } -void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -466,9 +534,11 @@ void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -481,9 +551,11 @@ void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(rd, desc); } -void HELPER(crypto_sha512su0)(void *vd, void *vn) +void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -495,9 +567,11 @@ void HELPER(crypto_sha512su0)(void *vd, void *vn) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -505,9 +579,11 @@ void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) rd[0] += s1_512(rn[0]) + rm[0]; rd[1] += s1_512(rn[1]) + rm[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -531,9 +607,11 @@ void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -551,17 +629,18 @@ void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, - uint32_t opcode) +static inline void QEMU_ALWAYS_INLINE +crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, + uint32_t desc, uint32_t opcode) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + uint32_t imm2 = simd_data(desc); uint32_t t = 0; assert(imm2 < 4); @@ -576,7 +655,7 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, /* SM3TT2B */ t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); } else { - g_assert_not_reached(); + qemu_build_not_reached(); } t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); @@ -601,8 +680,21 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); } +#define DO_SM3TT(NAME, OPCODE) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } + +DO_SM3TT(crypto_sm3tt1a, 0) +DO_SM3TT(crypto_sm3tt1b, 1) +DO_SM3TT(crypto_sm3tt2a, 2) 
+DO_SM3TT(crypto_sm3tt2b, 3) + +#undef DO_SM3TT + static uint8_t const sm4_sbox[] = { 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, @@ -638,12 +730,10 @@ static uint8_t const sm4_sbox[] = { 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, }; -void HELPER(crypto_sm4e)(void *vd, void *vn) +static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; - union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; uint32_t t, i; for (i = 0; i < 4; i++) { @@ -665,11 +755,18 @@ void HELPER(crypto_sm4e)(void *vd, void *vn) rd[1] = d.l[1]; } -void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) +void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4e((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vn + i), (uint64_t*)((char*)vm + i)); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; @@ -693,3 +790,24 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) rd[0] = d.l[0]; rd[1] = d.l[1]; } + +void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4ekey((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vn + i), (uint64_t*)((char*)vm + i)); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] = n[i] ^ rol64(m[i], 1); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} diff --git a/qemu/target/arm/decode-a32.inc.c b/qemu/target/arm/decode-a32.inc.c index cf6c644a83..a1b30a2caa 100644 --- a/qemu/target/arm/decode-a32.inc.c +++ b/qemu/target/arm/decode-a32.inc.c @@ -21,9 +21,7 @@ typedef struct { } arg_disas_a3226; typedef struct { -#ifdef _MSC_VER - int dummy; -#endif + int : 0; } arg_empty; typedef struct { diff --git a/qemu/target/arm/decode-neon-dp.inc.c b/qemu/target/arm/decode-neon-dp.inc.c new file mode 100644 index 0000000000..c09bfb5e55 --- /dev/null +++ b/qemu/target/arm/decode-neon-dp.inc.c @@ -0,0 +1,2806 @@ +/* This file is autogenerated by scripts/decodetree.py. 
*/ + +typedef struct { + int cmode; + int imm; + int op; + int q; + int vd; +} arg_1reg_imm; + +typedef struct { + int q; + int size; + int vd; + int vm; +} arg_2misc; + +typedef struct { + int q; + int shift; + int size; + int vd; + int vm; +} arg_2reg_shift; + +typedef struct { + int q; + int size; + int vd; + int vm; + int vn; +} arg_2scalar; + +typedef struct { + int size; + int vd; + int vm; + int vn; +} arg_3diff; + +typedef struct { + int q; + int size; + int vd; + int vm; + int vn; +} arg_3same; + +typedef struct { + int imm; + int q; + int vd; + int vm; + int vn; +} arg_disas_neon_dp3; + +typedef struct { + int len; + int op; + int vd; + int vm; + int vn; +} arg_disas_neon_dp4; + +typedef struct { + int index; + int q; + int size; + int vd; + int vm; +} arg_disas_neon_dp5; + +typedef arg_3same arg_VHADD_S_3s; +static bool trans_VHADD_S_3s(DisasContext *ctx, arg_VHADD_S_3s *a); +typedef arg_3same arg_VHADD_U_3s; +static bool trans_VHADD_U_3s(DisasContext *ctx, arg_VHADD_U_3s *a); +typedef arg_3same arg_VQADD_S_3s; +static bool trans_VQADD_S_3s(DisasContext *ctx, arg_VQADD_S_3s *a); +typedef arg_3same arg_VQADD_U_3s; +static bool trans_VQADD_U_3s(DisasContext *ctx, arg_VQADD_U_3s *a); +typedef arg_3same arg_VRHADD_S_3s; +static bool trans_VRHADD_S_3s(DisasContext *ctx, arg_VRHADD_S_3s *a); +typedef arg_3same arg_VRHADD_U_3s; +static bool trans_VRHADD_U_3s(DisasContext *ctx, arg_VRHADD_U_3s *a); +typedef arg_3same arg_VAND_3s; +static bool trans_VAND_3s(DisasContext *ctx, arg_VAND_3s *a); +typedef arg_3same arg_VBIC_3s; +static bool trans_VBIC_3s(DisasContext *ctx, arg_VBIC_3s *a); +typedef arg_3same arg_VORR_3s; +static bool trans_VORR_3s(DisasContext *ctx, arg_VORR_3s *a); +typedef arg_3same arg_VORN_3s; +static bool trans_VORN_3s(DisasContext *ctx, arg_VORN_3s *a); +typedef arg_3same arg_VEOR_3s; +static bool trans_VEOR_3s(DisasContext *ctx, arg_VEOR_3s *a); +typedef arg_3same arg_VBSL_3s; +static bool trans_VBSL_3s(DisasContext *ctx, arg_VBSL_3s *a); +typedef arg_3same arg_VBIT_3s; +static bool trans_VBIT_3s(DisasContext *ctx, arg_VBIT_3s *a); +typedef arg_3same arg_VBIF_3s; +static bool trans_VBIF_3s(DisasContext *ctx, arg_VBIF_3s *a); +typedef arg_3same arg_VHSUB_S_3s; +static bool trans_VHSUB_S_3s(DisasContext *ctx, arg_VHSUB_S_3s *a); +typedef arg_3same arg_VHSUB_U_3s; +static bool trans_VHSUB_U_3s(DisasContext *ctx, arg_VHSUB_U_3s *a); +typedef arg_3same arg_VQSUB_S_3s; +static bool trans_VQSUB_S_3s(DisasContext *ctx, arg_VQSUB_S_3s *a); +typedef arg_3same arg_VQSUB_U_3s; +static bool trans_VQSUB_U_3s(DisasContext *ctx, arg_VQSUB_U_3s *a); +typedef arg_3same arg_VCGT_S_3s; +static bool trans_VCGT_S_3s(DisasContext *ctx, arg_VCGT_S_3s *a); +typedef arg_3same arg_VCGT_U_3s; +static bool trans_VCGT_U_3s(DisasContext *ctx, arg_VCGT_U_3s *a); +typedef arg_3same arg_VCGE_S_3s; +static bool trans_VCGE_S_3s(DisasContext *ctx, arg_VCGE_S_3s *a); +typedef arg_3same arg_VCGE_U_3s; +static bool trans_VCGE_U_3s(DisasContext *ctx, arg_VCGE_U_3s *a); +typedef arg_3same arg_VSHL_S_3s; +static bool trans_VSHL_S_3s(DisasContext *ctx, arg_VSHL_S_3s *a); +typedef arg_3same arg_VSHL_U_3s; +static bool trans_VSHL_U_3s(DisasContext *ctx, arg_VSHL_U_3s *a); +typedef arg_3same arg_VQSHL_S64_3s; +static bool trans_VQSHL_S64_3s(DisasContext *ctx, arg_VQSHL_S64_3s *a); +typedef arg_3same arg_VQSHL_S_3s; +static bool trans_VQSHL_S_3s(DisasContext *ctx, arg_VQSHL_S_3s *a); +typedef arg_3same arg_VQSHL_U64_3s; +static bool trans_VQSHL_U64_3s(DisasContext *ctx, arg_VQSHL_U64_3s *a); +typedef arg_3same 
arg_VQSHL_U_3s; +static bool trans_VQSHL_U_3s(DisasContext *ctx, arg_VQSHL_U_3s *a); +typedef arg_3same arg_VRSHL_S64_3s; +static bool trans_VRSHL_S64_3s(DisasContext *ctx, arg_VRSHL_S64_3s *a); +typedef arg_3same arg_VRSHL_S_3s; +static bool trans_VRSHL_S_3s(DisasContext *ctx, arg_VRSHL_S_3s *a); +typedef arg_3same arg_VRSHL_U64_3s; +static bool trans_VRSHL_U64_3s(DisasContext *ctx, arg_VRSHL_U64_3s *a); +typedef arg_3same arg_VRSHL_U_3s; +static bool trans_VRSHL_U_3s(DisasContext *ctx, arg_VRSHL_U_3s *a); +typedef arg_3same arg_VQRSHL_S64_3s; +static bool trans_VQRSHL_S64_3s(DisasContext *ctx, arg_VQRSHL_S64_3s *a); +typedef arg_3same arg_VQRSHL_S_3s; +static bool trans_VQRSHL_S_3s(DisasContext *ctx, arg_VQRSHL_S_3s *a); +typedef arg_3same arg_VQRSHL_U64_3s; +static bool trans_VQRSHL_U64_3s(DisasContext *ctx, arg_VQRSHL_U64_3s *a); +typedef arg_3same arg_VQRSHL_U_3s; +static bool trans_VQRSHL_U_3s(DisasContext *ctx, arg_VQRSHL_U_3s *a); +typedef arg_3same arg_VMAX_S_3s; +static bool trans_VMAX_S_3s(DisasContext *ctx, arg_VMAX_S_3s *a); +typedef arg_3same arg_VMAX_U_3s; +static bool trans_VMAX_U_3s(DisasContext *ctx, arg_VMAX_U_3s *a); +typedef arg_3same arg_VMIN_S_3s; +static bool trans_VMIN_S_3s(DisasContext *ctx, arg_VMIN_S_3s *a); +typedef arg_3same arg_VMIN_U_3s; +static bool trans_VMIN_U_3s(DisasContext *ctx, arg_VMIN_U_3s *a); +typedef arg_3same arg_VABD_S_3s; +static bool trans_VABD_S_3s(DisasContext *ctx, arg_VABD_S_3s *a); +typedef arg_3same arg_VABD_U_3s; +static bool trans_VABD_U_3s(DisasContext *ctx, arg_VABD_U_3s *a); +typedef arg_3same arg_VABA_S_3s; +static bool trans_VABA_S_3s(DisasContext *ctx, arg_VABA_S_3s *a); +typedef arg_3same arg_VABA_U_3s; +static bool trans_VABA_U_3s(DisasContext *ctx, arg_VABA_U_3s *a); +typedef arg_3same arg_VADD_3s; +static bool trans_VADD_3s(DisasContext *ctx, arg_VADD_3s *a); +typedef arg_3same arg_VSUB_3s; +static bool trans_VSUB_3s(DisasContext *ctx, arg_VSUB_3s *a); +typedef arg_3same arg_VTST_3s; +static bool trans_VTST_3s(DisasContext *ctx, arg_VTST_3s *a); +typedef arg_3same arg_VCEQ_3s; +static bool trans_VCEQ_3s(DisasContext *ctx, arg_VCEQ_3s *a); +typedef arg_3same arg_VMLA_3s; +static bool trans_VMLA_3s(DisasContext *ctx, arg_VMLA_3s *a); +typedef arg_3same arg_VMLS_3s; +static bool trans_VMLS_3s(DisasContext *ctx, arg_VMLS_3s *a); +typedef arg_3same arg_VMUL_3s; +static bool trans_VMUL_3s(DisasContext *ctx, arg_VMUL_3s *a); +typedef arg_3same arg_VMUL_p_3s; +static bool trans_VMUL_p_3s(DisasContext *ctx, arg_VMUL_p_3s *a); +typedef arg_3same arg_VPMAX_S_3s; +static bool trans_VPMAX_S_3s(DisasContext *ctx, arg_VPMAX_S_3s *a); +typedef arg_3same arg_VPMAX_U_3s; +static bool trans_VPMAX_U_3s(DisasContext *ctx, arg_VPMAX_U_3s *a); +typedef arg_3same arg_VPMIN_S_3s; +static bool trans_VPMIN_S_3s(DisasContext *ctx, arg_VPMIN_S_3s *a); +typedef arg_3same arg_VPMIN_U_3s; +static bool trans_VPMIN_U_3s(DisasContext *ctx, arg_VPMIN_U_3s *a); +typedef arg_3same arg_VQDMULH_3s; +static bool trans_VQDMULH_3s(DisasContext *ctx, arg_VQDMULH_3s *a); +typedef arg_3same arg_VQRDMULH_3s; +static bool trans_VQRDMULH_3s(DisasContext *ctx, arg_VQRDMULH_3s *a); +typedef arg_3same arg_VPADD_3s; +static bool trans_VPADD_3s(DisasContext *ctx, arg_VPADD_3s *a); +typedef arg_3same arg_VQRDMLAH_3s; +static bool trans_VQRDMLAH_3s(DisasContext *ctx, arg_VQRDMLAH_3s *a); +typedef arg_3same arg_SHA1C_3s; +static bool trans_SHA1C_3s(DisasContext *ctx, arg_SHA1C_3s *a); +typedef arg_3same arg_SHA1P_3s; +static bool trans_SHA1P_3s(DisasContext *ctx, arg_SHA1P_3s *a); 
+typedef arg_3same arg_SHA1M_3s; +static bool trans_SHA1M_3s(DisasContext *ctx, arg_SHA1M_3s *a); +typedef arg_3same arg_SHA1SU0_3s; +static bool trans_SHA1SU0_3s(DisasContext *ctx, arg_SHA1SU0_3s *a); +typedef arg_3same arg_SHA256H_3s; +static bool trans_SHA256H_3s(DisasContext *ctx, arg_SHA256H_3s *a); +typedef arg_3same arg_SHA256H2_3s; +static bool trans_SHA256H2_3s(DisasContext *ctx, arg_SHA256H2_3s *a); +typedef arg_3same arg_SHA256SU1_3s; +static bool trans_SHA256SU1_3s(DisasContext *ctx, arg_SHA256SU1_3s *a); +typedef arg_3same arg_VFMA_fp_3s; +static bool trans_VFMA_fp_3s(DisasContext *ctx, arg_VFMA_fp_3s *a); +typedef arg_3same arg_VFMS_fp_3s; +static bool trans_VFMS_fp_3s(DisasContext *ctx, arg_VFMS_fp_3s *a); +typedef arg_3same arg_VQRDMLSH_3s; +static bool trans_VQRDMLSH_3s(DisasContext *ctx, arg_VQRDMLSH_3s *a); +typedef arg_3same arg_VADD_fp_3s; +static bool trans_VADD_fp_3s(DisasContext *ctx, arg_VADD_fp_3s *a); +typedef arg_3same arg_VSUB_fp_3s; +static bool trans_VSUB_fp_3s(DisasContext *ctx, arg_VSUB_fp_3s *a); +typedef arg_3same arg_VPADD_fp_3s; +static bool trans_VPADD_fp_3s(DisasContext *ctx, arg_VPADD_fp_3s *a); +typedef arg_3same arg_VABD_fp_3s; +static bool trans_VABD_fp_3s(DisasContext *ctx, arg_VABD_fp_3s *a); +typedef arg_3same arg_VMLA_fp_3s; +static bool trans_VMLA_fp_3s(DisasContext *ctx, arg_VMLA_fp_3s *a); +typedef arg_3same arg_VMLS_fp_3s; +static bool trans_VMLS_fp_3s(DisasContext *ctx, arg_VMLS_fp_3s *a); +typedef arg_3same arg_VMUL_fp_3s; +static bool trans_VMUL_fp_3s(DisasContext *ctx, arg_VMUL_fp_3s *a); +typedef arg_3same arg_VCEQ_fp_3s; +static bool trans_VCEQ_fp_3s(DisasContext *ctx, arg_VCEQ_fp_3s *a); +typedef arg_3same arg_VCGE_fp_3s; +static bool trans_VCGE_fp_3s(DisasContext *ctx, arg_VCGE_fp_3s *a); +typedef arg_3same arg_VACGE_fp_3s; +static bool trans_VACGE_fp_3s(DisasContext *ctx, arg_VACGE_fp_3s *a); +typedef arg_3same arg_VCGT_fp_3s; +static bool trans_VCGT_fp_3s(DisasContext *ctx, arg_VCGT_fp_3s *a); +typedef arg_3same arg_VACGT_fp_3s; +static bool trans_VACGT_fp_3s(DisasContext *ctx, arg_VACGT_fp_3s *a); +typedef arg_3same arg_VMAX_fp_3s; +static bool trans_VMAX_fp_3s(DisasContext *ctx, arg_VMAX_fp_3s *a); +typedef arg_3same arg_VMIN_fp_3s; +static bool trans_VMIN_fp_3s(DisasContext *ctx, arg_VMIN_fp_3s *a); +typedef arg_3same arg_VPMAX_fp_3s; +static bool trans_VPMAX_fp_3s(DisasContext *ctx, arg_VPMAX_fp_3s *a); +typedef arg_3same arg_VPMIN_fp_3s; +static bool trans_VPMIN_fp_3s(DisasContext *ctx, arg_VPMIN_fp_3s *a); +typedef arg_3same arg_VRECPS_fp_3s; +static bool trans_VRECPS_fp_3s(DisasContext *ctx, arg_VRECPS_fp_3s *a); +typedef arg_3same arg_VRSQRTS_fp_3s; +static bool trans_VRSQRTS_fp_3s(DisasContext *ctx, arg_VRSQRTS_fp_3s *a); +typedef arg_3same arg_VMAXNM_fp_3s; +static bool trans_VMAXNM_fp_3s(DisasContext *ctx, arg_VMAXNM_fp_3s *a); +typedef arg_3same arg_VMINNM_fp_3s; +static bool trans_VMINNM_fp_3s(DisasContext *ctx, arg_VMINNM_fp_3s *a); +typedef arg_2reg_shift arg_VSHR_S_2sh; +static bool trans_VSHR_S_2sh(DisasContext *ctx, arg_VSHR_S_2sh *a); +typedef arg_2reg_shift arg_VSHR_U_2sh; +static bool trans_VSHR_U_2sh(DisasContext *ctx, arg_VSHR_U_2sh *a); +typedef arg_2reg_shift arg_VSRA_S_2sh; +static bool trans_VSRA_S_2sh(DisasContext *ctx, arg_VSRA_S_2sh *a); +typedef arg_2reg_shift arg_VSRA_U_2sh; +static bool trans_VSRA_U_2sh(DisasContext *ctx, arg_VSRA_U_2sh *a); +typedef arg_2reg_shift arg_VRSHR_S_2sh; +static bool trans_VRSHR_S_2sh(DisasContext *ctx, arg_VRSHR_S_2sh *a); +typedef arg_2reg_shift arg_VRSHR_U_2sh; 
+static bool trans_VRSHR_U_2sh(DisasContext *ctx, arg_VRSHR_U_2sh *a); +typedef arg_2reg_shift arg_VRSRA_S_2sh; +static bool trans_VRSRA_S_2sh(DisasContext *ctx, arg_VRSRA_S_2sh *a); +typedef arg_2reg_shift arg_VRSRA_U_2sh; +static bool trans_VRSRA_U_2sh(DisasContext *ctx, arg_VRSRA_U_2sh *a); +typedef arg_2reg_shift arg_VSRI_2sh; +static bool trans_VSRI_2sh(DisasContext *ctx, arg_VSRI_2sh *a); +typedef arg_2reg_shift arg_VSHL_2sh; +static bool trans_VSHL_2sh(DisasContext *ctx, arg_VSHL_2sh *a); +typedef arg_2reg_shift arg_VSLI_2sh; +static bool trans_VSLI_2sh(DisasContext *ctx, arg_VSLI_2sh *a); +typedef arg_2reg_shift arg_VQSHLU_64_2sh; +static bool trans_VQSHLU_64_2sh(DisasContext *ctx, arg_VQSHLU_64_2sh *a); +typedef arg_2reg_shift arg_VQSHLU_2sh; +static bool trans_VQSHLU_2sh(DisasContext *ctx, arg_VQSHLU_2sh *a); +typedef arg_2reg_shift arg_VQSHL_S_64_2sh; +static bool trans_VQSHL_S_64_2sh(DisasContext *ctx, arg_VQSHL_S_64_2sh *a); +typedef arg_2reg_shift arg_VQSHL_S_2sh; +static bool trans_VQSHL_S_2sh(DisasContext *ctx, arg_VQSHL_S_2sh *a); +typedef arg_2reg_shift arg_VQSHL_U_64_2sh; +static bool trans_VQSHL_U_64_2sh(DisasContext *ctx, arg_VQSHL_U_64_2sh *a); +typedef arg_2reg_shift arg_VQSHL_U_2sh; +static bool trans_VQSHL_U_2sh(DisasContext *ctx, arg_VQSHL_U_2sh *a); +typedef arg_2reg_shift arg_VSHRN_64_2sh; +static bool trans_VSHRN_64_2sh(DisasContext *ctx, arg_VSHRN_64_2sh *a); +typedef arg_2reg_shift arg_VSHRN_32_2sh; +static bool trans_VSHRN_32_2sh(DisasContext *ctx, arg_VSHRN_32_2sh *a); +typedef arg_2reg_shift arg_VSHRN_16_2sh; +static bool trans_VSHRN_16_2sh(DisasContext *ctx, arg_VSHRN_16_2sh *a); +typedef arg_2reg_shift arg_VRSHRN_64_2sh; +static bool trans_VRSHRN_64_2sh(DisasContext *ctx, arg_VRSHRN_64_2sh *a); +typedef arg_2reg_shift arg_VRSHRN_32_2sh; +static bool trans_VRSHRN_32_2sh(DisasContext *ctx, arg_VRSHRN_32_2sh *a); +typedef arg_2reg_shift arg_VRSHRN_16_2sh; +static bool trans_VRSHRN_16_2sh(DisasContext *ctx, arg_VRSHRN_16_2sh *a); +typedef arg_2reg_shift arg_VQSHRUN_64_2sh; +static bool trans_VQSHRUN_64_2sh(DisasContext *ctx, arg_VQSHRUN_64_2sh *a); +typedef arg_2reg_shift arg_VQSHRUN_32_2sh; +static bool trans_VQSHRUN_32_2sh(DisasContext *ctx, arg_VQSHRUN_32_2sh *a); +typedef arg_2reg_shift arg_VQSHRUN_16_2sh; +static bool trans_VQSHRUN_16_2sh(DisasContext *ctx, arg_VQSHRUN_16_2sh *a); +typedef arg_2reg_shift arg_VQRSHRUN_64_2sh; +static bool trans_VQRSHRUN_64_2sh(DisasContext *ctx, arg_VQRSHRUN_64_2sh *a); +typedef arg_2reg_shift arg_VQRSHRUN_32_2sh; +static bool trans_VQRSHRUN_32_2sh(DisasContext *ctx, arg_VQRSHRUN_32_2sh *a); +typedef arg_2reg_shift arg_VQRSHRUN_16_2sh; +static bool trans_VQRSHRUN_16_2sh(DisasContext *ctx, arg_VQRSHRUN_16_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_S64_2sh; +static bool trans_VQSHRN_S64_2sh(DisasContext *ctx, arg_VQSHRN_S64_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_S32_2sh; +static bool trans_VQSHRN_S32_2sh(DisasContext *ctx, arg_VQSHRN_S32_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_S16_2sh; +static bool trans_VQSHRN_S16_2sh(DisasContext *ctx, arg_VQSHRN_S16_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_S64_2sh; +static bool trans_VQRSHRN_S64_2sh(DisasContext *ctx, arg_VQRSHRN_S64_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_S32_2sh; +static bool trans_VQRSHRN_S32_2sh(DisasContext *ctx, arg_VQRSHRN_S32_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_S16_2sh; +static bool trans_VQRSHRN_S16_2sh(DisasContext *ctx, arg_VQRSHRN_S16_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_U64_2sh; +static bool 
trans_VQSHRN_U64_2sh(DisasContext *ctx, arg_VQSHRN_U64_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_U32_2sh; +static bool trans_VQSHRN_U32_2sh(DisasContext *ctx, arg_VQSHRN_U32_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_U16_2sh; +static bool trans_VQSHRN_U16_2sh(DisasContext *ctx, arg_VQSHRN_U16_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_U64_2sh; +static bool trans_VQRSHRN_U64_2sh(DisasContext *ctx, arg_VQRSHRN_U64_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_U32_2sh; +static bool trans_VQRSHRN_U32_2sh(DisasContext *ctx, arg_VQRSHRN_U32_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_U16_2sh; +static bool trans_VQRSHRN_U16_2sh(DisasContext *ctx, arg_VQRSHRN_U16_2sh *a); +typedef arg_2reg_shift arg_VSHLL_S_2sh; +static bool trans_VSHLL_S_2sh(DisasContext *ctx, arg_VSHLL_S_2sh *a); +typedef arg_2reg_shift arg_VSHLL_U_2sh; +static bool trans_VSHLL_U_2sh(DisasContext *ctx, arg_VSHLL_U_2sh *a); +typedef arg_2reg_shift arg_VCVT_SF_2sh; +static bool trans_VCVT_SF_2sh(DisasContext *ctx, arg_VCVT_SF_2sh *a); +typedef arg_2reg_shift arg_VCVT_UF_2sh; +static bool trans_VCVT_UF_2sh(DisasContext *ctx, arg_VCVT_UF_2sh *a); +typedef arg_2reg_shift arg_VCVT_FS_2sh; +static bool trans_VCVT_FS_2sh(DisasContext *ctx, arg_VCVT_FS_2sh *a); +typedef arg_2reg_shift arg_VCVT_FU_2sh; +static bool trans_VCVT_FU_2sh(DisasContext *ctx, arg_VCVT_FU_2sh *a); +typedef arg_1reg_imm arg_Vimm_1r; +static bool trans_Vimm_1r(DisasContext *ctx, arg_Vimm_1r *a); +typedef arg_disas_neon_dp3 arg_VEXT; +static bool trans_VEXT(DisasContext *ctx, arg_VEXT *a); +typedef arg_disas_neon_dp4 arg_VTBL; +static bool trans_VTBL(DisasContext *ctx, arg_VTBL *a); +typedef arg_disas_neon_dp5 arg_VDUP_scalar; +static bool trans_VDUP_scalar(DisasContext *ctx, arg_VDUP_scalar *a); +typedef arg_2misc arg_VREV64; +static bool trans_VREV64(DisasContext *ctx, arg_VREV64 *a); +typedef arg_2misc arg_VREV32; +static bool trans_VREV32(DisasContext *ctx, arg_VREV32 *a); +typedef arg_2misc arg_VREV16; +static bool trans_VREV16(DisasContext *ctx, arg_VREV16 *a); +typedef arg_2misc arg_VPADDL_S; +static bool trans_VPADDL_S(DisasContext *ctx, arg_VPADDL_S *a); +typedef arg_2misc arg_VPADDL_U; +static bool trans_VPADDL_U(DisasContext *ctx, arg_VPADDL_U *a); +typedef arg_2misc arg_AESE; +static bool trans_AESE(DisasContext *ctx, arg_AESE *a); +typedef arg_2misc arg_AESD; +static bool trans_AESD(DisasContext *ctx, arg_AESD *a); +typedef arg_2misc arg_AESMC; +static bool trans_AESMC(DisasContext *ctx, arg_AESMC *a); +typedef arg_2misc arg_AESIMC; +static bool trans_AESIMC(DisasContext *ctx, arg_AESIMC *a); +typedef arg_2misc arg_VCLS; +static bool trans_VCLS(DisasContext *ctx, arg_VCLS *a); +typedef arg_2misc arg_VCLZ; +static bool trans_VCLZ(DisasContext *ctx, arg_VCLZ *a); +typedef arg_2misc arg_VCNT; +static bool trans_VCNT(DisasContext *ctx, arg_VCNT *a); +typedef arg_2misc arg_VMVN; +static bool trans_VMVN(DisasContext *ctx, arg_VMVN *a); +typedef arg_2misc arg_VPADAL_S; +static bool trans_VPADAL_S(DisasContext *ctx, arg_VPADAL_S *a); +typedef arg_2misc arg_VPADAL_U; +static bool trans_VPADAL_U(DisasContext *ctx, arg_VPADAL_U *a); +typedef arg_2misc arg_VQABS; +static bool trans_VQABS(DisasContext *ctx, arg_VQABS *a); +typedef arg_2misc arg_VQNEG; +static bool trans_VQNEG(DisasContext *ctx, arg_VQNEG *a); +typedef arg_2misc arg_VCGT0; +static bool trans_VCGT0(DisasContext *ctx, arg_VCGT0 *a); +typedef arg_2misc arg_VCGE0; +static bool trans_VCGE0(DisasContext *ctx, arg_VCGE0 *a); +typedef arg_2misc arg_VCEQ0; +static bool trans_VCEQ0(DisasContext *ctx, 
arg_VCEQ0 *a); +typedef arg_2misc arg_VCLE0; +static bool trans_VCLE0(DisasContext *ctx, arg_VCLE0 *a); +typedef arg_2misc arg_VCLT0; +static bool trans_VCLT0(DisasContext *ctx, arg_VCLT0 *a); +typedef arg_2misc arg_SHA1H; +static bool trans_SHA1H(DisasContext *ctx, arg_SHA1H *a); +typedef arg_2misc arg_VABS; +static bool trans_VABS(DisasContext *ctx, arg_VABS *a); +typedef arg_2misc arg_VNEG; +static bool trans_VNEG(DisasContext *ctx, arg_VNEG *a); +typedef arg_2misc arg_VCGT0_F; +static bool trans_VCGT0_F(DisasContext *ctx, arg_VCGT0_F *a); +typedef arg_2misc arg_VCGE0_F; +static bool trans_VCGE0_F(DisasContext *ctx, arg_VCGE0_F *a); +typedef arg_2misc arg_VCEQ0_F; +static bool trans_VCEQ0_F(DisasContext *ctx, arg_VCEQ0_F *a); +typedef arg_2misc arg_VCLE0_F; +static bool trans_VCLE0_F(DisasContext *ctx, arg_VCLE0_F *a); +typedef arg_2misc arg_VCLT0_F; +static bool trans_VCLT0_F(DisasContext *ctx, arg_VCLT0_F *a); +typedef arg_2misc arg_VABS_F; +static bool trans_VABS_F(DisasContext *ctx, arg_VABS_F *a); +typedef arg_2misc arg_VNEG_F; +static bool trans_VNEG_F(DisasContext *ctx, arg_VNEG_F *a); +typedef arg_2misc arg_VSWP; +static bool trans_VSWP(DisasContext *ctx, arg_VSWP *a); +typedef arg_2misc arg_VTRN; +static bool trans_VTRN(DisasContext *ctx, arg_VTRN *a); +typedef arg_2misc arg_VUZP; +static bool trans_VUZP(DisasContext *ctx, arg_VUZP *a); +typedef arg_2misc arg_VZIP; +static bool trans_VZIP(DisasContext *ctx, arg_VZIP *a); +typedef arg_2misc arg_VMOVN; +static bool trans_VMOVN(DisasContext *ctx, arg_VMOVN *a); +typedef arg_2misc arg_VQMOVUN; +static bool trans_VQMOVUN(DisasContext *ctx, arg_VQMOVUN *a); +typedef arg_2misc arg_VQMOVN_S; +static bool trans_VQMOVN_S(DisasContext *ctx, arg_VQMOVN_S *a); +typedef arg_2misc arg_VQMOVN_U; +static bool trans_VQMOVN_U(DisasContext *ctx, arg_VQMOVN_U *a); +typedef arg_2misc arg_VSHLL; +static bool trans_VSHLL(DisasContext *ctx, arg_VSHLL *a); +typedef arg_2misc arg_SHA1SU1; +static bool trans_SHA1SU1(DisasContext *ctx, arg_SHA1SU1 *a); +typedef arg_2misc arg_SHA256SU0; +static bool trans_SHA256SU0(DisasContext *ctx, arg_SHA256SU0 *a); +typedef arg_2misc arg_VRINTN; +static bool trans_VRINTN(DisasContext *ctx, arg_VRINTN *a); +typedef arg_2misc arg_VRINTX; +static bool trans_VRINTX(DisasContext *ctx, arg_VRINTX *a); +typedef arg_2misc arg_VRINTA; +static bool trans_VRINTA(DisasContext *ctx, arg_VRINTA *a); +typedef arg_2misc arg_VRINTZ; +static bool trans_VRINTZ(DisasContext *ctx, arg_VRINTZ *a); +typedef arg_2misc arg_VCVT_F16_F32; +static bool trans_VCVT_F16_F32(DisasContext *ctx, arg_VCVT_F16_F32 *a); +typedef arg_2misc arg_VRINTM; +static bool trans_VRINTM(DisasContext *ctx, arg_VRINTM *a); +typedef arg_2misc arg_VCVT_F32_F16; +static bool trans_VCVT_F32_F16(DisasContext *ctx, arg_VCVT_F32_F16 *a); +typedef arg_2misc arg_VRINTP; +static bool trans_VRINTP(DisasContext *ctx, arg_VRINTP *a); +typedef arg_2misc arg_VCVTAS; +static bool trans_VCVTAS(DisasContext *ctx, arg_VCVTAS *a); +typedef arg_2misc arg_VCVTAU; +static bool trans_VCVTAU(DisasContext *ctx, arg_VCVTAU *a); +typedef arg_2misc arg_VCVTNS; +static bool trans_VCVTNS(DisasContext *ctx, arg_VCVTNS *a); +typedef arg_2misc arg_VCVTNU; +static bool trans_VCVTNU(DisasContext *ctx, arg_VCVTNU *a); +typedef arg_2misc arg_VCVTPS; +static bool trans_VCVTPS(DisasContext *ctx, arg_VCVTPS *a); +typedef arg_2misc arg_VCVTPU; +static bool trans_VCVTPU(DisasContext *ctx, arg_VCVTPU *a); +typedef arg_2misc arg_VCVTMS; +static bool trans_VCVTMS(DisasContext *ctx, arg_VCVTMS *a); +typedef 
arg_2misc arg_VCVTMU; +static bool trans_VCVTMU(DisasContext *ctx, arg_VCVTMU *a); +typedef arg_2misc arg_VRECPE; +static bool trans_VRECPE(DisasContext *ctx, arg_VRECPE *a); +typedef arg_2misc arg_VRSQRTE; +static bool trans_VRSQRTE(DisasContext *ctx, arg_VRSQRTE *a); +typedef arg_2misc arg_VRECPE_F; +static bool trans_VRECPE_F(DisasContext *ctx, arg_VRECPE_F *a); +typedef arg_2misc arg_VRSQRTE_F; +static bool trans_VRSQRTE_F(DisasContext *ctx, arg_VRSQRTE_F *a); +typedef arg_2misc arg_VCVT_FS; +static bool trans_VCVT_FS(DisasContext *ctx, arg_VCVT_FS *a); +typedef arg_2misc arg_VCVT_FU; +static bool trans_VCVT_FU(DisasContext *ctx, arg_VCVT_FU *a); +typedef arg_2misc arg_VCVT_SF; +static bool trans_VCVT_SF(DisasContext *ctx, arg_VCVT_SF *a); +typedef arg_2misc arg_VCVT_UF; +static bool trans_VCVT_UF(DisasContext *ctx, arg_VCVT_UF *a); +typedef arg_3diff arg_VADDL_S_3d; +static bool trans_VADDL_S_3d(DisasContext *ctx, arg_VADDL_S_3d *a); +typedef arg_3diff arg_VADDL_U_3d; +static bool trans_VADDL_U_3d(DisasContext *ctx, arg_VADDL_U_3d *a); +typedef arg_3diff arg_VADDW_S_3d; +static bool trans_VADDW_S_3d(DisasContext *ctx, arg_VADDW_S_3d *a); +typedef arg_3diff arg_VADDW_U_3d; +static bool trans_VADDW_U_3d(DisasContext *ctx, arg_VADDW_U_3d *a); +typedef arg_3diff arg_VSUBL_S_3d; +static bool trans_VSUBL_S_3d(DisasContext *ctx, arg_VSUBL_S_3d *a); +typedef arg_3diff arg_VSUBL_U_3d; +static bool trans_VSUBL_U_3d(DisasContext *ctx, arg_VSUBL_U_3d *a); +typedef arg_3diff arg_VSUBW_S_3d; +static bool trans_VSUBW_S_3d(DisasContext *ctx, arg_VSUBW_S_3d *a); +typedef arg_3diff arg_VSUBW_U_3d; +static bool trans_VSUBW_U_3d(DisasContext *ctx, arg_VSUBW_U_3d *a); +typedef arg_3diff arg_VADDHN_3d; +static bool trans_VADDHN_3d(DisasContext *ctx, arg_VADDHN_3d *a); +typedef arg_3diff arg_VRADDHN_3d; +static bool trans_VRADDHN_3d(DisasContext *ctx, arg_VRADDHN_3d *a); +typedef arg_3diff arg_VABAL_S_3d; +static bool trans_VABAL_S_3d(DisasContext *ctx, arg_VABAL_S_3d *a); +typedef arg_3diff arg_VABAL_U_3d; +static bool trans_VABAL_U_3d(DisasContext *ctx, arg_VABAL_U_3d *a); +typedef arg_3diff arg_VSUBHN_3d; +static bool trans_VSUBHN_3d(DisasContext *ctx, arg_VSUBHN_3d *a); +typedef arg_3diff arg_VRSUBHN_3d; +static bool trans_VRSUBHN_3d(DisasContext *ctx, arg_VRSUBHN_3d *a); +typedef arg_3diff arg_VABDL_S_3d; +static bool trans_VABDL_S_3d(DisasContext *ctx, arg_VABDL_S_3d *a); +typedef arg_3diff arg_VABDL_U_3d; +static bool trans_VABDL_U_3d(DisasContext *ctx, arg_VABDL_U_3d *a); +typedef arg_3diff arg_VMLAL_S_3d; +static bool trans_VMLAL_S_3d(DisasContext *ctx, arg_VMLAL_S_3d *a); +typedef arg_3diff arg_VMLAL_U_3d; +static bool trans_VMLAL_U_3d(DisasContext *ctx, arg_VMLAL_U_3d *a); +typedef arg_3diff arg_VQDMLAL_3d; +static bool trans_VQDMLAL_3d(DisasContext *ctx, arg_VQDMLAL_3d *a); +typedef arg_3diff arg_VMLSL_S_3d; +static bool trans_VMLSL_S_3d(DisasContext *ctx, arg_VMLSL_S_3d *a); +typedef arg_3diff arg_VMLSL_U_3d; +static bool trans_VMLSL_U_3d(DisasContext *ctx, arg_VMLSL_U_3d *a); +typedef arg_3diff arg_VQDMLSL_3d; +static bool trans_VQDMLSL_3d(DisasContext *ctx, arg_VQDMLSL_3d *a); +typedef arg_3diff arg_VMULL_S_3d; +static bool trans_VMULL_S_3d(DisasContext *ctx, arg_VMULL_S_3d *a); +typedef arg_3diff arg_VMULL_U_3d; +static bool trans_VMULL_U_3d(DisasContext *ctx, arg_VMULL_U_3d *a); +typedef arg_3diff arg_VQDMULL_3d; +static bool trans_VQDMULL_3d(DisasContext *ctx, arg_VQDMULL_3d *a); +typedef arg_3diff arg_VMULL_P_3d; +static bool trans_VMULL_P_3d(DisasContext *ctx, arg_VMULL_P_3d *a); 
+typedef arg_2scalar arg_VMLA_2sc; +static bool trans_VMLA_2sc(DisasContext *ctx, arg_VMLA_2sc *a); +typedef arg_2scalar arg_VMLA_F_2sc; +static bool trans_VMLA_F_2sc(DisasContext *ctx, arg_VMLA_F_2sc *a); +typedef arg_2scalar arg_VMLAL_S_2sc; +static bool trans_VMLAL_S_2sc(DisasContext *ctx, arg_VMLAL_S_2sc *a); +typedef arg_2scalar arg_VMLAL_U_2sc; +static bool trans_VMLAL_U_2sc(DisasContext *ctx, arg_VMLAL_U_2sc *a); +typedef arg_2scalar arg_VQDMLAL_2sc; +static bool trans_VQDMLAL_2sc(DisasContext *ctx, arg_VQDMLAL_2sc *a); +typedef arg_2scalar arg_VMLS_2sc; +static bool trans_VMLS_2sc(DisasContext *ctx, arg_VMLS_2sc *a); +typedef arg_2scalar arg_VMLS_F_2sc; +static bool trans_VMLS_F_2sc(DisasContext *ctx, arg_VMLS_F_2sc *a); +typedef arg_2scalar arg_VMLSL_S_2sc; +static bool trans_VMLSL_S_2sc(DisasContext *ctx, arg_VMLSL_S_2sc *a); +typedef arg_2scalar arg_VMLSL_U_2sc; +static bool trans_VMLSL_U_2sc(DisasContext *ctx, arg_VMLSL_U_2sc *a); +typedef arg_2scalar arg_VQDMLSL_2sc; +static bool trans_VQDMLSL_2sc(DisasContext *ctx, arg_VQDMLSL_2sc *a); +typedef arg_2scalar arg_VMUL_2sc; +static bool trans_VMUL_2sc(DisasContext *ctx, arg_VMUL_2sc *a); +typedef arg_2scalar arg_VMUL_F_2sc; +static bool trans_VMUL_F_2sc(DisasContext *ctx, arg_VMUL_F_2sc *a); +typedef arg_2scalar arg_VMULL_S_2sc; +static bool trans_VMULL_S_2sc(DisasContext *ctx, arg_VMULL_S_2sc *a); +typedef arg_2scalar arg_VMULL_U_2sc; +static bool trans_VMULL_U_2sc(DisasContext *ctx, arg_VMULL_U_2sc *a); +typedef arg_2scalar arg_VQDMULL_2sc; +static bool trans_VQDMULL_2sc(DisasContext *ctx, arg_VQDMULL_2sc *a); +typedef arg_2scalar arg_VQDMULH_2sc; +static bool trans_VQDMULH_2sc(DisasContext *ctx, arg_VQDMULH_2sc *a); +typedef arg_2scalar arg_VQRDMULH_2sc; +static bool trans_VQRDMULH_2sc(DisasContext *ctx, arg_VQRDMULH_2sc *a); +typedef arg_2scalar arg_VQRDMLAH_2sc; +static bool trans_VQRDMLAH_2sc(DisasContext *ctx, arg_VQRDMLAH_2sc *a); +typedef arg_2scalar arg_VQRDMLSH_2sc; +static bool trans_VQRDMLSH_2sc(DisasContext *ctx, arg_VQRDMLSH_2sc *a); + +static void disas_neon_dp_extract_1reg_imm(DisasContext *ctx, arg_1reg_imm *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->imm = deposit32(deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 16, 3)), 7, 25, extract32(insn, 24, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_2misc(DisasContext *ctx, arg_2misc *a, uint32_t insn) +{ + a->size = extract32(insn, 18, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_2misc_q0(DisasContext *ctx, arg_2misc *a, uint32_t insn) +{ + a->size = extract32(insn, 18, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_2misc_q1(DisasContext *ctx, arg_2misc *a, uint32_t insn) +{ + a->size = extract32(insn, 18, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 1; +} + +static void disas_neon_dp_extract_2reg_shl_b(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 3); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = 
deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_dp_extract_2reg_shl_d(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 6); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; +} + +static void disas_neon_dp_extract_2reg_shl_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 4); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; +} + +static void disas_neon_dp_extract_2reg_shl_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 5); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; +} + +static void disas_neon_dp_extract_2reg_shll_b(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 3); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->q = 0; +} + +static void disas_neon_dp_extract_2reg_shll_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 4); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->q = 0; +} + +static void disas_neon_dp_extract_2reg_shll_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 5); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->q = 0; +} + +static void disas_neon_dp_extract_2reg_shr_b(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->shift = rsub_8(ctx, extract32(insn, 16, 3)); +} + +static void disas_neon_dp_extract_2reg_shr_d(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; + a->shift = rsub_64(ctx, extract32(insn, 16, 6)); +} + +static void disas_neon_dp_extract_2reg_shr_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->shift = rsub_16(ctx, extract32(insn, 16, 4)); +} + +static void disas_neon_dp_extract_2reg_shr_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->shift = rsub_32(ctx, extract32(insn, 16, 5)); +} + +static void disas_neon_dp_extract_2reg_shrn_d(DisasContext 
*ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; + a->q = 0; + a->shift = rsub_32(ctx, extract32(insn, 16, 5)); +} + +static void disas_neon_dp_extract_2reg_shrn_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->q = 0; + a->shift = rsub_8(ctx, extract32(insn, 16, 3)); +} + +static void disas_neon_dp_extract_2reg_shrn_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->q = 0; + a->shift = rsub_16(ctx, extract32(insn, 16, 4)); +} + +static void disas_neon_dp_extract_2reg_vcvt(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->shift = rsub_32(ctx, extract32(insn, 16, 5)); +} + +static void disas_neon_dp_extract_2scalar(DisasContext *ctx, arg_2scalar *a, uint32_t insn) +{ + a->q = extract32(insn, 24, 1); + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_2scalar_q0(DisasContext *ctx, arg_2scalar *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_3diff(DisasContext *ctx, arg_3diff *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_3same(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_3same_64_rev(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vn = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; +} + +static void disas_neon_dp_extract_3same_crypto(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->q = 1; +} + 
+static void disas_neon_dp_extract_3same_fp(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_3same_fp_q0(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_3same_logic(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_dp_extract_3same_q0(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_3same_rev(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->vn = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vm = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_24(DisasContext *ctx, arg_disas_neon_dp3 *a, uint32_t insn) +{ + a->imm = extract32(insn, 8, 4); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_25(DisasContext *ctx, arg_disas_neon_dp4 *a, uint32_t insn) +{ + a->len = extract32(insn, 8, 2); + a->op = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_26(DisasContext *ctx, arg_disas_neon_dp5 *a, uint32_t insn) +{ + a->index = extract32(insn, 17, 3); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_27(DisasContext *ctx, arg_disas_neon_dp5 *a, uint32_t insn) +{ + a->index = extract32(insn, 18, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_28(DisasContext *ctx, arg_disas_neon_dp5 *a, uint32_t 
insn) +{ + a->index = extract32(insn, 19, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; +} + +static bool disas_neon_dp(DisasContext *ctx, uint32_t insn) +{ + union { + arg_1reg_imm f_1reg_imm; + arg_2misc f_2misc; + arg_2reg_shift f_2reg_shift; + arg_2scalar f_2scalar; + arg_3diff f_3diff; + arg_3same f_3same; + arg_disas_neon_dp3 f_disas_neon_dp3; + arg_disas_neon_dp4 f_disas_neon_dp4; + arg_disas_neon_dp5 f_disas_neon_dp5; + } u; + + switch (insn & 0xfe800010) { + case 0xf2000000: + /* 1111001. 0....... ........ ...0.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 0....... ....0000 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHADD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000100: + /* 11110010 0....... ....0001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VRHADD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000200: + /* 11110010 0....... ....0010 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHSUB_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000300: + /* 11110010 0....... ....0011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGT_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000400: + /* 11110010 0....... ....0100 ...0.... */ + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000500: + /* 11110010 0....... ....0101 ...0.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110010 0.11.... ....0101 ...0.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_S64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000600: + /* 11110010 0....... ....0110 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMAX_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000700: + /* 11110010 0....... ....0111 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000800: + /* 11110010 0....... ....1000 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VADD_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000900: + /* 11110010 0....... ....1001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMLA_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000a00: + /* 11110010 0....... ....1010 ...0.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 0....... ....1010 .0.0.... */ + if (trans_VPMAX_S_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000b00: + /* 11110010 0....... ....1011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQDMULH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000c00: + /* 11110010 0....... ....1100 ...0.... */ + disas_neon_dp_extract_3same_crypto(ctx, &u.f_3same, insn); + switch (insn & 0x00300040) { + case 0x00000040: + /* 11110010 0.00.... ....1100 .1.0.... 
*/ + if (trans_SHA1C_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00100040: + /* 11110010 0.01.... ....1100 .1.0.... */ + if (trans_SHA1P_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00200040: + /* 11110010 0.10.... ....1100 .1.0.... */ + if (trans_SHA1M_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00300040: + /* 11110010 0.11.... ....1100 .1.0.... */ + if (trans_SHA1SU0_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000d00: + /* 11110010 0....... ....1101 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1101 ...0.... */ + if (trans_VADD_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1101 ...0.... */ + if (trans_VSUB_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000e00: + /* 11110010 0....... ....1110 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1110 ...0.... */ + if (trans_VCEQ_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000f00: + /* 11110010 0....... ....1111 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1111 ...0.... */ + if (trans_VMAX_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1111 ...0.... */ + if (trans_VMIN_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 0....... ....0000 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHADD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000100: + /* 11110011 0....... ....0001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VRHADD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000200: + /* 11110011 0....... ....0010 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHSUB_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000300: + /* 11110011 0....... ....0011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGT_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000400: + /* 11110011 0....... ....0100 ...0.... */ + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000500: + /* 11110011 0....... ....0101 ...0.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110011 0.11.... ....0101 ...0.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_U64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000600: + /* 11110011 0....... ....0110 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMAX_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000700: + /* 11110011 0....... ....0111 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000800: + /* 11110011 0....... ....1000 ...0.... 
*/ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VSUB_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000900: + /* 11110011 0....... ....1001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMLS_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000a00: + /* 11110011 0....... ....1010 ...0.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 0....... ....1010 .0.0.... */ + if (trans_VPMAX_U_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000b00: + /* 11110011 0....... ....1011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQRDMULH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000c00: + /* 11110011 0....... ....1100 ...0.... */ + disas_neon_dp_extract_3same_crypto(ctx, &u.f_3same, insn); + switch (insn & 0x00300040) { + case 0x00000040: + /* 11110011 0.00.... ....1100 .1.0.... */ + if (trans_SHA256H_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00100040: + /* 11110011 0.01.... ....1100 .1.0.... */ + if (trans_SHA256H2_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00200040: + /* 11110011 0.10.... ....1100 .1.0.... */ + if (trans_SHA256SU1_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000d00: + /* 11110011 0....... ....1101 ...0.... */ + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1101 ...0.... */ + disas_neon_dp_extract_3same_fp_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1101 .0.0.... */ + if (trans_VPADD_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x1: + /* 11110011 0.1..... ....1101 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + if (trans_VABD_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000e00: + /* 11110011 0....... ....1110 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1110 ...0.... */ + if (trans_VCGE_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.1..... ....1110 ...0.... */ + if (trans_VCGT_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000f00: + /* 11110011 0....... ....1111 ...0.... */ + disas_neon_dp_extract_3same_fp_q0(ctx, &u.f_3same, insn); + switch (insn & 0x00200040) { + case 0x00000000: + /* 11110011 0.0..... ....1111 .0.0.... */ + if (trans_VPMAX_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00200000: + /* 11110011 0.1..... ....1111 .0.0.... */ + if (trans_VPMIN_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + } + return false; + case 0xf2000010: + /* 1111001. 0....... ........ ...1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 0....... ....0000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQADD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000100: + /* 11110010 0....... ....0001 ...1.... */ + disas_neon_dp_extract_3same_logic(ctx, &u.f_3same, insn); + switch ((insn >> 20) & 0x3) { + case 0x0: + /* 11110010 0.00.... ....0001 ...1.... */ + if (trans_VAND_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.01.... ....0001 ...1.... 
*/ + if (trans_VBIC_3s(ctx, &u.f_3same)) return true; + return false; + case 0x2: + /* 11110010 0.10.... ....0001 ...1.... */ + if (trans_VORR_3s(ctx, &u.f_3same)) return true; + return false; + case 0x3: + /* 11110010 0.11.... ....0001 ...1.... */ + if (trans_VORN_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000200: + /* 11110010 0....... ....0010 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQSUB_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000300: + /* 11110010 0....... ....0011 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGE_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000400: + /* 11110010 0....... ....0100 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110010 0.11.... ....0100 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_S64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000500: + /* 11110010 0....... ....0101 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110010 0.11.... ....0101 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_S64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000600: + /* 11110010 0....... ....0110 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMIN_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000700: + /* 11110010 0....... ....0111 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABA_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000800: + /* 11110010 0....... ....1000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VTST_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000900: + /* 11110010 0....... ....1001 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMUL_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000a00: + /* 11110010 0....... ....1010 ...1.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 0....... ....1010 .0.1.... */ + if (trans_VPMIN_S_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000b00: + /* 11110010 0....... ....1011 ...1.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 0....... ....1011 .0.1.... */ + if (trans_VPADD_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000c00: + /* 11110010 0....... ....1100 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1100 ...1.... */ + if (trans_VFMA_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1100 ...1.... */ + if (trans_VFMS_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000d00: + /* 11110010 0....... ....1101 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1101 ...1.... 
*/ + if (trans_VMLA_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1101 ...1.... */ + if (trans_VMLS_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000f00: + /* 11110010 0....... ....1111 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1111 ...1.... */ + if (trans_VRECPS_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1111 ...1.... */ + if (trans_VRSQRTS_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 0....... ....0000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQADD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000100: + /* 11110011 0....... ....0001 ...1.... */ + disas_neon_dp_extract_3same_logic(ctx, &u.f_3same, insn); + switch ((insn >> 20) & 0x3) { + case 0x0: + /* 11110011 0.00.... ....0001 ...1.... */ + if (trans_VEOR_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.01.... ....0001 ...1.... */ + if (trans_VBSL_3s(ctx, &u.f_3same)) return true; + return false; + case 0x2: + /* 11110011 0.10.... ....0001 ...1.... */ + if (trans_VBIT_3s(ctx, &u.f_3same)) return true; + return false; + case 0x3: + /* 11110011 0.11.... ....0001 ...1.... */ + if (trans_VBIF_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000200: + /* 11110011 0....... ....0010 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQSUB_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000300: + /* 11110011 0....... ....0011 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGE_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000400: + /* 11110011 0....... ....0100 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110011 0.11.... ....0100 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_U64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000500: + /* 11110011 0....... ....0101 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110011 0.11.... ....0101 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_U64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000600: + /* 11110011 0....... ....0110 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMIN_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000700: + /* 11110011 0....... ....0111 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABA_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000800: + /* 11110011 0....... ....1000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCEQ_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000900: + /* 11110011 0....... ....1001 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMUL_p_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000a00: + /* 11110011 0....... ....1010 ...1.... 
*/ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 0....... ....1010 .0.1.... */ + if (trans_VPMIN_U_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000b00: + /* 11110011 0....... ....1011 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQRDMLAH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000c00: + /* 11110011 0....... ....1100 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQRDMLSH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000d00: + /* 11110011 0....... ....1101 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1101 ...1.... */ + if (trans_VMUL_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000e00: + /* 11110011 0....... ....1110 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1110 ...1.... */ + if (trans_VACGE_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.1..... ....1110 ...1.... */ + if (trans_VACGT_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000f00: + /* 11110011 0....... ....1111 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1111 ...1.... */ + if (trans_VMAXNM_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.1..... ....1111 ...1.... */ + if (trans_VMINNM_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + } + return false; + case 0xf2800000: + /* 1111001. 1....... ........ ...0.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 1111001. 1.11.... ........ ...0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1.11.... ........ ...0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_24(ctx, &u.f_disas_neon_dp3, insn); + if (trans_VEXT(ctx, &u.f_disas_neon_dp3)) return true; + return false; + case 0x1: + /* 11110011 1.11.... ........ ...0.... */ + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 11110011 1.11.... ....00.. ...0.... */ + switch (insn & 0x00030380) { + case 0x00000000: + /* 11110011 1.11..00 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VREV64(ctx, &u.f_2misc)) return true; + return false; + case 0x00000080: + /* 11110011 1.11..00 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VREV32(ctx, &u.f_2misc)) return true; + return false; + case 0x00000100: + /* 11110011 1.11..00 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VREV16(ctx, &u.f_2misc)) return true; + return false; + case 0x00000200: + /* 11110011 1.11..00 ....0010 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADDL_S(ctx, &u.f_2misc)) return true; + return false; + case 0x00000280: + /* 11110011 1.11..00 ....0010 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADDL_U(ctx, &u.f_2misc)) return true; + return false; + case 0x00000300: + /* 11110011 1.11..00 ....0011 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..00 ....0011 00.0.... 
*/ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESE(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..00 ....0011 01.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESD(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00000380: + /* 11110011 1.11..00 ....0011 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..00 ....0011 10.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESMC(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..00 ....0011 11.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESIMC(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00010000: + /* 11110011 1.11..01 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGT0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010080: + /* 11110011 1.11..01 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGE0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010100: + /* 11110011 1.11..01 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCEQ0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010180: + /* 11110011 1.11..01 ....0001 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLE0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010200: + /* 11110011 1.11..01 ....0010 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLT0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010280: + /* 11110011 1.11..01 ....0010 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x1: + /* 11110011 1.11..01 ....0010 11.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_SHA1H(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00010300: + /* 11110011 1.11..01 ....0011 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VABS(ctx, &u.f_2misc)) return true; + return false; + case 0x00010380: + /* 11110011 1.11..01 ....0011 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VNEG(ctx, &u.f_2misc)) return true; + return false; + case 0x00020000: + /* 11110011 1.11..10 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VSWP(ctx, &u.f_2misc)) return true; + return false; + case 0x00020080: + /* 11110011 1.11..10 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VTRN(ctx, &u.f_2misc)) return true; + return false; + case 0x00020100: + /* 11110011 1.11..10 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VUZP(ctx, &u.f_2misc)) return true; + return false; + case 0x00020180: + /* 11110011 1.11..10 ....0001 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VZIP(ctx, &u.f_2misc)) return true; + return false; + case 0x00020200: + /* 11110011 1.11..10 ....0010 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0010 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VMOVN(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..10 ....0010 01.0.... 
*/ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VQMOVUN(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020280: + /* 11110011 1.11..10 ....0010 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0010 10.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VQMOVN_S(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..10 ....0010 11.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VQMOVN_U(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020300: + /* 11110011 1.11..10 ....0011 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0011 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VSHLL(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020380: + /* 11110011 1.11..10 ....0011 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0011 10.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_SHA1SU1(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..10 ....0011 11.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_SHA256SU0(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00030000: + /* 11110011 1.11..11 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTAS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030080: + /* 11110011 1.11..11 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTAU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030100: + /* 11110011 1.11..11 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTNS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030180: + /* 11110011 1.11..11 ....0001 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTNU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030200: + /* 11110011 1.11..11 ....0010 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTPS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030280: + /* 11110011 1.11..11 ....0010 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTPU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030300: + /* 11110011 1.11..11 ....0011 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTMS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030380: + /* 11110011 1.11..11 ....0011 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTMU(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x1: + /* 11110011 1.11.... ....01.. ...0.... */ + switch (insn & 0x00030380) { + case 0x00000000: + /* 11110011 1.11..00 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLS(ctx, &u.f_2misc)) return true; + return false; + case 0x00000080: + /* 11110011 1.11..00 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLZ(ctx, &u.f_2misc)) return true; + return false; + case 0x00000100: + /* 11110011 1.11..00 ....0101 0..0.... 
*/ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCNT(ctx, &u.f_2misc)) return true; + return false; + case 0x00000180: + /* 11110011 1.11..00 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VMVN(ctx, &u.f_2misc)) return true; + return false; + case 0x00000200: + /* 11110011 1.11..00 ....0110 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADAL_S(ctx, &u.f_2misc)) return true; + return false; + case 0x00000280: + /* 11110011 1.11..00 ....0110 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADAL_U(ctx, &u.f_2misc)) return true; + return false; + case 0x00000300: + /* 11110011 1.11..00 ....0111 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VQABS(ctx, &u.f_2misc)) return true; + return false; + case 0x00000380: + /* 11110011 1.11..00 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VQNEG(ctx, &u.f_2misc)) return true; + return false; + case 0x00010000: + /* 11110011 1.11..01 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGT0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010080: + /* 11110011 1.11..01 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGE0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010100: + /* 11110011 1.11..01 ....0101 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCEQ0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010180: + /* 11110011 1.11..01 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLE0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010200: + /* 11110011 1.11..01 ....0110 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLT0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010300: + /* 11110011 1.11..01 ....0111 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VABS_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010380: + /* 11110011 1.11..01 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VNEG_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00020000: + /* 11110011 1.11..10 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTN(ctx, &u.f_2misc)) return true; + return false; + case 0x00020080: + /* 11110011 1.11..10 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTX(ctx, &u.f_2misc)) return true; + return false; + case 0x00020100: + /* 11110011 1.11..10 ....0101 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTA(ctx, &u.f_2misc)) return true; + return false; + case 0x00020180: + /* 11110011 1.11..10 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTZ(ctx, &u.f_2misc)) return true; + return false; + case 0x00020200: + /* 11110011 1.11..10 ....0110 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0110 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VCVT_F16_F32(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020280: + /* 11110011 1.11..10 ....0110 1..0.... 
*/ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTM(ctx, &u.f_2misc)) return true; + return false; + case 0x00020300: + /* 11110011 1.11..10 ....0111 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0111 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VCVT_F32_F16(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020380: + /* 11110011 1.11..10 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTP(ctx, &u.f_2misc)) return true; + return false; + case 0x00030000: + /* 11110011 1.11..11 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRECPE(ctx, &u.f_2misc)) return true; + return false; + case 0x00030080: + /* 11110011 1.11..11 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRSQRTE(ctx, &u.f_2misc)) return true; + return false; + case 0x00030100: + /* 11110011 1.11..11 ....0101 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRECPE_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00030180: + /* 11110011 1.11..11 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRSQRTE_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00030200: + /* 11110011 1.11..11 ....0110 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_FS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030280: + /* 11110011 1.11..11 ....0110 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_FU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030300: + /* 11110011 1.11..11 ....0111 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_SF(ctx, &u.f_2misc)) return true; + return false; + case 0x00030380: + /* 11110011 1.11..11 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_UF(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x2: + /* 11110011 1.11.... ....10.. ...0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_25(ctx, &u.f_disas_neon_dp4, insn); + if (trans_VTBL(ctx, &u.f_disas_neon_dp4)) return true; + return false; + case 0x3: + /* 11110011 1.11.... ....11.. ...0.... */ + switch (insn & 0x00010380) { + case 0x00000000: + /* 11110011 1.11...0 ....1100 0..0.... */ + switch ((insn >> 17) & 0x1) { + case 0x0: + /* 11110011 1.11..00 ....1100 0..0.... */ + switch ((insn >> 18) & 0x1) { + case 0x1: + /* 11110011 1.11.100 ....1100 0..0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_28(ctx, &u.f_disas_neon_dp5, insn); + if (trans_VDUP_scalar(ctx, &u.f_disas_neon_dp5)) return true; + return false; + } + return false; + case 0x1: + /* 11110011 1.11..10 ....1100 0..0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_27(ctx, &u.f_disas_neon_dp5, insn); + if (trans_VDUP_scalar(ctx, &u.f_disas_neon_dp5)) return true; + return false; + } + return false; + case 0x00010000: + /* 11110011 1.11...1 ....1100 0..0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_26(ctx, &u.f_disas_neon_dp5, insn); + if (trans_VDUP_scalar(ctx, &u.f_disas_neon_dp5)) return true; + return false; + } + return false; + } + return false; + } + } + switch (insn & 0x00000f40) { + case 0x00000000: + /* 1111001. 1....... ....0000 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0000 .0.0.... 
*/ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0000 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000040: + /* 1111001. 1....... ....0000 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLA_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000100: + /* 1111001. 1....... ....0001 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0001 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDW_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0001 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDW_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000140: + /* 1111001. 1....... ....0001 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLA_F_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000200: + /* 1111001. 1....... ....0010 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000240: + /* 1111001. 1....... ....0010 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLAL_S_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLAL_U_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000300: + /* 1111001. 1....... ....0011 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0011 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBW_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0011 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBW_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000340: + /* 1111001. 1....... ....0011 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0011 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VQDMLAL_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000400: + /* 1111001. 1....... ....0100 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDHN_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VRADDHN_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000440: + /* 1111001. 1....... ....0100 .1.0.... 
*/ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLS_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000500: + /* 1111001. 1....... ....0101 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0101 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABAL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0101 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABAL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000540: + /* 1111001. 1....... ....0101 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLS_F_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000600: + /* 1111001. 1....... ....0110 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0110 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBHN_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0110 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VRSUBHN_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000640: + /* 1111001. 1....... ....0110 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0110 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLSL_S_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0110 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLSL_U_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000700: + /* 1111001. 1....... ....0111 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0111 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABDL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0111 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABDL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000740: + /* 1111001. 1....... ....0111 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0111 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VQDMLSL_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000800: + /* 1111001. 1....... ....1000 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1000 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLAL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1000 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLAL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000840: + /* 1111001. 1....... ....1000 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMUL_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000900: + /* 1111001. 1....... ....1001 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1001 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VQDMLAL_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000940: + /* 1111001. 1....... 
....1001 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMUL_F_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000a00: + /* 1111001. 1....... ....1010 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLSL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLSL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000a40: + /* 1111001. 1....... ....1010 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMULL_S_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMULL_U_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000b00: + /* 1111001. 1....... ....1011 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1011 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VQDMLSL_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000b40: + /* 1111001. 1....... ....1011 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1011 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VQDMULL_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000c00: + /* 1111001. 1....... ....1100 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMULL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMULL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000c40: + /* 1111001. 1....... ....1100 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQDMULH_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000d00: + /* 1111001. 1....... ....1101 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1101 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VQDMULL_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000d40: + /* 1111001. 1....... ....1101 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQRDMULH_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000e00: + /* 1111001. 1....... ....1110 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1110 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMULL_P_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000e40: + /* 1111001. 1....... ....1110 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQRDMLAH_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000f40: + /* 1111001. 1....... ....1111 .1.0.... 
*/ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQRDMLSH_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0xf2800010: + /* 1111001. 1....... ........ ...1.... */ + switch ((insn >> 7) & 0x1) { + case 0x0: + /* 1111001. 1....... ........ 0..1.... */ + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 1111001. 1.0..... ........ 0..1.... */ + switch ((insn >> 20) & 0x1) { + case 0x0: + /* 1111001. 1.00.... ........ 0..1.... */ + switch ((insn >> 19) & 0x1) { + case 0x0: + /* 1111001. 1.000... ........ 0..1.... */ + disas_neon_dp_extract_1reg_imm(ctx, &u.f_1reg_imm, insn); + u.f_1reg_imm.cmode = extract32(insn, 8, 4); + u.f_1reg_imm.op = extract32(insn, 5, 1); + if (trans_Vimm_1r(ctx, &u.f_1reg_imm)) return true; + return false; + case 0x1: + /* 1111001. 1.001... ........ 0..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1.001... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1.001... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1.001... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1.001... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1.001... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1.001... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000800: + /* 11110010 1.001... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.001... ....1000 00.1.... */ + if (trans_VSHRN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.001... ....1000 01.1.... */ + if (trans_VRSHRN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000900: + /* 11110010 1.001... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.001... ....1001 00.1.... */ + if (trans_VQSHRN_S16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.001... ....1001 01.1.... */ + if (trans_VQRSHRN_S16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000a00: + /* 11110010 1.001... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_b(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.001... ....1010 00.1.... */ + if (trans_VSHLL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 1.001... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1.001... 
....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1.001... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1.001... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1.001... ....0100 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1.001... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1.001... ....0110 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1.001... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000800: + /* 11110011 1.001... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.001... ....1000 00.1.... */ + if (trans_VQSHRUN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.001... ....1000 01.1.... */ + if (trans_VQRSHRUN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000900: + /* 11110011 1.001... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.001... ....1001 00.1.... */ + if (trans_VQSHRN_U16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.001... ....1001 01.1.... */ + if (trans_VQRSHRN_U16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000a00: + /* 11110011 1.001... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_b(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.001... ....1010 00.1.... */ + if (trans_VSHLL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + } + return false; + case 0x1: + /* 1111001. 1.01.... ........ 0..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1.01.... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1.01.... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1.01.... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1.01.... ....0011 0..1.... 
*/ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1.01.... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1.01.... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000800: + /* 11110010 1.01.... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.01.... ....1000 00.1.... */ + if (trans_VSHRN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.01.... ....1000 01.1.... */ + if (trans_VRSHRN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000900: + /* 11110010 1.01.... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.01.... ....1001 00.1.... */ + if (trans_VQSHRN_S32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.01.... ....1001 01.1.... */ + if (trans_VQRSHRN_S32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000a00: + /* 11110010 1.01.... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.01.... ....1010 00.1.... */ + if (trans_VSHLL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 1.01.... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1.01.... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1.01.... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1.01.... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1.01.... ....0100 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1.01.... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1.01.... ....0110 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1.01.... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000800: + /* 11110011 1.01.... ....1000 0..1.... 
*/ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.01.... ....1000 00.1.... */ + if (trans_VQSHRUN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.01.... ....1000 01.1.... */ + if (trans_VQRSHRUN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000900: + /* 11110011 1.01.... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.01.... ....1001 00.1.... */ + if (trans_VQSHRN_U32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.01.... ....1001 01.1.... */ + if (trans_VQRSHRN_U32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000a00: + /* 11110011 1.01.... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.01.... ....1010 00.1.... */ + if (trans_VSHLL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + } + return false; + case 0x1: + /* 1111001. 1.1..... ........ 0..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1.1..... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1.1..... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1.1..... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1.1..... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1.1..... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1.1..... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000800: + /* 11110010 1.1..... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.1..... ....1000 00.1.... */ + if (trans_VSHRN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.1..... ....1000 01.1.... */ + if (trans_VRSHRN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000900: + /* 11110010 1.1..... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.1..... ....1001 00.1.... */ + if (trans_VQSHRN_S64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.1..... ....1001 01.1.... */ + if (trans_VQRSHRN_S64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000a00: + /* 11110010 1.1..... ....1010 0..1.... 
*/ + disas_neon_dp_extract_2reg_shll_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.1..... ....1010 00.1.... */ + if (trans_VSHLL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000e00: + /* 11110010 1.1..... ....1110 0..1.... */ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_SF_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000f00: + /* 11110010 1.1..... ....1111 0..1.... */ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_FS_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000000: + /* 11110011 1.1..... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1.1..... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1.1..... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1.1..... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1.1..... ....0100 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1.1..... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1.1..... ....0110 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1.1..... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000800: + /* 11110011 1.1..... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.1..... ....1000 00.1.... */ + if (trans_VQSHRUN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.1..... ....1000 01.1.... */ + if (trans_VQRSHRUN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000900: + /* 11110011 1.1..... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.1..... ....1001 00.1.... */ + if (trans_VQSHRN_U64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.1..... ....1001 01.1.... */ + if (trans_VQRSHRN_U64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000a00: + /* 11110011 1.1..... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.1..... ....1010 00.1.... */ + if (trans_VSHLL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000e00: + /* 11110011 1.1..... ....1110 0..1.... 
*/ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_UF_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000f00: + /* 11110011 1.1..... ....1111 0..1.... */ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_FU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + case 0x1: + /* 1111001. 1....... ........ 1..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1....... ....0000 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1....... ....0001 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1....... ....0010 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1....... ....0011 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1....... ....0101 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1....... ....0111 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000000: + /* 11110011 1....... ....0000 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1....... ....0001 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1....... ....0010 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1....... ....0011 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1....... ....0100 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1....... ....0101 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1....... ....0110 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1....... ....0111 1..1.... 
*/ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + } + return false; +} diff --git a/qemu/target/arm/decode-neon-ls.inc.c b/qemu/target/arm/decode-neon-ls.inc.c new file mode 100644 index 0000000000..aaf998abb2 --- /dev/null +++ b/qemu/target/arm/decode-neon-ls.inc.c @@ -0,0 +1,149 @@ +/* This file is autogenerated by scripts/decodetree.py. */ + +typedef struct { + int align; + int itype; + int l; + int rm; + int rn; + int size; + int vd; +} arg_disas_neon_ls0; + +typedef struct { + int a; + int n; + int rm; + int rn; + int size; + int t; + int vd; +} arg_disas_neon_ls1; + +typedef struct { + int align; + int l; + int n; + int reg_idx; + int rm; + int rn; + int size; + int stride; + int vd; +} arg_disas_neon_ls2; + +typedef arg_disas_neon_ls0 arg_VLDST_multiple; +static bool trans_VLDST_multiple(DisasContext *ctx, arg_VLDST_multiple *a); +typedef arg_disas_neon_ls1 arg_VLD_all_lanes; +static bool trans_VLD_all_lanes(DisasContext *ctx, arg_VLD_all_lanes *a); +typedef arg_disas_neon_ls2 arg_VLDST_single; +static bool trans_VLDST_single(DisasContext *ctx, arg_VLDST_single *a); + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_0(DisasContext *ctx, arg_disas_neon_ls0 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->itype = extract32(insn, 8, 4); + a->size = extract32(insn, 6, 2); + a->align = extract32(insn, 4, 2); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_1(DisasContext *ctx, arg_disas_neon_ls1 *a, uint32_t insn) +{ + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->size = extract32(insn, 6, 2); + a->t = extract32(insn, 5, 1); + a->a = extract32(insn, 4, 1); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_2(DisasContext *ctx, arg_disas_neon_ls2 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->reg_idx = extract32(insn, 5, 3); + a->align = extract32(insn, 4, 1); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->stride = 1; +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_3(DisasContext *ctx, arg_disas_neon_ls2 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->reg_idx = extract32(insn, 6, 2); + a->align = extract32(insn, 4, 2); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->stride = plus1(ctx, extract32(insn, 5, 1)); +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_4(DisasContext *ctx, arg_disas_neon_ls2 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->reg_idx = extract32(insn, 7, 1); + a->align = extract32(insn, 4, 3); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->stride = plus1(ctx, extract32(insn, 6, 1)); +} + +static bool disas_neon_ls(DisasContext *ctx, uint32_t insn) +{ + union { + arg_disas_neon_ls0 f_disas_neon_ls0; + arg_disas_neon_ls1 f_disas_neon_ls1; + 
arg_disas_neon_ls2 f_disas_neon_ls2; + } u; + + switch (insn & 0xff900000) { + case 0xf4000000: + /* 11110100 0..0.... ........ ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_0(ctx, &u.f_disas_neon_ls0, insn); + if (trans_VLDST_multiple(ctx, &u.f_disas_neon_ls0)) return true; + return false; + case 0xf4800000: + /* 11110100 1..0.... ........ ........ */ + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 11110100 1..0.... ....00.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_2(ctx, &u.f_disas_neon_ls2, insn); + if (trans_VLDST_single(ctx, &u.f_disas_neon_ls2)) return true; + return false; + case 0x1: + /* 11110100 1..0.... ....01.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_3(ctx, &u.f_disas_neon_ls2, insn); + if (trans_VLDST_single(ctx, &u.f_disas_neon_ls2)) return true; + return false; + case 0x2: + /* 11110100 1..0.... ....10.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_4(ctx, &u.f_disas_neon_ls2, insn); + if (trans_VLDST_single(ctx, &u.f_disas_neon_ls2)) return true; + return false; + case 0x3: + /* 11110100 1..0.... ....11.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_1(ctx, &u.f_disas_neon_ls1, insn); + switch ((insn >> 21) & 0x1) { + case 0x1: + /* 11110100 1.10.... ....11.. ........ */ + if (trans_VLD_all_lanes(ctx, &u.f_disas_neon_ls1)) return true; + return false; + } + return false; + } + return false; + } + return false; +} diff --git a/qemu/target/arm/decode-neon-shared.inc.c b/qemu/target/arm/decode-neon-shared.inc.c new file mode 100644 index 0000000000..58913b4365 --- /dev/null +++ b/qemu/target/arm/decode-neon-shared.inc.c @@ -0,0 +1,271 @@ +/* This file is autogenerated by scripts/decodetree.py. */ + +typedef struct { + int q; + int rot; + int size; + int vd; + int vm; + int vn; +} arg_disas_neon_shared0; + +typedef struct { + int q; + int u; + int vd; + int vm; + int vn; +} arg_disas_neon_shared1; + +typedef struct { + int q; + int s; + int vd; + int vm; + int vn; +} arg_disas_neon_shared2; + +typedef struct { + int index; + int q; + int rot; + int size; + int vd; + int vm; + int vn; +} arg_disas_neon_shared3; + +typedef struct { + int index; + int q; + int rm; + int u; + int vd; + int vm; + int vn; +} arg_disas_neon_shared4; + +typedef struct { + int index; + int q; + int rm; + int s; + int vd; + int vn; +} arg_disas_neon_shared5; + +typedef arg_disas_neon_shared0 arg_VCMLA; +static bool trans_VCMLA(DisasContext *ctx, arg_VCMLA *a); +typedef arg_disas_neon_shared0 arg_VCADD; +static bool trans_VCADD(DisasContext *ctx, arg_VCADD *a); +typedef arg_disas_neon_shared1 arg_VDOT; +static bool trans_VDOT(DisasContext *ctx, arg_VDOT *a); +typedef arg_disas_neon_shared2 arg_VFML; +static bool trans_VFML(DisasContext *ctx, arg_VFML *a); +typedef arg_disas_neon_shared3 arg_VCMLA_scalar; +static bool trans_VCMLA_scalar(DisasContext *ctx, arg_VCMLA_scalar *a); +typedef arg_disas_neon_shared4 arg_VDOT_scalar; +static bool trans_VDOT_scalar(DisasContext *ctx, arg_VDOT_scalar *a); +typedef arg_disas_neon_shared5 arg_VFML_scalar; +static bool trans_VFML_scalar(DisasContext *ctx, arg_VFML_scalar *a); + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_0(DisasContext *ctx, arg_disas_neon_shared0 *a, uint32_t insn) +{ + a->rot = extract32(insn, 23, 2); + a->size = extract32(insn, 20, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 
1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_1(DisasContext *ctx, arg_disas_neon_shared0 *a, uint32_t insn) +{ + a->rot = extract32(insn, 24, 1); + a->size = extract32(insn, 20, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_2(DisasContext *ctx, arg_disas_neon_shared1 *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->u = extract32(insn, 4, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_3(DisasContext *ctx, arg_disas_neon_shared2 *a, uint32_t insn) +{ + a->s = extract32(insn, 23, 1); + a->vm = deposit32(extract32(insn, 5, 1), 1, 31, extract32(insn, 0, 4)); + a->vn = deposit32(extract32(insn, 7, 1), 1, 31, extract32(insn, 16, 4)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_4(DisasContext *ctx, arg_disas_neon_shared2 *a, uint32_t insn) +{ + a->s = extract32(insn, 23, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 1; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_5(DisasContext *ctx, arg_disas_neon_shared3 *a, uint32_t insn) +{ + a->rot = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->index = extract32(insn, 5, 1); + a->vm = extract32(insn, 0, 4); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_6(DisasContext *ctx, arg_disas_neon_shared3 *a, uint32_t insn) +{ + a->rot = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->index = 0; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_7(DisasContext *ctx, arg_disas_neon_shared4 *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->index = extract32(insn, 5, 1); + a->u = extract32(insn, 4, 1); + a->rm = extract32(insn, 0, 4); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_8(DisasContext *ctx, arg_disas_neon_shared5 *a, uint32_t insn) +{ + a->s = extract32(insn, 20, 1); + a->index = extract32(insn, 3, 1); + a->rm = deposit32(extract32(insn, 5, 1), 1, 31, extract32(insn, 0, 3)); + a->vn = deposit32(extract32(insn, 7, 1), 1, 31, extract32(insn, 16, 4)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void 
disas_neon_shared_extract_disas_neon_shared_Fmt_9(DisasContext *ctx, arg_disas_neon_shared5 *a, uint32_t insn) +{ + a->s = extract32(insn, 20, 1); + a->rm = extract32(insn, 0, 3); + a->index = deposit32(extract32(insn, 3, 1), 1, 31, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 1; +} + +static bool disas_neon_shared(DisasContext *ctx, uint32_t insn) +{ + union { + arg_disas_neon_shared0 f_disas_neon_shared0; + arg_disas_neon_shared1 f_disas_neon_shared1; + arg_disas_neon_shared2 f_disas_neon_shared2; + arg_disas_neon_shared3 f_disas_neon_shared3; + arg_disas_neon_shared4 f_disas_neon_shared4; + arg_disas_neon_shared5 f_disas_neon_shared5; + } u; + + switch (insn & 0xfe000f00) { + case 0xfc000800: + /* 1111110. ........ ....1000 ........ */ + switch (insn & 0x00200010) { + case 0x00000000: + /* 1111110. ..0..... ....1000 ...0.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_1(ctx, &u.f_disas_neon_shared0, insn); + switch ((insn >> 23) & 0x1) { + case 0x1: + /* 1111110. 1.0..... ....1000 ...0.... */ + if (trans_VCADD(ctx, &u.f_disas_neon_shared0)) return true; + return false; + } + return false; + case 0x00200000: + /* 1111110. ..1..... ....1000 ...0.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_0(ctx, &u.f_disas_neon_shared0, insn); + if (trans_VCMLA(ctx, &u.f_disas_neon_shared0)) return true; + return false; + case 0x00200010: + /* 1111110. ..1..... ....1000 ...1.... */ + switch (insn & 0x01100040) { + case 0x00000000: + /* 11111100 ..10.... ....1000 .0.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_3(ctx, &u.f_disas_neon_shared2, insn); + if (trans_VFML(ctx, &u.f_disas_neon_shared2)) return true; + return false; + case 0x00000040: + /* 11111100 ..10.... ....1000 .1.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_4(ctx, &u.f_disas_neon_shared2, insn); + if (trans_VFML(ctx, &u.f_disas_neon_shared2)) return true; + return false; + } + return false; + } + return false; + case 0xfc000d00: + /* 1111110. ........ ....1101 ........ */ + disas_neon_shared_extract_disas_neon_shared_Fmt_2(ctx, &u.f_disas_neon_shared1, insn); + switch (insn & 0x01b00000) { + case 0x00200000: + /* 11111100 0.10.... ....1101 ........ */ + if (trans_VDOT(ctx, &u.f_disas_neon_shared1)) return true; + return false; + } + return false; + case 0xfe000800: + /* 1111111. ........ ....1000 ........ */ + switch (insn & 0x01800010) { + case 0x00000000: + /* 11111110 0....... ....1000 ...0.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_5(ctx, &u.f_disas_neon_shared3, insn); + if (trans_VCMLA_scalar(ctx, &u.f_disas_neon_shared3)) return true; + return false; + case 0x00000010: + /* 11111110 0....... ....1000 ...1.... */ + switch (insn & 0x00200040) { + case 0x00000000: + /* 11111110 0.0..... ....1000 .0.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_8(ctx, &u.f_disas_neon_shared5, insn); + if (trans_VFML_scalar(ctx, &u.f_disas_neon_shared5)) return true; + return false; + case 0x00000040: + /* 11111110 0.0..... ....1000 .1.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_9(ctx, &u.f_disas_neon_shared5, insn); + if (trans_VFML_scalar(ctx, &u.f_disas_neon_shared5)) return true; + return false; + } + return false; + case 0x00800000: + /* 11111110 1....... ....1000 ...0.... 
*/ + disas_neon_shared_extract_disas_neon_shared_Fmt_6(ctx, &u.f_disas_neon_shared3, insn); + if (trans_VCMLA_scalar(ctx, &u.f_disas_neon_shared3)) return true; + return false; + } + return false; + case 0xfe000d00: + /* 1111111. ........ ....1101 ........ */ + disas_neon_shared_extract_disas_neon_shared_Fmt_7(ctx, &u.f_disas_neon_shared4, insn); + switch (insn & 0x01b00000) { + case 0x00200000: + /* 11111110 0.10.... ....1101 ........ */ + if (trans_VDOT_scalar(ctx, &u.f_disas_neon_shared4)) return true; + return false; + } + return false; + } + return false; +} diff --git a/qemu/target/arm/decode-sve.inc.c b/qemu/target/arm/decode-sve.inc.c index 9740f1aa80..d04c24c0ee 100644 --- a/qemu/target/arm/decode-sve.inc.c +++ b/qemu/target/arm/decode-sve.inc.c @@ -43,9 +43,7 @@ typedef struct { } arg_disas_sve31; typedef struct { -#ifdef _MSC_VER - int dummy; -#endif + int : 0; } arg_disas_sve32; typedef struct { diff --git a/qemu/target/arm/helper-a64.c b/qemu/target/arm/helper-a64.c index 12da114039..df30f11c47 100644 --- a/qemu/target/arm/helper-a64.c +++ b/qemu/target/arm/helper-a64.c @@ -1096,78 +1096,40 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) * alignment faults or any memory attribute handling). */ - struct uc_struct *uc = env->uc; - ARMCPU *cpu = env_archcpu(env); - uint64_t blocklen = 4 << cpu->dcz_blocksize; + UNICORN_UNUSED struct uc_struct *uc = env->uc; + int blocklen = 4 << env_archcpu(env)->dcz_blocksize; uint64_t vaddr = vaddr_in & ~(blocklen - 1); + int mmu_idx = cpu_mmu_index(env, false); + void *mem; + /* - * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than - * the block size so we might have to do more than one TLB lookup. - * We know that in fact for any v8 CPU the page size is at least 4K - * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only - * 1K as an artefact of legacy v5 subpage support being present in the - * same QEMU executable. So in practice the hostaddr[] array has - * two entries, given the current setting of TARGET_PAGE_BITS_MIN. + * Trapless lookup. In addition to actual invalid page, may + * return NULL for I/O, watchpoints, clean pages, etc. */ - int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE); - void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)]; - int try, i; - unsigned mmu_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); - - assert(maxidx <= ARRAY_SIZE(hostaddr)); - - for (try = 0; try < 2; try++) { - - for (i = 0; i < maxidx; i++) { - hostaddr[i] = tlb_vaddr_to_host(env, - vaddr + TARGET_PAGE_SIZE * i, - 1, mmu_idx); - if (!hostaddr[i]) { - break; - } - } - if (i == maxidx) { + mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); + + if (unlikely(!mem)) { + uintptr_t ra = GETPC(); + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + (void) probe_write(env, vaddr_in, 1, mmu_idx, ra); + mem = probe_write(env, vaddr, blocklen, mmu_idx, ra); + + if (unlikely(!mem)) { /* - * If it's all in the TLB it's fair game for just writing to; - * we know we don't need to update dirty status, etc. + * The only remaining reason for mem == NULL is I/O. + * Just do a series of byte writes as the architecture demands. 
*/ - for (i = 0; i < maxidx - 1; i++) { - memset(hostaddr[i], 0, TARGET_PAGE_SIZE); + for (int i = 0; i < blocklen; i++) { + cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra); } - memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE)); return; } - /* - * OK, try a store and see if we can populate the tlb. This - * might cause an exception if the memory isn't writable, - * in which case we will longjmp out of here. We must for - * this purpose use the actual register value passed to us - * so that we get the fault address right. - */ - helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC()); - /* Now we can populate the other TLB entries, if any */ - for (i = 0; i < maxidx; i++) { - uint64_t va = vaddr + TARGET_PAGE_SIZE * i; - if (va != (vaddr_in & TARGET_PAGE_MASK)) { - helper_ret_stb_mmu(env, va, 0, oi, GETPC()); - } - } } - /* - * Slow path (probably attempt to do this to an I/O device or - * similar, or clearing of a block of code we have translations - * cached for). Just do a series of byte writes as the architecture - * demands. It's not worth trying to use a cpu_physical_memory_map(), - * memset(), unmap() sequence here because: - * + we'd need to account for the blocksize being larger than a page - * + the direct-RAM access case is almost always going to be dealt - * with in the fastpath code above, so there's no speed benefit - * + we would have to deal with the map returning NULL because the - * bounce buffer was in use - */ - for (i = 0; i < blocklen; i++) { - helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC()); - } + memset(mem, 0, blocklen); } diff --git a/qemu/target/arm/helper-a64.h b/qemu/target/arm/helper-a64.h index 3df7c185aa..5b0b699a50 100644 --- a/qemu/target/arm/helper-a64.h +++ b/qemu/target/arm/helper-a64.h @@ -103,3 +103,19 @@ DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) + +DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) +DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(stg, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64) diff --git a/qemu/target/arm/helper-sve.h b/qemu/target/arm/helper-sve.h index 2f47279155..199ffee9cc 100644 --- a/qemu/target/arm/helper-sve.h +++ b/qemu/target/arm/helper-sve.h @@ -1099,25 +1099,40 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_d, 
TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -1181,6 +1196,64 @@ DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, 
TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1212,6 +1285,55 @@ DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
+DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1243,6 +1365,55 @@ DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) 
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1290,6 +1461,53 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG, @@ -1399,6 +1617,116 @@ DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbss_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbsu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbss_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zsu_mte, 
TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + + DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG, @@ -1508,6 +1836,115 @@ DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, 
ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbss_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbsu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbss_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd_mte, TCG_CALL_NO_WG, + void, 
env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG, @@ -1575,4 +2012,71 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbs_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbd_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbd_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + 
+DEF_HELPER_FLAGS_6(sve_stbd_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/qemu/target/arm/helper.c b/qemu/target/arm/helper.c index 60c9db9e3e..6e28646ad2 100644 --- a/qemu/target/arm/helper.c +++ b/qemu/target/arm/helper.c @@ -31,9 +31,11 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool s1_is_el0, hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, target_ulong *page_size_ptr, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + UNICORN_NONNULL; static void switch_mode(CPUARMState *env, int mode); @@ -78,35 +80,19 @@ uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri) } /* - * Some registers are not accessible if EL3.NS=0 and EL3 is using AArch32 but - * they are accessible when EL3 is using AArch64 regardless of EL3.NS. - * - * access_el3_aa32ns: Used to check AArch32 register views. - * access_el3_aa32ns_aa64any: Used to check both AArch32/64 register views. + * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0. */ static CPAccessResult access_el3_aa32ns(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - bool secure = arm_is_secure_below_el3(env); - - assert(!arm_el_is_aa64(env, 3)); - if (secure) { + if (!is_a64(env) && arm_current_el(env) == 3 && + arm_is_secure_below_el3(env)) { return CP_ACCESS_TRAP_UNCATEGORIZED; } return CP_ACCESS_OK; } -static CPAccessResult access_el3_aa32ns_aa64any(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - if (!arm_el_is_aa64(env, 3)) { - return access_el3_aa32ns(env, ri, isread); - } - return CP_ACCESS_OK; -} - /* Some secure-only AArch32 registers trap to EL3 if used from * Secure EL1 (but are just ordinary UNDEF in other non-EL3 contexts). * Note that an access from Secure EL1 can only happen if EL3 is AArch64. @@ -394,8 +380,7 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); } static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -406,45 +391,7 @@ static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); -} - -static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by IPA. This has to invalidate any structures that - * contain only stage 2 translation information, but does not need - * to apply to structures that contain combined stage 1 and stage 2 - * translation information. - * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero. 
- */ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 40); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - -static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 40); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); } static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -1622,9 +1569,19 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) uint32_t valid_mask = 0x3fff; ARMCPU *cpu = env_archcpu(env); - if (arm_el_is_aa64(env, 3)) { + if (ri->state == ARM_CP_STATE_AA64) { value |= SCR_FW | SCR_AW; /* these two bits are RES1. */ valid_mask &= ~SCR_NET; + + if (cpu_isar_feature(aa64_lor, cpu)) { + valid_mask |= SCR_TLOR; + } + if (cpu_isar_feature(aa64_pauth, cpu)) { + valid_mask |= SCR_API | SCR_APK; + } + if (cpu_isar_feature(aa64_mte, cpu)) { + valid_mask |= SCR_ATA; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); } @@ -1643,12 +1600,6 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) valid_mask &= ~SCR_SMD; } } - if (cpu_isar_feature(aa64_lor, cpu)) { - valid_mask |= SCR_TLOR; - } - if (cpu_isar_feature(aa64_pauth, cpu)) { - valid_mask |= SCR_API | SCR_APK; - } /* Clear all-context RES0 bits. */ value &= valid_mask; @@ -1875,13 +1826,13 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .resetvalue = 0x0 }, { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, - .type = ARM_CP_ALIAS | ARM_CP_IO, + .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write, }, { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, - .type = ARM_CP_ALIAS | ARM_CP_IO, + .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, @@ -3044,7 +2995,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, * Report exception with ESR indicating a fault due to a * translation table walk for a cache maintenance instruction. 
*/ - syn = syn_data_abort_no_iss(current_el == target_el, + syn = syn_data_abort_no_iss(current_el == target_el, 0, fi.ea, 1, fi.s1ptw, 1, fsc); env->exception.vaddress = value; env->exception.fsr = fsr; @@ -3567,8 +3518,7 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); raw_write(env, ri, value); } } @@ -4050,11 +4000,6 @@ static int alle1_tlbmask(CPUARMState *env) return ARMMMUIdxBit_SE10_1 | ARMMMUIdxBit_SE10_1_PAN | ARMMMUIdxBit_SE10_0; - } else if (arm_feature(env, ARM_FEATURE_EL2)) { - return ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2; } else { return ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | @@ -4201,44 +4146,6 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMMMUIdxBit_SE3); } -static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by IPA. This has to invalidate any structures that - * contain only stage 2 translation information, but does not need - * to apply to structures that contain combined stage 1 and stage 2 - * translation information. - * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero. - */ - ARMCPU *cpu = env_archcpu(env); - CPUState *cs = CPU(cpu); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 48); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - -static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 48); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_Stage2); -} - static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -4306,6 +4213,15 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, { ARMCPU *cpu = env_archcpu(env); + if (ri->state == ARM_CP_STATE_AA64 && !cpu_isar_feature(aa64_mte, cpu)) { + if (ri->opc1 == 6) { /* SCTLR_EL3 */ + value &= ~(SCTLR_ITFSB | SCTLR_TCF | SCTLR_ATA); + } else { + value &= ~(SCTLR_ITFSB | SCTLR_TCF0 | SCTLR_TCF | + SCTLR_ATA0 | SCTLR_ATA); + } + } + if (raw_read(env, ri) == value) { /* Skip the TLB flush if nothing actually changed; Linux likes * to do a lot of pointless SCTLR writes. @@ -4320,6 +4236,7 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); /* ??? Lots of these bits are not implemented. */ + /* This may enable/disable the MMU, so do a TLB flush. 
*/ tlb_flush(CPU(cpu)); @@ -4475,12 +4392,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbi_aa64_vae1_write }, { .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -4491,12 +4406,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbi_aa64_alle1is_write }, { .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -4575,20 +4488,16 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbimva_hyp_is_write }, { .name = "TLBIIPAS2", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2IS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_is_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2L", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2LIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_is_write }, + .type = ARM_CP_NOP, .access = PL2_W }, /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, @@ -4702,7 +4611,6 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH, - .type = ARM_CP_NO_RAW, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, @@ -4744,7 +4652,7 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "VTCR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "VTTBR", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 6, .crm = 2, @@ -4792,7 +4700,7 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "HPFAR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4, - 
.access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3, @@ -4849,15 +4757,19 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) if (cpu_isar_feature(aa64_pauth, cpu)) { valid_mask |= HCR_API | HCR_APK; } + if (cpu_isar_feature(aa64_mte, cpu)) { + valid_mask |= HCR_ATA | HCR_DCT | HCR_TID5; + } } /* Clear RES0 bits. */ value &= valid_mask; - /* These bits change the MMU setup: + /* + * These bits change the MMU setup: * HCR_VM enables stage 2 translation * HCR_PTW forbids certain page-table setups - * HCR_DC Disables stage1 and enables stage2 translation + * HCR_DC disables stage1 and enables stage2 translation */ if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) { tlb_flush(CPU(cpu)); @@ -5430,6 +5342,9 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0), "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve }, + { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0), + "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte }, + /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */ /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */ }; @@ -6382,7 +6297,7 @@ static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, /* RCU lock is already being held */ mr = memory_region_from_host(uc, haddr, &offset); if (mr) { - // memory_region_do_writeback(mr, offset, dline_size); FIXME + // memory_region_writeback(mr, offset, dline_size); FIXME } } } @@ -6405,6 +6320,159 @@ static const ARMCPRegInfo dcpodp_reg[] = { #endif +static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if ((arm_current_el(env) < 2) && (arm_hcr_el2_eff(env) & HCR_TID5)) { + return CP_ACCESS_TRAP_EL2; + } + + return CP_ACCESS_OK; +} + +static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + int el = arm_current_el(env); + + if (el < 2 && + arm_feature(env, ARM_FEATURE_EL2) && + !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 3 && + arm_feature(env, ARM_FEATURE_EL3) && + !(env->cp15.scr_el3 & SCR_ATA)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pstate & PSTATE_TCO; +} + +static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val) +{ + env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO); +} + +static const ARMCPRegInfo mte_reginfo[] = { + { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 1, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) }, + { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) }, + { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL2_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) }, + { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) }, + { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64, + 
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) }, + { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) }, + { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4, + .access = PL1_R, .accessfn = access_aa64_tid5, + .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS }, + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + .type = ARM_CP_NO_RAW, + .access = PL0_RW, .readfn = tco_read, .writefn = tco_write }, + { .name = "DC_IGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_IGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo mte_tco_ro_reginfo[] = { + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + .type = ARM_CP_CONST, .access = PL0_RW, }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = { + { .name = "DC_CGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5, + .type = ARM_CP_NOP, 
.access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_GVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3, + .access = PL0_W, .type = ARM_CP_DC_GVA, + .accessfn = aa64_zva_access, + }, + { .name = "DC_GZVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 4, + .access = PL0_W, .type = ARM_CP_DC_GZVA, + .accessfn = aa64_zva_access, + }, + REGINFO_SENTINEL +}; + static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -7048,12 +7116,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) ARMCPRegInfo vpidr_regs[] = { { .name = "VPIDR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = cpu->midr, .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) }, { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_NO_RAW, .writefn = arm_cp_write_ignore, .readfn = mpidr_read }, REGINFO_SENTINEL @@ -7466,6 +7534,19 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, dcpodp_reg); } } + + /* + * If full MTE is enabled, add all of the system registers. + * If only "instructions available at EL0" are enabled, + * then define only a RAZ/WI version of PSTATE.TCO. + */ + if (cpu_isar_feature(aa64_mte, cpu)) { + define_arm_cp_regs(cpu, mte_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); + } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) { + define_arm_cp_regs(cpu, mte_tco_ro_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); + } #endif if (cpu_isar_feature(any_predinv, cpu)) { @@ -8725,6 +8806,77 @@ static void arm_cpu_do_interrupt_aarch32_qemu(CPUState *cs) take_aarch32_exception(env, new_mode, mask, offset, addr); } +static int aarch64_regnum(CPUARMState *env, int aarch32_reg) +{ + /* + * Return the register number of the AArch64 view of the AArch32 + * register @aarch32_reg. The CPUARMState CPSR is assumed to still + * be that of the AArch32 mode the exception came from. + */ + int mode = env->uncached_cpsr & CPSR_M; + + switch (aarch32_reg) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + return aarch32_reg; + case 8: + case 9: + case 10: + case 11: + case 12: + return mode == ARM_CPU_MODE_FIQ ? 
aarch32_reg + 16 : aarch32_reg; + case 13: + switch (mode) { + case ARM_CPU_MODE_USR: + case ARM_CPU_MODE_SYS: + return 13; + case ARM_CPU_MODE_HYP: + return 15; + case ARM_CPU_MODE_IRQ: + return 17; + case ARM_CPU_MODE_SVC: + return 19; + case ARM_CPU_MODE_ABT: + return 21; + case ARM_CPU_MODE_UND: + return 23; + case ARM_CPU_MODE_FIQ: + return 29; + default: + g_assert_not_reached(); + } + case 14: + switch (mode) { + case ARM_CPU_MODE_USR: + case ARM_CPU_MODE_SYS: + case ARM_CPU_MODE_HYP: + return 14; + case ARM_CPU_MODE_IRQ: + return 16; + case ARM_CPU_MODE_SVC: + return 18; + case ARM_CPU_MODE_ABT: + return 20; + case ARM_CPU_MODE_UND: + return 22; + case ARM_CPU_MODE_FIQ: + return 30; + default: + g_assert_not_reached(); + } + case 15: + return 31; + default: + g_assert_not_reached(); + } +} + /* Handle exception entry to a target EL which is using AArch64 */ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) { @@ -8735,6 +8887,7 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) unsigned int new_mode = aarch64_pstate_mode(new_el, true); unsigned int old_mode; unsigned int cur_el = arm_current_el(env); + int rt; /* * Note that new_el can never be 0. If cur_el is 0, then @@ -8790,7 +8943,8 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) case EXCP_HVC: case EXCP_HYP_TRAP: case EXCP_SMC: - if (syn_get_ec(env->exception.syndrome) == EC_ADVSIMDFPACCESSTRAP) { + switch (syn_get_ec(env->exception.syndrome)) { + case EC_ADVSIMDFPACCESSTRAP: /* * QEMU internal FP/SIMD syndromes from AArch32 include the * TA and coproc fields which are only exposed if the exception @@ -8798,6 +8952,34 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) * AArch64 format syndrome. */ env->exception.syndrome &= ~MAKE_64BIT_MASK(0, 20); + break; + case EC_CP14RTTRAP: + case EC_CP15RTTRAP: + case EC_CP14DTTRAP: + /* + * For a trap on AArch32 MRC/MCR/LDC/STC the Rt field is currently + * the raw register field from the insn; when taking this to + * AArch64 we must convert it to the AArch64 view of the register + * number. Notice that we read a 4-bit AArch32 register number and + * write back a 5-bit AArch64 one. 
+ */ + rt = extract32(env->exception.syndrome, 5, 4); + rt = aarch64_regnum(env, rt); + env->exception.syndrome = deposit32(env->exception.syndrome, + 5, 5, rt); + break; + case EC_CP15RRTTRAP: + case EC_CP14RRTTRAP: + /* Similarly for MRRC/MCRR traps for Rt and Rt2 fields */ + rt = extract32(env->exception.syndrome, 5, 4); + rt = aarch64_regnum(env, rt); + env->exception.syndrome = deposit32(env->exception.syndrome, + 5, 5, rt); + rt = extract32(env->exception.syndrome, 10, 4); + rt = aarch64_regnum(env, rt); + env->exception.syndrome = deposit32(env->exception.syndrome, + 10, 5, rt); + break; } env->cp15.esr_el[new_el] = env->exception.syndrome; break; @@ -8850,6 +9032,9 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) break; } } + if (cpu_isar_feature(aa64_mte, cpu)) { + new_mode |= PSTATE_TCO; + } pstate_write(env, PSTATE_DAIF | new_mode); env->aarch64 = 1; @@ -8908,44 +9093,6 @@ void arm_cpu_do_interrupt(CPUState *cs) cs->interrupt_request |= CPU_INTERRUPT_EXITTB; } -/* Return the exception level which controls this address translation regime */ -static uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_Stage2: - case ARMMMUIdx_E2: - return 2; - case ARMMMUIdx_SE3: - return 3; - case ARMMMUIdx_SE10_0: - return arm_el_is_aa64(env, 3) ? 1 : 3; - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_MPrivNegPri: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MPriv: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSPrivNegPri: - case ARMMMUIdx_MSUserNegPri: - case ARMMMUIdx_MSPriv: - case ARMMMUIdx_MSUser: - return 1; - default: - g_assert_not_reached(); - // never reach here - return 1; - } -} - uint64_t arm_sctlr(CPUARMState *env, int el) { /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. 
*/ @@ -9024,15 +9171,6 @@ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, } } -/* Return the TCR controlling this translation regime */ -static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - if (mmu_idx == ARMMMUIdx_Stage2) { - return &env->cp15.vtcr_el2; - } - return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; -} - /* Convert a possible stage1+2 MMU index into the appropriate * stage 1 MMU index */ @@ -9189,9 +9327,10 @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) * * @env: CPUARMState * @s2ap: The 2-bit stage2 access permissions (S2AP) - * @xn: XN (execute-never) bit + * @xn: XN (execute-never) bits + * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 */ -static int get_S2prot(CPUARMState *env, int s2ap, int xn) +static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) { int prot = 0; @@ -9201,8 +9340,32 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn) if (s2ap & 2) { prot |= PAGE_WRITE; } - if (!xn) { - if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { + + if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { + switch (xn) { + case 0: + prot |= PAGE_EXEC; + break; + case 1: + if (s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + case 2: + break; + case 3: + if (!s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + default: + g_assert_not_reached(); + } + } else { + if (!extract32(xn, 1, 1)) { + if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { + prot |= PAGE_EXEC; + } prot |= PAGE_EXEC; } } @@ -9323,19 +9486,11 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, int s2prot; int ret; ARMCacheAttrs cacheattrs = { 0 }; - ARMCacheAttrs *pcacheattrs = NULL; - - if (env->cp15.hcr_el2 & HCR_PTW) { - /* - * PTW means we must fault if this S1 walk touches S2 Device - * memory; otherwise we don't care about the attributes and can - * save the S2 translation the effort of computing them. - */ - pcacheattrs = &cacheattrs; - } - ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_Stage2, &s2pa, - &txattrs, &s2prot, &s2size, fi, pcacheattrs); + ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, ARMMMUIdx_Stage2, + false, + &s2pa, &txattrs, &s2prot, &s2size, fi, + &cacheattrs); if (ret) { assert(fi->type != ARMFault_None); fi->s2addr = addr; @@ -9343,8 +9498,11 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, fi->s1ptw = true; return ~0; } - if (pcacheattrs && (pcacheattrs->attrs & 0xf0) == 0) { - /* Access was to Device memory: generate Permission fault */ + if ((env->cp15.hcr_el2 & HCR_PTW) && (cacheattrs.attrs & 0xf0) == 0) { + /* + * PTW set and S1 walk touched S2 Device memory: + * generate Permission fault. + */ fi->type = ARMFault_Permission; fi->s2addr = addr; fi->stage2 = true; @@ -9829,6 +9987,16 @@ static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) } } +static int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx) +{ + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 57, 2); + } else { + /* Replicate the single TCMA bit so we always have 2 bits. */ + return extract32(tcr, 30, 1) * 3; + } +} + ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ARMMMUIdx mmu_idx, bool data) { @@ -9952,8 +10120,32 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, }; } +/** + * get_phys_addr_lpae: perform one stage of page table walk, LPAE format + * + * Returns false if the translation was successful. 
Otherwise, phys_ptr, attrs, + * prot and page_size may not be filled in, and the populated fsr value provides + * information on why the translation aborted, in the format of a long-format + * DFSR/IFSR fault register, with the following caveats: + * * the WnR bit is never set (the caller must do this). + * + * @env: CPUARMState + * @address: virtual address to get physical address for + * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH + * @mmu_idx: MMU index indicating required translation regime + * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table + * walk), must be true if this is stage 2 of a stage 1+2 walk for an + * EL0 access). If @mmu_idx is anything else, @s1_is_el0 is ignored. + * @phys_ptr: set to the physical address corresponding to the virtual address + * @attrs: set to the memory transaction attributes to use + * @prot: set to the permissions for the page containing phys_ptr + * @page_size_ptr: set to the size of the page containing phys_ptr + * @fi: set to fault info if the translation fails + * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes + */ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool s1_is_el0, hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, target_ulong *page_size_ptr, ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) @@ -10176,13 +10368,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, } ap = extract32(attrs, 4, 2); - xn = extract32(attrs, 12, 1); if (mmu_idx == ARMMMUIdx_Stage2) { ns = true; - *prot = get_S2prot(env, ap, xn); + xn = extract32(attrs, 11, 2); + *prot = get_S2prot(env, ap, xn, s1_is_el0); } else { ns = extract32(attrs, 3, 1); + xn = extract32(attrs, 12, 1); pxn = extract32(attrs, 11, 1); *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); } @@ -10201,22 +10394,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, } /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. 
*/ if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) { - txattrs->target_tlb_bit0 = true; + arm_tlb_bti_gp(txattrs) = true; } - if (cacheattrs != NULL) { - if (mmu_idx == ARMMMUIdx_Stage2) { - cacheattrs->attrs = convert_stage2_attrs(env, - extract32(attrs, 0, 4)); - } else { - /* Index into MAIR registers for cache attributes */ - uint8_t attrindx = extract32(attrs, 0, 3); - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; - assert(attrindx <= 7); - cacheattrs->attrs = extract64(mair, attrindx * 8, 8); - } - cacheattrs->shareability = extract32(attrs, 6, 2); + if (mmu_idx == ARMMMUIdx_Stage2) { + cacheattrs->attrs = convert_stage2_attrs(env, extract32(attrs, 0, 4)); + } else { + /* Index into MAIR registers for cache attributes */ + uint8_t attrindx = extract32(attrs, 0, 3); + uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + assert(attrindx <= 7); + cacheattrs->attrs = extract64(mair, attrindx * 8, 8); } + cacheattrs->shareability = extract32(attrs, 6, 2); *phys_ptr = descaddr; *page_size_ptr = page_size; @@ -10923,9 +11113,19 @@ static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) */ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) { - uint8_t s1lo = extract32(s1.attrs, 0, 4), s2lo = extract32(s2.attrs, 0, 4); - uint8_t s1hi = extract32(s1.attrs, 4, 4), s2hi = extract32(s2.attrs, 4, 4); + uint8_t s1lo, s2lo, s1hi, s2hi; ARMCacheAttrs ret; + bool tagged = false; + + if (s1.attrs == 0xf0) { + tagged = true; + s1.attrs = 0xff; + } + + s1lo = extract32(s1.attrs, 0, 4); + s2lo = extract32(s2.attrs, 0, 4); + s1hi = extract32(s1.attrs, 4, 4); + s2hi = extract32(s2.attrs, 4, 4); /* Combine shareability attributes (table D4-43) */ if (s1.shareability == 2 || s2.shareability == 2) { @@ -10973,6 +11173,11 @@ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) } } + /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */ + if (tagged && ret.attrs == 0xff) { + ret.attrs = 0xf0; + } + return ret; } @@ -11034,29 +11239,35 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, /* S1 is done. Now do S2 translation. */ ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2, + mmu_idx == ARMMMUIdx_E10_0, phys_ptr, attrs, &s2_prot, - page_size, fi, - cacheattrs != NULL ? &cacheattrs2 : NULL); + page_size, fi, &cacheattrs2); fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ *prot &= s2_prot; - /* Combine the S1 and S2 cache attributes, if needed */ - if (!ret && cacheattrs != NULL) { - if (env->cp15.hcr_el2 & HCR_DC) { - /* - * HCR.DC forces the first stage attributes to - * Normal Non-Shareable, - * Inner Write-Back Read-Allocate Write-Allocate, - * Outer Write-Back Read-Allocate Write-Allocate. - */ + /* If S2 fails, return early. */ + if (ret) { + return ret; + } + + /* Combine the S1 and S2 cache attributes. */ + if (env->cp15.hcr_el2 & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + * Do not overwrite Tagged within attrs. + */ + if (cacheattrs->attrs != 0xf0) { cacheattrs->attrs = 0xff; - cacheattrs->shareability = 0; } - *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + cacheattrs->shareability = 0; } - return ret; + *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + return 0; } else { /* * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. 
@@ -11117,6 +11328,9 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, /* Definitely a real MMU, not an MPU */ if (regime_translation_disabled(env, mmu_idx)) { + uint64_t hcr; + uint8_t memattr; + /* * MMU disabled. S1 addresses within aa64 translation regimes are * still checked for bounds -- see AArch64.TranslateAddressS1Off. @@ -11154,11 +11368,32 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, *phys_ptr = address; *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; *page_size = TARGET_PAGE_SIZE; + + /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ + hcr = arm_hcr_el2_eff(env); + cacheattrs->shareability = 0; + if (hcr & HCR_DC) { + if (hcr & HCR_DCT) { + memattr = 0xf0; /* Tagged, Normal, WB, RWA */ + } else { + memattr = 0xff; /* Normal, WB, RWA */ + } + } else if (access_type == MMU_INST_FETCH) { + if (regime_sctlr(env, mmu_idx) & SCTLR_I) { + memattr = 0xee; /* Normal, WT, RA, NT */ + } else { + memattr = 0x44; /* Normal, NC, No */ + } + cacheattrs->shareability = 2; /* outer sharable */ + } else { + memattr = 0x00; /* Device, nGnRnE */ + } + cacheattrs->attrs = memattr; return 0; } if (regime_using_lpae_format(env, mmu_idx)) { - return get_phys_addr_lpae(env, address, access_type, mmu_idx, + return get_phys_addr_lpae(env, address, access_type, mmu_idx, false, phys_ptr, attrs, prot, page_size, fi, cacheattrs); } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { @@ -11181,11 +11416,12 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, bool ret; ARMMMUFaultInfo fi = { 0 }; ARMMMUIdx mmu_idx = arm_mmu_idx(env); + ARMCacheAttrs cacheattrs = {}; *attrs = (MemTxAttrs) { 0 }; ret = get_phys_addr(env, addr, 0, mmu_idx, &phys_addr, - attrs, &prot, &page_size, &fi, NULL); + attrs, &prot, &page_size, &fi, &cacheattrs); if (ret) { return -1; @@ -11719,6 +11955,35 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } } + if (cpu_isar_feature(aa64_mte, env_archcpu(env))) { + /* + * Set MTE_ACTIVE if any access may be Checked, and leave clear + * if all accesses must be Unchecked: + * 1) If no TBI, then there are no tags in the address to check, + * 2) If Tag Check Override, then all accesses are Unchecked, + * 3) If Tag Check Fail == 0, then Checked access have no effect, + * 4) If no Allocation Tag Access, then all accesses are Unchecked. + */ + if (allocation_tag_access_enabled(env, el, sctlr)) { + FIELD_DP32(flags, TBFLAG_A64, ATA, 1, flags); + if (tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) { + FIELD_DP32(flags, TBFLAG_A64, MTE_ACTIVE, 1, flags); + } + } + /* And again for unprivileged accesses, if required. */ + if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV) + && tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & SCTLR_TCF0) + && allocation_tag_access_enabled(env, 0, sctlr)) { + FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1, flags); + } + /* Cache TCMA as well as TBI. 
*/ + FIELD_DP32(flags, TBFLAG_A64, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx), flags); + } + return rebuild_hflags_common(env, fp_el, mmu_idx, flags); } diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index 616d032c84..b48d6eb94e 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -100,6 +100,8 @@ DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int) DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int) +DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32) + DEF_HELPER_1(vfp_get_fpscr, i32, env) DEF_HELPER_2(vfp_set_fpscr, void, env, i32) @@ -207,16 +209,16 @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32) DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr) DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) -DEF_HELPER_3(recps_f32, f32, f32, f32, env) -DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env) +DEF_HELPER_3(recps_f32, f32, env, f32, f32) +DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32) DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) -DEF_HELPER_2(recpe_u32, i32, i32, ptr) -DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr) +DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) +DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32) DEF_HELPER_3(shl_cc, i32, env, i32, i32) @@ -279,19 +281,6 @@ DEF_HELPER_2(neon_hsub_u16, i32, i32, i32) DEF_HELPER_2(neon_hsub_s32, s32, s32, s32) DEF_HELPER_2(neon_hsub_u32, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u8, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s8, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u16, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s16, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u32, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s32, i32, i32, i32) -DEF_HELPER_2(neon_cge_u8, i32, i32, i32) -DEF_HELPER_2(neon_cge_s8, i32, i32, i32) -DEF_HELPER_2(neon_cge_u16, i32, i32, i32) -DEF_HELPER_2(neon_cge_s16, i32, i32, i32) -DEF_HELPER_2(neon_cge_u32, i32, i32, i32) -DEF_HELPER_2(neon_cge_s32, i32, i32, i32) - DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) @@ -301,13 +290,6 @@ DEF_HELPER_2(neon_pmax_s8, i32, i32, i32) DEF_HELPER_2(neon_pmax_u16, i32, i32, i32) DEF_HELPER_2(neon_pmax_s16, i32, i32, i32) -DEF_HELPER_2(neon_abd_u8, i32, i32, i32) -DEF_HELPER_2(neon_abd_s8, i32, i32, i32) -DEF_HELPER_2(neon_abd_u16, i32, i32, i32) -DEF_HELPER_2(neon_abd_s16, i32, i32, i32) -DEF_HELPER_2(neon_abd_u32, i32, i32, i32) -DEF_HELPER_2(neon_abd_s32, i32, i32, i32) - DEF_HELPER_2(neon_shl_u16, i32, i32, i32) DEF_HELPER_2(neon_shl_s16, i32, i32, i32) DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) @@ -351,9 +333,6 @@ DEF_HELPER_2(neon_mul_u16, i32, i32, i32) DEF_HELPER_2(neon_tst_u8, i32, i32, i32) DEF_HELPER_2(neon_tst_u16, i32, i32, i32) DEF_HELPER_2(neon_tst_u32, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u8, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u16, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u32, i32, i32, i32) DEF_HELPER_1(neon_clz_u8, i32, i32) DEF_HELPER_1(neon_clz_u16, i32, i32) @@ -423,7 +402,6 @@ 
DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32) DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32) DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64) -DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr) @@ -538,29 +516,40 @@ DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_2(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr) - -DEF_HELPER_FLAGS_3(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) -DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw2, 
TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) @@ -622,6 +611,8 @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, @@ -690,6 +681,17 @@ DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -700,6 +702,66 @@ DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, 
void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/qemu/target/arm/internals.h b/qemu/target/arm/internals.h index 5bb1ad0e61..2bd763072b 100644 --- a/qemu/target/arm/internals.h +++ b/qemu/target/arm/internals.h @@ -454,13 +454,14 @@ static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc) | ARM_EL_IL | (ea << 9) | (s1ptw << 7) | fsc; } -static inline uint32_t syn_data_abort_no_iss(int same_el, +static inline uint32_t syn_data_abort_no_iss(int same_el, int fnv, int ea, int cm, int s1ptw, int wnr, int fsc) { return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) | ARM_EL_IL - | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc; + | (fnv << 10) | (ea << 9) | (cm << 8) | (s1ptw << 7) + | (wnr << 6) | fsc; } static inline uint32_t syn_data_abort_with_iss(int same_el, @@ -908,6 +909,51 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) } } +/* Return the exception level which controls this address translation regime */ +static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_Stage2: + case ARMMMUIdx_E2: + return 2; + case ARMMMUIdx_SE3: + return 3; + case ARMMMUIdx_SE10_0: + return arm_el_is_aa64(env, 3) ? 
1 : 3; +    case ARMMMUIdx_SE10_1: +    case ARMMMUIdx_SE10_1_PAN: +    case ARMMMUIdx_Stage1_E0: +    case ARMMMUIdx_Stage1_E1: +    case ARMMMUIdx_Stage1_E1_PAN: +    case ARMMMUIdx_E10_0: +    case ARMMMUIdx_E10_1: +    case ARMMMUIdx_E10_1_PAN: +    case ARMMMUIdx_MPrivNegPri: +    case ARMMMUIdx_MUserNegPri: +    case ARMMMUIdx_MPriv: +    case ARMMMUIdx_MUser: +    case ARMMMUIdx_MSPrivNegPri: +    case ARMMMUIdx_MSUserNegPri: +    case ARMMMUIdx_MSPriv: +    case ARMMMUIdx_MSUser: +        return 1; +    default: +        g_assert_not_reached(); +    } +} + +/* Return the TCR controlling this translation regime */ +static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ +    if (mmu_idx == ARMMMUIdx_Stage2) { +        return &env->cp15.vtcr_el2; +    } +    return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; +} + /* Return the FSR value for a debug exception (watchpoint, hardware * breakpoint or BKPT insn) targeting the specified exception level. */ @@ -975,11 +1021,6 @@ static inline int arm_num_ctx_cmps(ARMCPU *cpu) } } -/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3. - * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits. - */ -#define MEMOPIDX_SHIFT 8 - /** * v7m_using_psp: Return true if using process stack pointer * Return true if the CPU is currently using the process stack @@ -1154,6 +1195,9 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id) if (isar_feature_aa64_uao(id)) { valid |= PSTATE_UAO; } + if (isar_feature_aa64_mte(id)) { + valid |= PSTATE_TCO; + } return valid; } @@ -1190,6 +1234,24 @@ static inline int exception_target_el(CPUARMState *env) return target_el; } +/* Determine if allocation tags are available. */ +static inline bool allocation_tag_access_enabled(CPUARMState *env, int el, + uint64_t sctlr) +{ + if (el < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_ATA)) { + return false; + } + if (el < 2 + && arm_feature(env, ARM_FEATURE_EL2) + && !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return false; + } + sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA); + return sctlr != 0; +} + /* Security attributes for an address, as returned by v8m_security_lookup. */ typedef struct V8M_SAttributes { bool subpage; /* true if these attrs don't cover the whole TARGET_PAGE */ @@ -1221,8 +1283,89 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, target_ulong *page_size, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + UNICORN_NONNULL; void arm_log_exception(int idx); +/* + * The log2 of the words in the tag block, for GMID_EL1.BS. + * This is the maximum, 256 bytes, which manipulates 64-bits of tags. + */ +#define GMID_EL1_BS 6 + +/* We associate one allocation tag per 16 bytes, the minimum. */ +#define LOG2_TAG_GRANULE 4 +#define TAG_GRANULE (1 << LOG2_TAG_GRANULE) + +/* + * The SVE simd_data field, for memory ops, contains either + * rd (5 bits) or a shift count (2 bits). + */ +#define SVE_MTEDESC_SHIFT 5 + +/* Bits within a descriptor passed to the helper_mte_check* functions. 
*/ +FIELD(MTEDESC, MIDX, 0, 4) +FIELD(MTEDESC, TBI, 4, 2) +FIELD(MTEDESC, TCMA, 6, 2) +FIELD(MTEDESC, WRITE, 8, 1) +FIELD(MTEDESC, ESIZE, 9, 5) +FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */ + +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); + +static inline int allocation_tag_from_addr(uint64_t ptr) +{ + return extract64(ptr, 56, 4); +} + +static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag) +{ + return deposit64(ptr, 56, 4, rtag); +} + +/* Return true if tbi bits mean that the access is checked. */ +static inline bool tbi_check(uint32_t desc, int bit55) +{ + return (desc >> (R_MTEDESC_TBI_SHIFT + bit55)) & 1; +} + +/* Return true if tcma bits mean that the access is unchecked. */ +static inline bool tcma_check(uint32_t desc, int bit55, int ptr_tag) +{ + /* + * We had extracted bit55 and ptr_tag for other reasons, so fold + * (ptr<59:55> == 00000 || ptr<59:55> == 11111) into a single test. + */ + bool match = ((ptr_tag + bit55) & 0xf) == 0; + bool tcma = (desc >> (R_MTEDESC_TCMA_SHIFT + bit55)) & 1; + return tcma && match; +} + +/* + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode, we do not have a TLB with which to implement this, so we must + * remove the top byte. + */ +static inline uint64_t useronly_clean_ptr(uint64_t ptr) +{ + /* TBI is known to be enabled. */ + ptr = sextract64(ptr, 0, 56); + return ptr; +} + +static inline uint64_t useronly_maybe_clean_ptr(uint32_t desc, uint64_t ptr) +{ + int64_t clean_ptr = sextract64(ptr, 0, 56); + if (tbi_check(desc, clean_ptr < 0)) { + ptr = clean_ptr; + } + return ptr; +} + #endif diff --git a/qemu/target/arm/m_helper.c b/qemu/target/arm/m_helper.c index 7fd9d21965..22f4b1b949 100644 --- a/qemu/target/arm/m_helper.c +++ b/qemu/target/arm/m_helper.c @@ -87,12 +87,13 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; // int exc; // bool exc_secure; if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { if (mode == STACK_LAZYFP) { @@ -187,13 +188,14 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; uint32_t value; if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { qemu_log_mask(CPU_LOG_INT, @@ -1859,6 +1861,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, V8M_SAttributes sattrs = { 0 }; MemTxAttrs attrs = { 0 }; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; MemTxResult txres; target_ulong page_size; hwaddr physaddr; @@ -1877,7 +1880,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, return false; } if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, - &physaddr, &attrs, &prot, &page_size, &fi, NULL)) { + &physaddr, 
&attrs, &prot, &page_size, &fi, &cacheattrs)) { /* the MPU lookup failed */ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure); diff --git a/qemu/target/arm/mte_helper.c b/qemu/target/arm/mte_helper.c new file mode 100644 index 0000000000..630e18a8ac --- /dev/null +++ b/qemu/target/arm/mte_helper.c @@ -0,0 +1,913 @@ +/* + * ARM v8.5-MemTag Operations + * + * Copyright (c) 2020 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/exec-all.h" +#include "exec/ram_addr.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "qemu/guest-random.h" + + +static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) +{ +    if (exclude == 0xffff) { +        return 0; +    } +    if (offset == 0) { +        while (exclude & (1 << tag)) { +            tag = (tag + 1) & 15; +        } +    } else { +        do { +            do { +                tag = (tag + 1) & 15; +            } while (exclude & (1 << tag)); +        } while (--offset > 0); +    } +    return tag; +} + +/** + * allocation_tag_mem: + * @env: the cpu environment + * @ptr_mmu_idx: the addressing regime to use for the virtual address + * @ptr: the virtual address for which to look up tag memory + * @ptr_access: the access to use for the virtual address + * @ptr_size: the number of bytes in the normal memory access + * @tag_access: the access to use for the tag memory + * @tag_size: the number of bytes in the tag memory access + * @ra: the return address for exception handling + * + * Our tag memory is formatted as a sequence of little-endian nibbles. + * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two + * tags, with the tag at [3:0] for the lower addr and the tag at [7:4] + * for the higher addr. + * + * Here, resolve the physical address from the virtual address, and return + * a pointer to the corresponding tag byte. Exit with exception if the + * virtual address is not accessible for @ptr_access. + * + * The @ptr_size and @tag_size values may not have an obvious relation + * due to the alignment of @ptr, and the number of tag checks required. + * + * If there is no tag storage corresponding to @ptr, return NULL. + */ +static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, +                                   uint64_t ptr, MMUAccessType ptr_access, +                                   int ptr_size, MMUAccessType tag_access, +                                   int tag_size, uintptr_t ra) +{ +    struct uc_struct *uc = env->uc; +    uintptr_t index; +    CPUIOTLBEntry *iotlbentry; +    int in_page, flags; +    ram_addr_t ptr_ra; +    hwaddr ptr_paddr, tag_paddr, xlat; +    MemoryRegion *mr; +    ARMASIdx tag_asi; +    AddressSpace *tag_as; +    void *host; + +    /* +     * Probe the first byte of the virtual address. This raises an +     * exception for inaccessible pages, and resolves the virtual address +     * into the softmmu tlb. +     * +     * When RA == 0, this is for mte_probe1. The page is expected to be +     * valid. 
Indicate to probe_access_flags no-fault, then assert that + * we received a valid page. + */ + flags = probe_access_flags(env, ptr, ptr_access, ptr_mmu_idx, + ra == 0, &host, ra); + assert(!(flags & TLB_INVALID_MASK)); + + /* + * Find the iotlbentry for ptr. This *must* be present in the TLB + * because we just found the mapping. + * TODO: Perhaps there should be a cputlb helper that returns a + * matching tlb entry + iotlb entry. + */ + index = tlb_index(env, ptr_mmu_idx, ptr); + iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index]; + + /* If the virtual page MemAttr != Tagged, access unchecked. */ + if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) { + return NULL; + } + + /* + * If not backed by host ram, there is no tag storage: access unchecked. + * This is probably a guest os bug though, so log it. + */ + if (unlikely(flags & TLB_MMIO)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Page @ 0x%" PRIx64 " indicates Tagged Normal memory " + "but is not backed by host ram\n", ptr); + return NULL; + } + + /* + * The Normal memory access can extend to the next page. E.g. a single + * 8-byte access to the last byte of a page will check only the last + * tag on the first page. + * Any page access exception has priority over tag check exception. + */ + in_page = -(ptr | TARGET_PAGE_MASK); + if (unlikely(ptr_size > in_page)) { + void *ignore; + flags |= probe_access_flags(env, ptr + in_page, ptr_access, + ptr_mmu_idx, ra == 0, &ignore, ra); + assert(!(flags & TLB_INVALID_MASK)); + } + + /* Any debug exception has priority over a tag check exception. */ + if (unlikely(flags & TLB_WATCHPOINT)) { + int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE; + assert(ra != 0); + cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, + iotlbentry->attrs, wp, ra); + } + + /* + * Find the physical address within the normal mem space. + * The memory region lookup must succeed because TLB_MMIO was + * not set in the cputlb lookup above. + */ + mr = memory_region_from_host(uc, host, &ptr_ra); + tcg_debug_assert(mr != NULL); + tcg_debug_assert(memory_region_is_ram(mr)); + ptr_paddr = ptr_ra; + do { + ptr_paddr += mr->addr; + mr = mr->container; + } while (mr); + + /* Convert to the physical address in tag space. */ + tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1); + + /* Look up the address in tag space. */ + tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS; + tag_as = cpu_get_address_space(env_cpu(env), tag_asi); + mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL, + tag_access == MMU_DATA_STORE, + iotlbentry->attrs); + + /* + * Note that @mr will never be NULL. If there is nothing in the address + * space at @tag_paddr, the translation will return the unallocated memory + * region. For our purposes, the result must be ram. + */ + if (unlikely(!memory_region_is_ram(mr))) { + /* ??? Failure is a board configuration error. */ + qemu_log_mask(LOG_UNIMP, + "Tag Memory @ 0x%" HWADDR_PRIx " not found for " + "Normal Memory @ 0x%" HWADDR_PRIx "\n", + tag_paddr, ptr_paddr); + return NULL; + } + + /* + * Ensure the tag memory is dirty on write, for migration. + * Tag memory can never contain code or display memory (vga). 
+ */ + if (tag_access == MMU_DATA_STORE) { + ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat; + cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION); + } + + return (uint8_t*)memory_region_get_ram_ptr(mr) + xlat; +} + +uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) +{ + uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16); + int rrnd = extract32(env->cp15.gcr_el1, 16, 1); + int start = extract32(env->cp15.rgsr_el1, 0, 4); + int seed = extract32(env->cp15.rgsr_el1, 8, 16); + int offset, i, rtag; + + /* + * Our IMPDEF choice for GCR_EL1.RRND==1 is to continue to use the + * deterministic algorithm. Except that with RRND==1 the kernel is + * not required to have set RGSR_EL1.SEED != 0, which is required for + * the deterministic algorithm to function. So we force a non-zero + * SEED for that case. + */ + if (unlikely(seed == 0) && rrnd) { + do { + uint16_t two; + + if (qemu_guest_getrandom(&two, sizeof(two)) < 0) { + /* + * Failed, for unknown reasons in the crypto subsystem. + * Best we can do is use a constant seed. + */ + two = 1; + } + seed = two; + } while (seed == 0); + } + + /* RandomTag */ + for (i = offset = 0; i < 4; ++i) { + /* NextRandomTagBit */ + int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^ + extract32(seed, 2, 1) ^ extract32(seed, 0, 1)); + seed = (top << 15) | (seed >> 1); + offset |= top << i; + } + rtag = choose_nonexcluded_tag(start, offset, exclude); + env->cp15.rgsr_el1 = rtag | (seed << 8); + + return address_with_allocation_tag(rn, rtag); +} + +uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr, + int32_t offset, uint32_t tag_offset) +{ + int start_tag = allocation_tag_from_addr(ptr); + uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16); + int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude); + + return address_with_allocation_tag(ptr + offset, rtag); +} + +static int load_tag1(uint64_t ptr, uint8_t *mem) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + return extract32(*mem, ofs, 4); +} + +uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + int rtag = 0; + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1, + MMU_DATA_LOAD, 1, GETPC()); + + /* Load if page supports tags. */ + if (mem) { + rtag = load_tag1(ptr, mem); + } + + return address_with_allocation_tag(xt, rtag); +} + +static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra) +{ + if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) { + arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, + cpu_mmu_index(env, false), ra); + g_assert_not_reached(); + } +} + +/* For use in a non-parallel context, store to the given nibble. */ +static void store_tag1(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + *mem = deposit32(*mem, ofs, 4, tag); +} + +/* For use in a parallel context, atomically store to the given nibble. 
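+ * Two adjacent 16-byte granules share one tag byte, so a plain
+ * read-modify-write could lose a concurrent update to the other nibble;
+ * hence the compare-and-swap retry loop below.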
*/ +static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + uint8_t old = atomic_read(mem); + + while (1) { + uint8_t new = deposit32(old, ofs, 4, tag); + uint8_t cmp = atomic_cmpxchg(mem, old, new); + if (likely(cmp == old)) { + return; + } + old = cmp; + } +} + +typedef void stg_store1(uint64_t, uint8_t *, int); + +static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + + check_tag_aligned(env, ptr, ra); + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page supports tags. */ + if (mem) { + store1(ptr, mem, allocation_tag_from_addr(xt)); + } +} + +void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + + check_tag_aligned(env, ptr, ra); + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); +} + +static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + int tag = allocation_tag_from_addr(xt); + uint8_t *mem1, *mem2; + + check_tag_aligned(env, ptr, ra); + + /* + * Trap if accessing an invalid page(s). + * This takes priority over !allocation_tag_access_enabled. + */ + if (ptr & TAG_GRANULE) { + /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + TAG_GRANULE, MMU_DATA_STORE, 1, ra); + mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE, + MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page(s) support tags. */ + if (mem1) { + store1(TAG_GRANULE, mem1, tag); + } + if (mem2) { + store1(0, mem2, tag); + } + } else { + /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + 2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra); + if (mem1) { + tag |= tag << 4; + atomic_set(mem1, tag); + } + } +} + +void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) +{ + uc_engine *uc = env->uc; + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + int in_page = -(ptr | TARGET_PAGE_MASK); + + check_tag_aligned(env, ptr, ra); + + if (likely(in_page >= 2 * TAG_GRANULE)) { + probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra); + } else { + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); + probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra); + } +} + +#define LDGM_STGM_SIZE (4 << GMID_EL1_BS) + +uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. 
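+ * (Illustrative sizing: with GMID_EL1_BS fixed at 6, as the build assert
+ * below requires, LDGM_STGM_SIZE is 256 bytes, i.e. 16 tag granules or
+ * exactly 64 bits' worth of tags.)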
*/ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* The tag is squashed to zero if the page does not support tags. */ + if (!tag_mem) { + return 0; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are loading 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + return ldq_le_p(tag_mem); +} + +void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* + * Tag store only happens if the page support tags, + * and if the OS has enabled access to the tags. + */ + if (!tag_mem) { + return; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are storing 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + stq_le_p(tag_mem, val); +} + +void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + uintptr_t ra = GETPC(); + int mmu_idx = cpu_mmu_index(env, false); + int log2_dcz_bytes, log2_tag_bytes; + intptr_t dcz_bytes, tag_bytes; + uint8_t *mem; + + /* + * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, + * to make sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + ptr &= -dcz_bytes; + + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes, + MMU_DATA_STORE, tag_bytes, ra); + if (mem) { + int tag_pair = (val & 0xf) * 0x11; + memset(mem, tag_pair, tag_bytes); + } +} + +/* Record a tag check failure. */ +static void mte_check_fail(CPUARMState *env, int mmu_idx, + uint64_t dirty_ptr, uintptr_t ra) +{ + ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); + int el, reg_el, tcf, select; + uint64_t sctlr; + + reg_el = regime_el(env, arm_mmu_idx); + sctlr = env->cp15.sctlr_el[reg_el]; + + switch (arm_mmu_idx) { + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E20_0: + el = 0; + tcf = extract64(sctlr, 38, 2); + break; + default: + el = reg_el; + tcf = extract64(sctlr, 40, 2); + } + + switch (tcf) { + case 1: + /* + * Tag check fail causes a synchronous exception. + * + * In restore_state_to_opc, we set the exception syndrome + * for the load or store operation. Unwind first so we + * may overwrite that with the syndrome for the tag check. + */ + cpu_restore_state(env_cpu(env), ra, true); + env->exception.vaddress = dirty_ptr; + raise_exception(env, EXCP_DATA_ABORT, + syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11), + exception_target_el(env)); + /* noreturn, but fall through to the assert anyway */ + + case 0: + /* + * Tag check fail does not affect the PE. + * We eliminate this case by not setting MTE_ACTIVE + * in tb_flags, so that we never make this runtime call. + */ + g_assert_not_reached(); + + case 2: + /* Tag check fail causes asynchronous flag set. 
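+ * The failure is only recorded: the TF0/TF1 bit of TFSR_ELx (chosen from
+ * bit 55 of the faulting address when the regime has two ranges) is set,
+ * and no exception is raised here.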
*/ + mmu_idx = arm_mmu_idx_el(env, el); + if (regime_has_2_ranges(mmu_idx)) { + select = extract64(dirty_ptr, 55, 1); + } else { + select = 0; + } + env->cp15.tfsr_el[el] |= 1 << select; + break; + + default: + /* Case 3: Reserved. */ + qemu_log_mask(LOG_GUEST_ERROR, + "Tag check failure with SCTLR_EL%d.TCF%s " + "set to reserved value %d\n", + reg_el, el ? "" : "0", tcf); + break; + } +} + +/* + * Perform an MTE checked access for a single logical or atomic access. + */ +static bool mte_probe1_int(CPUARMState *env, uint32_t desc, uint64_t ptr, + uintptr_t ra, int bit55) +{ + int mem_tag, mmu_idx, ptr_tag, size; + MMUAccessType type; + uint8_t *mem; + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + return true; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + size = FIELD_EX32(desc, MTEDESC, ESIZE); + + mem = allocation_tag_mem(env, mmu_idx, ptr, type, size, + MMU_DATA_LOAD, 1, ra); + if (!mem) { + return true; + } + + mem_tag = load_tag1(ptr, mem); + return ptr_tag == mem_tag; +} + +/* + * No-fault version of mte_check1, to be used by SVE for MemSingleNF. + * Returns false if the access is Checked and the check failed. This + * is only intended to probe the tag -- the validity of the page must + * be checked beforehand. + */ +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked. */ + if (unlikely(!tbi_check(desc, bit55))) { + return true; + } + + return mte_probe1_int(env, desc, ptr, 0, bit55); +} + +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) { + int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + mte_check_fail(env, mmu_idx, ptr, ra); + } + + return useronly_clean_ptr(ptr); +} + +uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return mte_check1(env, desc, ptr, GETPC()); +} + +/* + * Perform an MTE checked access for multiple logical accesses. + */ + +/** + * checkN: + * @tag: tag memory to test + * @odd: true to begin testing at tags at odd nibble + * @cmp: the tag to compare against + * @count: number of tags to test + * + * Return the number of successful tests. + * Thus a return value < @count indicates a failure. + * + * A note about sizes: count is expected to be small. + * + * The most common use will be LDP/STP of two integer registers, + * which means 16 bytes of memory touching at most 2 tags, but + * often the access is aligned and thus just 1 tag. + * + * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory, + * touching at most 5 tags. SVE LDR/STR (vector) with the default + * vector length is also 64 bytes; the maximum architectural length + * is 256 bytes touching at most 9 tags. + * + * The loop below uses 7 logical operations and 1 memory operation + * per tag pair. An implementation that loads an aligned word and + * uses masking to ignore adjacent tags requires 18 logical operations + * and thus does not begin to pay off until 6 tags. + * Which, according to the survey above, is unlikely to be common. + */ +static int checkN(uint8_t *mem, int odd, int cmp, int count) +{ + int n = 0, diff; + + /* Replicate the test tag and compare. 
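+ * E.g. a test tag of 0x3 is replicated to 0x33, so one XOR with a tag
+ * byte leaves zero in whichever nibble matches; the even/odd tests below
+ * then pick out the two 4-bit tags.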
*/ + cmp *= 0x11; + diff = *mem++ ^ cmp; + + if (odd) { + goto start_odd; + } + + while (1) { + /* Test even tag. */ + if (unlikely((diff) & 0x0f)) { + break; + } + if (++n == count) { + break; + } + + start_odd: + /* Test odd tag. */ + if (unlikely((diff) & 0xf0)) { + break; + } + if (++n == count) { + break; + } + + diff = *mem++ ^ cmp; + } + return n; +} + +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + uc_engine *uc = env->uc; + int mmu_idx, ptr_tag, bit55; + uint64_t ptr_last, ptr_end, prev_page, next_page; + uint64_t tag_first, tag_end; + uint64_t tag_byte_first, tag_byte_end; + uint32_t esize, total, tag_count, tag_size, n, c; + uint8_t *mem1, *mem2; + MMUAccessType type; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + esize = FIELD_EX32(desc, MTEDESC, ESIZE); + total = FIELD_EX32(desc, MTEDESC, TSIZE); + + /* Find the addr of the end of the access, and of the last element. */ + ptr_end = ptr + total; + ptr_last = ptr_end - esize; + + /* Round the bounds to the tag granule, and compute the number of tags. */ + tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); + tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE); + tag_count = (tag_end - tag_first) / TAG_GRANULE; + + /* Round the bounds to twice the tag granule, and compute the bytes. */ + tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); + tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE); + + /* Locate the page boundaries. */ + prev_page = ptr & TARGET_PAGE_MASK; + next_page = prev_page + TARGET_PAGE_SIZE; + + if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) { + /* Memory access stays on one page. */ + tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, + MMU_DATA_LOAD, tag_size, ra); + if (!mem1) { + goto done; + } + /* Perform all of the comparisons. */ + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count); + } else { + /* Memory access crosses to next page. */ + tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, + MMU_DATA_LOAD, tag_size, ra); + + tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE); + mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, + ptr_end - next_page, + MMU_DATA_LOAD, tag_size, ra); + + /* + * Perform all of the comparisons. + * Note the possible but unlikely case of the operation spanning + * two pages that do not both have tagging enabled. + */ + n = c = (next_page - tag_first) / TAG_GRANULE; + if (mem1) { + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c); + } + if (n == c) { + if (!mem2) { + goto done; + } + n += checkN(mem2, 0, ptr_tag, tag_count - c); + } + } + + /* + * If we failed, we know which granule. Compute the element that + * is first in that granule, and signal failure on that element. 
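+ * For example (with TAG_GRANULE == 16): ptr == 0x1008, esize == 8 and a
+ * failure at n == 2 give tag_first == 0x1000 and
+ * fail_ofs == 0x1000 + 2 * 16 - 0x1008 == 0x18, already a multiple of esize.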
+ */ + if (unlikely(n < tag_count)) { + uint64_t fail_ofs; + + fail_ofs = tag_first + n * TAG_GRANULE - ptr; + fail_ofs = ROUND_UP(fail_ofs, esize); + mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra); + } + + done: + return useronly_clean_ptr(ptr); +} + +uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return mte_checkN(env, desc, ptr, GETPC()); +} + +/* + * Perform an MTE checked access for DC_ZVA. + */ +uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + uintptr_t ra = GETPC(); + int log2_dcz_bytes, log2_tag_bytes; + int mmu_idx, bit55; + intptr_t dcz_bytes, tag_bytes, i; + void *mem; + uint64_t ptr_tag, mem_tag, align_ptr; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + /* + * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make + * sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + align_ptr = ptr & -dcz_bytes; + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + (void) probe_write(env, ptr, 1, mmu_idx, ra); + mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE, + dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra); + if (!mem) { + goto done; + } + + /* + * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus + * it is quite easy to perform all of the comparisons at once without + * any extra masking. + * + * The most common zva block size is 64; some of the thunderx cpus use + * a block size of 128. For user-only, aarch64_max_initfn will set the + * block size to 512. Fill out the other cases for future-proofing. + * + * In order to be able to find the first miscompare later, we want the + * tag bytes to be in little-endian order. + */ + switch (log2_tag_bytes) { + case 0: /* zva_blocksize 32 */ + mem_tag = *(uint8_t *)mem; + ptr_tag *= 0x11u; + break; + case 1: /* zva_blocksize 64 */ + mem_tag = cpu_to_le16(*(uint16_t *)mem); + ptr_tag *= 0x1111u; + break; + case 2: /* zva_blocksize 128 */ + mem_tag = cpu_to_le32(*(uint32_t *)mem); + ptr_tag *= 0x11111111u; + break; + case 3: /* zva_blocksize 256 */ + mem_tag = cpu_to_le64(*(uint64_t *)mem); + ptr_tag *= 0x1111111111111111ull; + break; + + default: /* zva_blocksize 512, 1024, 2048 */ + ptr_tag *= 0x1111111111111111ull; + i = 0; + do { + mem_tag = cpu_to_le64(*(uint64_t *)((char*)mem + i)); + if (unlikely(mem_tag != ptr_tag)) { + goto fail; + } + i += 8; + align_ptr += 16 * TAG_GRANULE; + } while (i < tag_bytes); + goto done; + } + + if (likely(mem_tag == ptr_tag)) { + goto done; + } + + fail: + /* Locate the first nibble that differs. 
*/ + i = ctz64(mem_tag ^ ptr_tag) >> 4; + mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra); + + done: + return useronly_clean_ptr(ptr); +} diff --git a/qemu/target/arm/neon_helper.c b/qemu/target/arm/neon_helper.c index 0c2828e6f3..7a9568a4e6 100644 --- a/qemu/target/arm/neon_helper.c +++ b/qemu/target/arm/neon_helper.c @@ -562,24 +562,6 @@ uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) return dest; } -#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 -NEON_VOP(cgt_s8, neon_s8, 4) -NEON_VOP(cgt_u8, neon_u8, 4) -NEON_VOP(cgt_s16, neon_s16, 2) -NEON_VOP(cgt_u16, neon_u16, 2) -NEON_VOP(cgt_s32, neon_s32, 1) -NEON_VOP(cgt_u32, neon_u32, 1) -#undef NEON_FN - -#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 -NEON_VOP(cge_s8, neon_s8, 4) -NEON_VOP(cge_u8, neon_u8, 4) -NEON_VOP(cge_s16, neon_s16, 2) -NEON_VOP(cge_u16, neon_u16, 2) -NEON_VOP(cge_s32, neon_s32, 1) -NEON_VOP(cge_u32, neon_u32, 1) -#undef NEON_FN - #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 NEON_POP(pmin_s8, neon_s8, 4) NEON_POP(pmin_u8, neon_u8, 4) @@ -594,16 +576,6 @@ NEON_POP(pmax_s16, neon_s16, 2) NEON_POP(pmax_u16, neon_u16, 2) #undef NEON_FN -#define NEON_FN(dest, src1, src2) \ - dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) -NEON_VOP(abd_s8, neon_s8, 4) -NEON_VOP(abd_u8, neon_u8, 4) -NEON_VOP(abd_s16, neon_s16, 2) -NEON_VOP(abd_u16, neon_u16, 2) -NEON_VOP(abd_s32, neon_s32, 1) -NEON_VOP(abd_u32, neon_u32, 1) -#undef NEON_FN - #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -1135,12 +1107,6 @@ NEON_VOP(tst_u16, neon_u16, 2) NEON_VOP(tst_u32, neon_u32, 1) #undef NEON_FN -#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 -NEON_VOP(ceq_u8, neon_u8, 4) -NEON_VOP(ceq_u16, neon_u16, 2) -NEON_VOP(ceq_u32, neon_u32, 1) -#undef NEON_FN - /* Count Leading Sign/Zero Bits. */ static inline int do_clz8(uint8_t x) { @@ -1889,13 +1855,6 @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x) } /* NEON Float helpers. */ -uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - float32 f0 = make_float32(a); - float32 f1 = make_float32(b); - return float32_val(float32_abs(float32_sub(f0, f1, fpst))); -} /* Floating point comparisons produce an integer result. * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do. 
diff --git a/qemu/target/arm/op_helper.c b/qemu/target/arm/op_helper.c index a9cbc79287..8844d13eae 100644 --- a/qemu/target/arm/op_helper.c +++ b/qemu/target/arm/op_helper.c @@ -933,6 +933,23 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) } } +void HELPER(probe_access)(CPUARMState *env, target_ulong ptr, + uint32_t access_type, uint32_t mmu_idx, + uint32_t size) +{ + uc_engine *uc = env->uc; + uint32_t in_page = -((uint32_t)ptr | TARGET_PAGE_SIZE); + uintptr_t ra = GETPC(); + + if (likely(size <= in_page)) { + probe_access(env, ptr, size, access_type, mmu_idx, ra); + } else { + probe_access(env, ptr, in_page, access_type, mmu_idx, ra); + probe_access(env, ptr + in_page, size - in_page, + access_type, mmu_idx, ra); + } +} + uint32_t HELPER(uc_hooksys64)(CPUARMState *env, uint32_t insn, void *hk) { uc_arm64_reg uc_rt; diff --git a/qemu/target/arm/pauth_helper.c b/qemu/target/arm/pauth_helper.c index b909630317..6dbab03768 100644 --- a/qemu/target/arm/pauth_helper.c +++ b/qemu/target/arm/pauth_helper.c @@ -300,7 +300,11 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, */ test = sextract64(ptr, bot_bit, top_bit - bot_bit); if (test != 0 && test != -1) { - pac ^= MAKE_64BIT_MASK(top_bit - 1, 1); + /* + * Note that our top_bit is one greater than the pseudocode's + * version, hence "- 2" here. + */ + pac ^= MAKE_64BIT_MASK(top_bit - 2, 1); } /* diff --git a/qemu/target/arm/sve_helper.c b/qemu/target/arm/sve_helper.c index 2abbeba57b..c575b8f7db 100644 --- a/qemu/target/arm/sve_helper.c +++ b/qemu/target/arm/sve_helper.c @@ -27,21 +27,20 @@ #include "fpu/softfloat.h" #include "tcg/tcg.h" - /* Note that vector data is stored in host-endian 64-bit chunks, so addressing units smaller than that needs a host-endian fixup. */ #ifdef HOST_WORDS_BIGENDIAN -#define H1(x) ((x) ^ 7) +#define H1(x) ((x) ^ 7) #define H1_2(x) ((x) ^ 6) #define H1_4(x) ((x) ^ 4) -#define H2(x) ((x) ^ 3) -#define H4(x) ((x) ^ 1) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) #else -#define H1(x) (x) +#define H1(x) (x) #define H1_2(x) (x) #define H1_4(x) (x) -#define H2(x) (x) -#define H4(x) (x) +#define H2(x) (x) +#define H4(x) (x) #endif /* Return a value for NZCV as per the ARM PredTest pseudofunction. @@ -52,7 +51,7 @@ */ /* For no G bits set, NZCV = C. */ -#define PREDTEST_INIT 1 +#define PREDTEST_INIT 1 /* This is an iterative function, called for each Pd and Pg word * moving forward. 
@@ -290,25 +289,25 @@ static inline uint64_t wswap64(uint64_t h) return rol64(h, 32); } -#define LOGICAL_PPPP(NAME, FUNC) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - uintptr_t opr_sz = simd_oprsz(desc); \ - uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \ - uintptr_t i; \ - for (i = 0; i < opr_sz / 8; ++i) { \ - d[i] = FUNC(n[i], m[i], g[i]); \ - } \ -} +#define LOGICAL_PPPP(NAME, FUNC) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + uintptr_t opr_sz = simd_oprsz(desc); \ + uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \ + uintptr_t i; \ + for (i = 0; i < opr_sz / 8; ++i) { \ + d[i] = FUNC(n[i], m[i], g[i]); \ + } \ + } -#define DO_AND(N, M, G) (((N) & (M)) & (G)) -#define DO_BIC(N, M, G) (((N) & ~(M)) & (G)) -#define DO_EOR(N, M, G) (((N) ^ (M)) & (G)) -#define DO_ORR(N, M, G) (((N) | (M)) & (G)) -#define DO_ORN(N, M, G) (((N) | ~(M)) & (G)) -#define DO_NOR(N, M, G) (~((N) | (M)) & (G)) +#define DO_AND(N, M, G) (((N) & (M)) & (G)) +#define DO_BIC(N, M, G) (((N) & ~(M)) & (G)) +#define DO_EOR(N, M, G) (((N) ^ (M)) & (G)) +#define DO_ORR(N, M, G) (((N) | (M)) & (G)) +#define DO_ORN(N, M, G) (((N) | ~(M)) & (G)) +#define DO_NOR(N, M, G) (~((N) | (M)) & (G)) #define DO_NAND(N, M, G) (~((N) & (M)) & (G)) -#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G))) +#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G))) LOGICAL_PPPP(sve_and_pppp, DO_AND) LOGICAL_PPPP(sve_bic_pppp, DO_BIC) @@ -337,49 +336,48 @@ LOGICAL_PPPP(sve_nand_pppp, DO_NAND) * extra care wrt byte/word ordering we could use gcc generic vectors * and do 16 bytes at a time. */ -#define DO_ZPZZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. */ -#define DO_ZPZZ_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *n = vn, *m = vm; \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE nn = n[i], mm = m[i]; \ - d[i] = OP(nn, mm); \ - } \ - } \ -} - -#define DO_AND(N, M) (N & M) -#define DO_EOR(N, M) (N ^ M) -#define DO_ORR(N, M) (N | M) -#define DO_BIC(N, M) (N & ~M) -#define DO_ADD(N, M) (N + M) -#define DO_SUB(N, M) (N - M) -#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) -#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) -#define DO_ABD(N, M) ((N) >= (M) ? 
(N) - (M) : (M) - (N)) -#define DO_MUL(N, M) (N * M) +#define DO_ZPZZ_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i], mm = m[i]; \ + d[i] = OP(nn, mm); \ + } \ + } \ + } +#define DO_AND(N, M) (N & M) +#define DO_EOR(N, M) (N ^ M) +#define DO_ORR(N, M) (N | M) +#define DO_BIC(N, M) (N & ~M) +#define DO_ADD(N, M) (N + M) +#define DO_SUB(N, M) (N - M) +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) +#define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N)) +#define DO_MUL(N, M) (N * M) /* * We must avoid the C undefined behaviour cases: division by @@ -431,20 +429,20 @@ DO_ZPZZ(sve_umax_zpzz_h, uint16_t, H1_2, DO_MAX) DO_ZPZZ(sve_umax_zpzz_s, uint32_t, H1_4, DO_MAX) DO_ZPZZ_D(sve_umax_zpzz_d, uint64_t, DO_MAX) -DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN) -DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN) -DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN) -DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN) +DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN) DO_ZPZZ(sve_umin_zpzz_b, uint8_t, H1, DO_MIN) DO_ZPZZ(sve_umin_zpzz_h, uint16_t, H1_2, DO_MIN) DO_ZPZZ(sve_umin_zpzz_s, uint32_t, H1_4, DO_MIN) DO_ZPZZ_D(sve_umin_zpzz_d, uint64_t, DO_MIN) -DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD) -DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD) -DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD) -DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD) +DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD) DO_ZPZZ(sve_uabd_zpzz_b, uint8_t, H1, DO_ABD) DO_ZPZZ(sve_uabd_zpzz_h, uint16_t, H1_2, DO_ABD) @@ -505,9 +503,9 @@ DO_ZPZZ_D(sve_udiv_zpzz_d, uint64_t, DO_UDIV) /* Note that all bits of the shift are significant and not modulo the element size. */ -#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1)) -#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0) -#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0) +#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1)) +#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0) +#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0) DO_ZPZZ(sve_asr_zpzz_b, int8_t, H1, DO_ASR) DO_ZPZZ(sve_lsr_zpzz_b, uint8_t, H1_2, DO_LSR) @@ -532,22 +530,22 @@ DO_ZPZZ_D(sve_lsl_zpzz_d, uint64_t, DO_LSL) * third operand is "wide". That is, for D = N op M, the same 64-bit * value of M is used with all of the narrower values of N. 
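 * For example, sve_asr_zpzw_b shifts every active byte element of N by the
 * single 64-bit element of M that covers it.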
*/ -#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint8_t pg = *(uint8_t *)((char *)vg + H1(i >> 3)); \ - TYPEW mm = *(TYPEW *)((char *)vm + i); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 7); \ - } \ -} +#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint8_t pg = *(uint8_t *)((char *)vg + H1(i >> 3)); \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 7); \ + } \ + } DO_ZPZW(sve_asr_zpzw_b, int8_t, uint64_t, H1, DO_ASR) DO_ZPZW(sve_lsr_zpzw_b, uint8_t, uint64_t, H1, DO_LSR) @@ -565,47 +563,47 @@ DO_ZPZW(sve_lsl_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSL) /* Fully general two-operand expander, controlled by a predicate. */ -#define DO_ZPZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. 
*/ -#define DO_ZPZ_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *n = vn; \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE nn = n[i]; \ - d[i] = OP(nn); \ - } \ - } \ -} - -#define DO_CLS_B(N) (clrsb32(N) - 24) -#define DO_CLS_H(N) (clrsb32(N) - 16) +#define DO_ZPZ_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn); \ + } \ + } \ + } + +#define DO_CLS_B(N) (clrsb32(N) - 24) +#define DO_CLS_H(N) (clrsb32(N) - 16) DO_ZPZ(sve_cls_b, int8_t, H1, DO_CLS_B) DO_ZPZ(sve_cls_h, int16_t, H1_2, DO_CLS_H) DO_ZPZ(sve_cls_s, int32_t, H1_4, clrsb32) DO_ZPZ_D(sve_cls_d, int64_t, clrsb64) -#define DO_CLZ_B(N) (clz32(N) - 24) -#define DO_CLZ_H(N) (clz32(N) - 16) +#define DO_CLZ_B(N) (clz32(N) - 24) +#define DO_CLZ_H(N) (clz32(N) - 16) DO_ZPZ(sve_clz_b, uint8_t, H1, DO_CLZ_B) DO_ZPZ(sve_clz_h, uint16_t, H1_2, DO_CLZ_H) @@ -617,7 +615,7 @@ DO_ZPZ(sve_cnt_zpz_h, uint16_t, H1_2, ctpop16) DO_ZPZ(sve_cnt_zpz_s, uint32_t, H1_4, ctpop32) DO_ZPZ_D(sve_cnt_zpz_d, uint64_t, ctpop64) -#define DO_CNOT(N) (N == 0) +#define DO_CNOT(N) (N == 0) DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT) DO_ZPZ(sve_cnot_h, uint16_t, H1_2, DO_CNOT) @@ -625,15 +623,15 @@ DO_ZPZ(sve_cnot_s, uint32_t, H1_4, DO_CNOT) DO_ZPZ_D(sve_cnot_d, uint64_t, DO_CNOT) #ifdef _MSC_VER -#define DO_FABS16(N) (N & ((uint16_t)-1 >> 1)) -#define DO_FABS32(N) (N & ((uint32_t)-1 >> 1)) -#define DO_FABS64(N) (N & ((uint64_t)-1 >> 1)) +#define DO_FABS16(N) (N & ((uint16_t)-1 >> 1)) +#define DO_FABS32(N) (N & ((uint32_t)-1 >> 1)) +#define DO_FABS64(N) (N & ((uint64_t)-1 >> 1)) DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS16) DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS32) DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS64) #else -#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1)) +#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1)) DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) @@ -641,34 +639,34 @@ DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) #endif #ifdef _MSC_VER -#define DO_FNEG16(N) (N ^ ~((uint16_t)-1 >> 1)) -#define DO_FNEG32(N) (N ^ ~((uint32_t)-1 >> 1)) -#define DO_FNEG64(N) (N ^ ~((uint64_t)-1 >> 1)) +#define DO_FNEG16(N) (N ^ ~((uint16_t)-1 >> 1)) +#define DO_FNEG32(N) (N ^ ~((uint32_t)-1 >> 1)) +#define DO_FNEG64(N) (N ^ ~((uint64_t)-1 >> 1)) DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG16) DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG32) DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG64) #else -#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) +#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) #endif -#define DO_NOT(N) (~N) +#define DO_NOT(N) (~N) DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) DO_ZPZ(sve_not_zpz_h, uint16_t, H1_2, DO_NOT) DO_ZPZ(sve_not_zpz_s, uint32_t, H1_4, DO_NOT) DO_ZPZ_D(sve_not_zpz_d, uint64_t, DO_NOT) -#define DO_SXTB(N) ((int8_t)N) -#define DO_SXTH(N) ((int16_t)N) -#define DO_SXTS(N) ((int32_t)N) -#define DO_UXTB(N) ((uint8_t)N) -#define DO_UXTH(N) ((uint16_t)N) -#define DO_UXTS(N) ((uint32_t)N) +#define DO_SXTB(N) ((int8_t)N) +#define DO_SXTH(N) ((int16_t)N) +#define DO_SXTS(N) ((int32_t)N) +#define DO_UXTB(N) ((uint8_t)N) 
+#define DO_UXTH(N) ((uint16_t)N) +#define DO_UXTS(N) ((uint32_t)N) DO_ZPZ(sve_sxtb_h, uint16_t, H1_2, DO_SXTB) DO_ZPZ(sve_sxtb_s, uint32_t, H1_4, DO_SXTB) @@ -685,9 +683,9 @@ DO_ZPZ_D(sve_uxth_d, uint64_t, DO_UXTH) DO_ZPZ_D(sve_uxtw_d, uint64_t, DO_UXTS) #ifdef _MSC_VER -#define DO_ABS(N) (N < 0 ? (0 - N) : N) +#define DO_ABS(N) (N < 0 ? (0 - N) : N) #else -#define DO_ABS(N) (N < 0 ? -N : N) +#define DO_ABS(N) (N < 0 ? -N : N) #endif DO_ZPZ(sve_abs_b, int8_t, H1, DO_ABS) @@ -696,9 +694,9 @@ DO_ZPZ(sve_abs_s, int32_t, H1_4, DO_ABS) DO_ZPZ_D(sve_abs_d, int64_t, DO_ABS) #ifdef _MSC_VER -#define DO_NEG(N) (0 - N) +#define DO_NEG(N) (0 - N) #else -#define DO_NEG(N) (-N) +#define DO_NEG(N) (-N) #endif DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG) @@ -722,19 +720,19 @@ DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64) /* Three-operand expander, unpredicated, in which the third operand is "wide". */ -#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - TYPEW mm = *(TYPEW *)((char *)vm + i); \ - do { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ - i += sizeof(TYPE); \ - } while (i & 7); \ - } \ -} +#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + i += sizeof(TYPE); \ + } while (i & 7); \ + } \ + } DO_ZZW(sve_asr_zzw_b, int8_t, uint64_t, H1, DO_ASR) DO_ZZW(sve_lsr_zzw_b, uint8_t, uint64_t, H1, DO_LSR) @@ -771,39 +769,39 @@ DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL) /* ??? If we were to vectorize this by hand the reduction ordering * would change. For integer operands, this is perfectly fine. 
*/ -#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \ -uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - TYPERED ret = INIT; \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEELT nn = *(TYPEELT *)((char *)vn + H(i)); \ - ret = OP(ret, nn); \ - } \ - i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \ - } while (i & 15); \ - } \ - return (TYPERET)ret; \ -} - -#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \ -uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPEE *n = vn; \ - uint8_t *pg = vg; \ - TYPER ret = INIT; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPEE nn = n[i]; \ - ret = OP(ret, nn); \ - } \ - } \ - return ret; \ -} +#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \ + uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPERED ret = INIT; \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPEELT nn = *(TYPEELT *)((char *)vn + H(i)); \ + ret = OP(ret, nn); \ + } \ + i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \ + } while (i & 15); \ + } \ + return (TYPERET)ret; \ + } + +#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \ + uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPEE *n = vn; \ + uint8_t *pg = vg; \ + TYPER ret = INIT; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPEE nn = n[i]; \ + ret = OP(ret, nn); \ + } \ + } \ + return ret; \ + } DO_VPZ(sve_orv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_ORR) DO_VPZ(sve_orv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_ORR) @@ -853,17 +851,17 @@ DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN) #undef DO_VPZ_D /* Two vector operand, one scalar operand, unpredicated. */ -#define DO_ZZI(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \ - TYPE s = s64, *d = vd, *n = vn; \ - for (i = 0; i < opr_sz; ++i) { \ - d[i] = OP(n[i], s); \ - } \ -} +#define DO_ZZI(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \ + TYPE s = s64, *d = vd, *n = vn; \ + for (i = 0; i < opr_sz; ++i) { \ + d[i] = OP(n[i], s); \ + } \ + } -#define DO_SUBR(X, Y) (Y - X) +#define DO_SUBR(X, Y) (Y - X) DO_ZZI(sve_subri_b, uint8_t, DO_SUBR) DO_ZZI(sve_subri_h, uint16_t, DO_SUBR) @@ -1094,49 +1092,49 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc) /* Three-operand expander, immediate operand, controlled by a predicate. 
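 * (The immediate is carried in simd_data(desc); e.g. sve_asr_zpzi_b shifts
 * each active byte element right by that constant.)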
*/ -#define DO_ZPZI(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - TYPE imm = simd_data(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, imm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZI(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE imm = simd_data(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, imm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. */ -#define DO_ZPZI_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *n = vn; \ - TYPE imm = simd_data(desc); \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE nn = n[i]; \ - d[i] = OP(nn, imm); \ - } \ - } \ -} - -#define DO_SHR(N, M) (N >> M) -#define DO_SHL(N, M) (N << M) +#define DO_ZPZI_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + TYPE imm = simd_data(desc); \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn, imm); \ + } \ + } \ + } + +#define DO_SHR(N, M) (N >> M) +#define DO_SHL(N, M) (N << M) /* Arithmetic shift right for division. This rounds negative numbers toward zero as per signed division. Therefore before shifting, when N is negative, add 2**M-1. */ #ifdef _MSC_VER - #define DO_ASRD(N, M) ((N + (N < 0 ? (1 << M) - 1 : 0)) >> M) +#define DO_ASRD(N, M) ((N + (N < 0 ? (1 << M) - 1 : 0)) >> M) #else - #define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) +#define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) #endif DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR) @@ -1167,43 +1165,43 @@ DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD) /* Fully general four-operand expander, controlled by a predicate. 
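 * (Used below for the multiply-accumulate forms: DO_MLA gives
 * d = a + n * m for each active element, DO_MLS the subtracting form.)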
*/ -#define DO_ZPZZZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ - void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - TYPE aa = *(TYPE *)((char *)va + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(aa, nn, mm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZZZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + TYPE aa = *(TYPE *)((char *)va + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(aa, nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. */ -#define DO_ZPZZZ_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ - void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *a = va, *n = vn, *m = vm; \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE aa = a[i], nn = n[i], mm = m[i]; \ - d[i] = OP(aa, nn, mm); \ - } \ - } \ -} - -#define DO_MLA(A, N, M) (A + N * M) -#define DO_MLS(A, N, M) (A - N * M) +#define DO_ZPZZZ_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *a = va, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE aa = a[i], nn = n[i], mm = m[i]; \ + d[i] = OP(aa, nn, mm); \ + } \ + } \ + } + +#define DO_MLA(A, N, M) (A + N * M) +#define DO_MLS(A, N, M) (A - N * M) DO_ZPZZZ(sve_mla_b, uint8_t, H1, DO_MLA) DO_ZPZZZ(sve_mls_b, uint8_t, H1, DO_MLS) @@ -1222,8 +1220,7 @@ DO_ZPZZZ_D(sve_mls_d, uint64_t, DO_MLS) #undef DO_ZPZZZ #undef DO_ZPZZZ_D -void HELPER(sve_index_b)(void *vd, uint32_t start, - uint32_t incr, uint32_t desc) +void HELPER(sve_index_b)(void *vd, uint32_t start, uint32_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc); uint8_t *d = vd; @@ -1232,8 +1229,7 @@ void HELPER(sve_index_b)(void *vd, uint32_t start, } } -void HELPER(sve_index_h)(void *vd, uint32_t start, - uint32_t incr, uint32_t desc) +void HELPER(sve_index_h)(void *vd, uint32_t start, uint32_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 2; uint16_t *d = vd; @@ -1242,8 +1238,7 @@ void HELPER(sve_index_h)(void *vd, uint32_t start, } } -void HELPER(sve_index_s)(void *vd, uint32_t start, - uint32_t incr, uint32_t desc) +void HELPER(sve_index_s)(void *vd, uint32_t start, uint32_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 4; uint32_t *d = vd; @@ -1252,8 +1247,7 @@ void HELPER(sve_index_s)(void *vd, uint32_t start, } } -void HELPER(sve_index_d)(void *vd, uint64_t start, - uint64_t incr, uint32_t desc) +void HELPER(sve_index_d)(void *vd, uint64_t start, uint64_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd; @@ -1326,22 +1320,16 @@ void HELPER(sve_fexpa_s)(void *vd, void *vn, uint32_t desc) { /* These constants are 
cut-and-paste directly from the ARM pseudocode. */ static const uint32_t coeff[] = { - 0x000000, 0x0164d2, 0x02cd87, 0x043a29, - 0x05aac3, 0x071f62, 0x08980f, 0x0a14d5, - 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, - 0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d, - 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, - 0x1ef532, 0x20b051, 0x227043, 0x243516, - 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, - 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, - 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b, - 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, - 0x45672a, 0x478d75, 0x49b9be, 0x4bec15, - 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, - 0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5, - 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, - 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, - 0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c, + 0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f, + 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b, + 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532, + 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, + 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf, + 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75, + 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd, + 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, + 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3, + 0x7d3e0c, }; intptr_t i, opr_sz = simd_oprsz(desc) / 4; uint32_t *d = vd, *n = vn; @@ -1573,8 +1561,8 @@ void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc) /* Two operand predicated copy immediate with merge. All valid immediates * can fit within 17 signed bits in the simd_data field. */ -void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1588,8 +1576,8 @@ void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, } } -void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1603,8 +1591,8 @@ void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, } } -void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1618,8 +1606,8 @@ void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, } } -void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1678,7 +1666,7 @@ void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc) } } -/* Big-endian hosts need to frob the byte indicies. If the copy +/* Big-endian hosts need to frob the byte indices. If the copy * happens to be 8-byte aligned, then no frobbing necessary. 
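 * (E.g. H1(0) is 7 on a big-endian host, so byte 0 of the vector lives at
 * host offset 7 and sub-8-byte copies must go through the H macros.)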
*/ static void swap_memmove(void *vd, void *vs, size_t n) @@ -1702,7 +1690,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); } } else { - for (i = n; i > 0; ) { + for (i = n; i > 0;) { i -= 4; *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); } @@ -1716,7 +1704,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); } } else { - for (i = n; i > 0; ) { + for (i = n; i > 0;) { i -= 2; *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); } @@ -1729,7 +1717,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); } } else { - for (i = n; i > 0; ) { + for (i = n; i > 0;) { i -= 1; *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); } @@ -1800,13 +1788,13 @@ void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc) } } -#define DO_INSR(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - swap_memmove((char *)vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \ - *(TYPE *)((char *)vd + H(0)) = val; \ -} +#define DO_INSR(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + swap_memmove((char *)vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \ + *(TYPE *)((char *)vd + H(0)) = val; \ + } DO_INSR(sve_insr_b, uint8_t, H1) DO_INSR(sve_insr_h, uint16_t, H1_2) @@ -1859,21 +1847,21 @@ void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc) } } -#define DO_TBL(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - uintptr_t elem = opr_sz / sizeof(TYPE); \ - TYPE *d = vd, *n = vn, *m = vm; \ - ARMVectorReg tmp; \ - if (unlikely(vd == vn)) { \ - n = memcpy(&tmp, vn, opr_sz); \ - } \ - for (i = 0; i < elem; i++) { \ - TYPE j = m[H(i)]; \ - d[H(i)] = j < elem ? n[H(j)] : 0; \ - } \ -} +#define DO_TBL(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + uintptr_t elem = opr_sz / sizeof(TYPE); \ + TYPE *d = vd, *n = vn, *m = vm; \ + ARMVectorReg tmp; \ + if (unlikely(vd == vn)) { \ + n = memcpy(&tmp, vn, opr_sz); \ + } \ + for (i = 0; i < elem; i++) { \ + TYPE j = m[H(i)]; \ + d[H(i)] = j < elem ? n[H(j)] : 0; \ + } \ + } DO_TBL(sve_tbl_b, uint8_t, H1) DO_TBL(sve_tbl_h, uint16_t, H2) @@ -1882,20 +1870,20 @@ DO_TBL(sve_tbl_d, uint64_t, ) #undef TBL -#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \ -void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - TYPED *d = vd; \ - TYPES *n = vn; \ - ARMVectorReg tmp; \ - if (unlikely((char *)vn - (char *)vd < opr_sz)) { \ - n = memcpy(&tmp, n, opr_sz / 2); \ - } \ - for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \ - d[HD(i)] = n[HS(i)]; \ - } \ -} +#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \ + void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPED *d = vd; \ + TYPES *n = vn; \ + ARMVectorReg tmp; \ + if (unlikely((char *)vn - (char *)vd < opr_sz)) { \ + n = memcpy(&tmp, n, opr_sz / 2); \ + } \ + for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \ + d[HD(i)] = n[HS(i)]; \ + } \ + } DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1) DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2) @@ -1912,11 +1900,8 @@ DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4) * same pattern out to 16-bit units. 
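 * (Index N of the table keeps the even-numbered units of 2**N bits:
 * 0x5555... keeps even bits, 0x3333... even 2-bit units, and so on up to
 * even 16-bit units.)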
*/ static const uint64_t even_bit_esz_masks[5] = { - 0x5555555555555555ull, - 0x3333333333333333ull, - 0x0f0f0f0f0f0f0f0full, - 0x00ff00ff00ff00ffull, - 0x0000ffff0000ffffull, + 0x5555555555555555ull, 0x3333333333333333ull, 0x0f0f0f0f0f0f0f0full, + 0x00ff00ff00ff00ffull, 0x0000ffff0000ffffull, }; /* Zero-extend units of 2**N bits to units of 2**(N+1) bits. @@ -2112,7 +2097,7 @@ static uint64_t reverse_bits_64(uint64_t x, int n) static uint8_t reverse_bits_8(uint8_t x, int n) { - static const uint8_t mask[3] = { 0x55, 0x33, 0x0f }; + static const uint8_t mask[3] = {0x55, 0x33, 0x0f}; int i, sh; for (i = 2, sh = 4; i >= n; i--, sh >>= 1) { @@ -2197,68 +2182,72 @@ void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc) } } -#define DO_ZIP(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - intptr_t i, oprsz_2 = oprsz / 2; \ - ARMVectorReg tmp_n, tmp_m; \ - /* We produce output faster than we consume input. \ - Therefore we must be mindful of possible overlap. */ \ - if (unlikely(((char *)vn - (char *)vd) < (uintptr_t)oprsz)) { \ - vn = memcpy(&tmp_n, vn, oprsz_2); \ - } \ - if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ - vm = memcpy(&tmp_m, vm, oprsz_2); \ - } \ - for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)vd + H(2 * i + 0)) = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)((char *)vm + H(i)); \ - } \ -} +#define DO_ZIP(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t i, oprsz_2 = oprsz / 2; \ + ARMVectorReg tmp_n, tmp_m; \ + /* We produce output faster than we consume input. \ + Therefore we must be mindful of possible overlap. 
*/ \ + if (unlikely(((char *)vn - (char *)vd) < (uintptr_t)oprsz)) { \ + vn = memcpy(&tmp_n, vn, oprsz_2); \ + } \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz_2); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(2 * i + 0)) = \ + *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(2 * i + sizeof(TYPE))) = \ + *(TYPE *)((char *)vm + H(i)); \ + } \ + } DO_ZIP(sve_zip_b, uint8_t, H1) DO_ZIP(sve_zip_h, uint16_t, H1_2) DO_ZIP(sve_zip_s, uint32_t, H1_4) DO_ZIP(sve_zip_d, uint64_t, ) -#define DO_UZP(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - intptr_t oprsz_2 = oprsz / 2; \ - intptr_t odd_ofs = simd_data(desc); \ - intptr_t i; \ - ARMVectorReg tmp_m; \ - if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ - vm = memcpy(&tmp_m, vm, oprsz); \ - } \ - for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)vd + H(i)) = *(TYPE *)((char *)vn + H(2 * i + odd_ofs)); \ - } \ - for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)vd + H(oprsz_2 + i)) = *(TYPE *)((char *)vm + H(2 * i + odd_ofs)); \ - } \ -} +#define DO_UZP(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t oprsz_2 = oprsz / 2; \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + ARMVectorReg tmp_m; \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(i)) = \ + *(TYPE *)((char *)vn + H(2 * i + odd_ofs)); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(oprsz_2 + i)) = \ + *(TYPE *)((char *)vm + H(2 * i + odd_ofs)); \ + } \ + } DO_UZP(sve_uzp_b, uint8_t, H1) DO_UZP(sve_uzp_h, uint16_t, H1_2) DO_UZP(sve_uzp_s, uint32_t, H1_4) DO_UZP(sve_uzp_d, uint64_t, ) -#define DO_TRN(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - intptr_t odd_ofs = simd_data(desc); \ - intptr_t i; \ - for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \ - TYPE ae = *(TYPE *)((char *)vn + H(i + odd_ofs)); \ - TYPE be = *(TYPE *)((char *)vm + H(i + odd_ofs)); \ - *(TYPE *)((char *)vd + H(i + 0)) = ae; \ - *(TYPE *)((char *)vd + H(i + sizeof(TYPE))) = be; \ - } \ -} +#define DO_TRN(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \ + TYPE ae = *(TYPE *)((char *)vn + H(i + odd_ofs)); \ + TYPE be = *(TYPE *)((char *)vm + H(i + odd_ofs)); \ + *(TYPE *)((char *)vd + H(i + 0)) = ae; \ + *(TYPE *)((char *)vd + H(i + sizeof(TYPE))) = be; \ + } \ + } DO_TRN(sve_trn_b, uint8_t, H1) DO_TRN(sve_trn_h, uint16_t, H1_2) @@ -2352,8 +2341,8 @@ void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) swap_memmove((char *)vd + len, vm, opr_sz * 8 - len); } -void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2366,8 +2355,8 @@ void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, } } -void HELPER(sve_sel_zpzz_h)(void *vd, void 
*vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2380,8 +2369,8 @@ void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm, } } -void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2394,8 +2383,8 @@ void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, } } -void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2428,63 +2417,64 @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, * a scalar output, and also handles the byte-ordering of sub-uint64_t * scalar outputs, is tricky. */ -#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \ -uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - uint32_t flags = PREDTEST_INIT; \ - intptr_t i = opr_sz; \ - do { \ - uint64_t out = 0, pg; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - out |= nn OP mm; \ - } while (i & 63); \ - pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ - out &= pg; \ - *(uint64_t *)((char *)vd + (i >> 3)) = out; \ - flags = iter_predtest_bwd(out, pg, flags); \ - } while (i > 0); \ - return flags; \ -} - -#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \ - DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull) -#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \ +#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \ + uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ + } + +#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \ DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) -#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ +#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) -#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ - DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) +#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) -DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) +DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==) DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==) DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==) -DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=) +DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=) DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=) DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=) DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=) 
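/*
 * A minimal standalone sketch of the predicate layout the DO_CMP_PPZZ
 * variants above rely on: results are packed one bit per vector byte, so
 * only every 1st/2nd/4th/8th bit is significant for B/H/S/D elements, which
 * is exactly what the 0xff.../0x55.../0x11.../0x01... MASK constants encode.
 * The names below (pack_cmp_results, the masks[] table) are illustrative
 * only and are not part of this patch.
 */
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Pack one 64-byte chunk of per-element results, walking backwards as the
 * macro's inner loop does: the element at byte offset B ends up at bit B. */
static uint64_t pack_cmp_results(const bool *res, int esize)
{
    uint64_t out = 0;
    for (int i = 64 - esize; i >= 0; i -= esize) {
        out <<= esize;
        out |= res[i / esize];
    }
    return out;
}

int main(void)
{
    static const uint64_t masks[4] = {
        0xffffffffffffffffull, /* B: every byte starts an element */
        0x5555555555555555ull, /* H: every 2nd byte               */
        0x1111111111111111ull, /* S: every 4th byte               */
        0x0101010101010101ull, /* D: every 8th byte               */
    };
    bool res[64];

    for (int k = 0; k < 64; k++) {
        res[k] = true; /* pretend every element comparison was true */
    }
    for (int e = 0; e < 4; e++) {
        int esize = 1 << e;
        uint64_t out = pack_cmp_results(res, esize);
        /* With all results true, the packed word equals the mask itself. */
        printf("esize %d: out=%016" PRIx64 " mask=%016" PRIx64 "\n",
               esize, out, masks[e]);
    }
    return 0;
}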
-DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >) +DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >) DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >) DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >) DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >) -DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=) +DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=) DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=) DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=) DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=) -DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >) +DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >) DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >) DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >) DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >) -DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=) +DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=) DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=) DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=) DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=) @@ -2496,74 +2486,75 @@ DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=) #undef DO_CMP_PPZZ /* Similar, but the second source is "wide". */ -#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \ -uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - uint32_t flags = PREDTEST_INIT; \ - intptr_t i = opr_sz; \ - do { \ - uint64_t out = 0, pg; \ - do { \ - TYPEW mm = *(TYPEW *)((char *)vm + i - 8); \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - out |= nn OP mm; \ - } while (i & 7); \ - } while (i & 63); \ - pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ - out &= pg; \ - *(uint64_t *)((char *)vd + (i >> 3)) = out; \ - flags = iter_predtest_bwd(out, pg, flags); \ - } while (i > 0); \ - return flags; \ -} - -#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \ - DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull) -#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \ +#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \ + uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + TYPEW mm = *(TYPEW *)((char *)vm + i - 8); \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 7); \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ + } + +#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull) -#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \ +#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull) -DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==) +DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==) DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, int16_t, uint64_t, ==) DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, int32_t, uint64_t, ==) -DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=) +DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=) DO_CMP_PPZW_H(sve_cmpne_ppzw_h, int16_t, uint64_t, !=) DO_CMP_PPZW_S(sve_cmpne_ppzw_s, int32_t, uint64_t, !=) -DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >) -DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >) 
-DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >) +DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >) +DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >) +DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >) -DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=) -DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=) -DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=) +DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=) +DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=) +DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=) -DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >) +DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >) DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >) DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >) -DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=) +DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=) DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=) DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=) -DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <) -DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <) -DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <) +DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <) +DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <) +DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <) -DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=) -DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=) -DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=) +DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=) +DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=) +DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=) -DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <) +DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <) DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <) DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <) -DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=) +DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=) DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=) DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=) @@ -2573,83 +2564,83 @@ DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=) #undef DO_CMP_PPZW /* Similar, but the second source is immediate. 
*/ -#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \ -uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - uint32_t flags = PREDTEST_INIT; \ - TYPE mm = simd_data(desc); \ - intptr_t i = opr_sz; \ - do { \ - uint64_t out = 0, pg; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - out |= nn OP mm; \ - } while (i & 63); \ - pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ - out &= pg; \ - *(uint64_t *)((char *)vd + (i >> 3)) = out; \ - flags = iter_predtest_bwd(out, pg, flags); \ - } while (i > 0); \ - return flags; \ -} - -#define DO_CMP_PPZI_B(NAME, TYPE, OP) \ - DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull) -#define DO_CMP_PPZI_H(NAME, TYPE, OP) \ +#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \ + uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + TYPE mm = simd_data(desc); \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ + } + +#define DO_CMP_PPZI_B(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZI_H(NAME, TYPE, OP) \ DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) -#define DO_CMP_PPZI_S(NAME, TYPE, OP) \ +#define DO_CMP_PPZI_S(NAME, TYPE, OP) \ DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) -#define DO_CMP_PPZI_D(NAME, TYPE, OP) \ - DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) +#define DO_CMP_PPZI_D(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) -DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) +DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==) DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==) DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==) -DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=) +DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=) DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=) DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=) DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=) -DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >) +DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >) DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >) DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >) DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >) -DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=) +DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=) DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=) DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=) DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=) -DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >) +DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >) DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >) DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >) DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >) -DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=) +DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=) DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=) DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=) DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=) -DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <) +DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <) DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <) DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <) DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <) 
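/*
 * A concrete, de-macroized sketch of the compare-against-immediate shape
 * used by DO_CMP_PPZI_S above: walk the operand backwards in 64-byte chunks,
 * pack one result bit per element byte, then gate the packed word with the
 * governing predicate restricted to the significant bits for 4-byte
 * elements.  The flat-array signature is an assumption of this sketch; the
 * real helpers operate on the CPU's vector and predicate register storage
 * and also accumulate PREDTEST flags, which this sketch omits.
 */
#include <stddef.h>
#include <stdint.h>

static void cmpgt_s_imm(uint64_t *pd, const int32_t *zn, const uint64_t *pg,
                        int32_t imm, size_t oprsz /* bytes, multiple of 64 */)
{
    size_t i = oprsz;

    do {
        uint64_t out = 0, g;

        do {
            i -= sizeof(int32_t);
            out <<= sizeof(int32_t);
            out |= zn[i / sizeof(int32_t)] > imm;
        } while (i & 63);

        g = pg[i >> 6] & 0x1111111111111111ull; /* S-element significant bits */
        pd[i >> 6] = out & g;                   /* only active elements report */
    } while (i > 0);
}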
-DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=) +DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=) DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=) DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=) DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=) -DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <) +DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <) DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <) DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <) DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <) -DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=) +DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=) DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=) DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=) DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=) @@ -2678,8 +2669,8 @@ static bool last_active_pred(void *vd, void *vg, intptr_t oprsz) * (if after) or excluding (if !after) the first G & N. * Return true if BRK found. */ -static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, - bool brk, bool after) +static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, bool brk, + bool after) { uint64_t b; @@ -2690,16 +2681,16 @@ static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, b = g; } else { /* Break somewhere in N. Locate it. */ - b = g & n; /* guard true, pred true */ + b = g & n; /* guard true, pred true */ #ifdef _MSC_VER - b = b & (0 - b); /* first such */ + b = b & (0 - b); /* first such */ #else - b = b & -b; /* first such */ + b = b & -b; /* first such */ #endif if (after) { - b = b | (b - 1); /* break after same */ + b = b | (b - 1); /* break after same */ } else { - b = b - 1; /* break before same */ + b = b - 1; /* break before same */ } brk = true; } @@ -2709,8 +2700,8 @@ static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, } /* Compute a zeroing BRK. */ -static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g, - intptr_t oprsz, bool after) +static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g, intptr_t oprsz, + bool after) { bool brk = false; intptr_t i; @@ -2742,8 +2733,8 @@ static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g, } /* Compute a merging BRK. */ -static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g, - intptr_t oprsz, bool after) +static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g, intptr_t oprsz, + bool after) { bool brk = false; intptr_t i; @@ -2960,61 +2951,61 @@ uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc) * The recursion is bounded to depth 7 (128 fp16 elements), so there's * little to gain with a more complex non-recursive form. */ -#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ -static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ -{ \ - if (n == 1) { \ - return *data; \ - } else { \ - uintptr_t half = n / 2; \ - TYPE lo = NAME##_reduce(data, status, half); \ - TYPE hi = NAME##_reduce(data + half, status, half); \ - return TYPE##_##FUNC(lo, hi, status); \ - } \ -} \ -uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ -{ \ - uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ - TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)data + i) = (pg & 1 ? 
nn : IDENT); \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ - for (; i < maxsz; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)data + i) = IDENT; \ - } \ - return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ -} +#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ + static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ + { \ + if (n == 1) { \ + return *data; \ + } else { \ + uintptr_t half = n / 2; \ + TYPE lo = NAME##_reduce(data, status, half); \ + TYPE hi = NAME##_reduce(data + half, status, half); \ + return TYPE##_##FUNC(lo, hi, status); \ + } \ + } \ + uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ + { \ + uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ + TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + for (i = 0; i < oprsz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)data + i) = (pg & 1 ? nn : IDENT); \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + for (; i < maxsz; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)data + i) = IDENT; \ + } \ + return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ + } DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) -DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) +DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) /* Identity is floatN_default_nan, without the function call. */ DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) -DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) +DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) -DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) +DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) -DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) +DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) -DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) +DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) #undef DO_REDUCE -uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) +uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, void *status, + uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc); float16 result = nn; @@ -3033,8 +3024,8 @@ uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, return result; } -uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) +uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, void *status, + uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc); float32 result = nn; @@ -3053,8 +3044,8 @@ uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, return result; } -uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) +uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, void *status, + uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8; uint64_t *m = vm; @@ -3072,56 +3063,56 @@ 
uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, /* Fully general three-operand expander, controlled by a predicate, * With the extra float_status parameter. */ -#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc); \ - uint64_t *g = vg; \ - do { \ - uint64_t pg = g[(i - 1) >> 6]; \ - do { \ - i -= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ - } \ - } while (i & 63); \ - } while (i != 0); \ -} +#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ + } DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) -DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) +DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) -DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) +DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) -DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) +DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) -DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) +DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) -DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) +DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) -DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) +DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) -DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) +DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) -DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) +DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) static inline float16 abd_h(float16 a, float16 b, float_status *s) { @@ -3140,7 +3131,7 @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) -DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) +DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) { @@ -3150,47 +3141,47 @@ static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn) DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn) -DO_ZPZZ_FP(sve_fscalbn_d, int64_t, , scalbn_d) +DO_ZPZZ_FP(sve_fscalbn_d, 
int64_t, , scalbn_d) DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh) DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs) -DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) +DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) #undef DO_ZPZZ_FP /* Three-operand expander, with one scalar operand, controlled by * a predicate, with the extra float_status parameter. */ -#define DO_ZPZS_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc); \ - uint64_t *g = vg; \ - TYPE mm = scalar; \ - do { \ - uint64_t pg = g[(i - 1) >> 6]; \ - do { \ - i -= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ - } \ - } while (i & 63); \ - } while (i != 0); \ -} +#define DO_ZPZS_FP(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ + void *status, uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + TYPE mm = scalar; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ + } DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add) DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add) -DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) +DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub) DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub) -DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) +DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul) DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul) -DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) +DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) static inline float16 subr_h(float16 a, float16 b, float_status *s) { @@ -3209,43 +3200,44 @@ static inline float64 subr_d(float64 a, float64 b, float_status *s) DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h) DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s) -DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) +DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum) DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum) -DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) +DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum) DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum) -DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) +DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max) DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max) -DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) +DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) -DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) +DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) /* Fully general two-operand expander, controlled by a predicate, * With the extra float_status parameter. 
*/ -#define DO_ZPZ_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc); \ - uint64_t *g = vg; \ - do { \ - uint64_t pg = g[(i - 1) >> 6]; \ - do { \ - i -= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, status); \ - } \ - } while (i & 63); \ - } while (i != 0); \ -} +#define DO_ZPZ_FP(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ + } /* SVE fp16 conversions always use IEEE mode. Like AdvSIMD, they ignore * FZ16. When converting from fp16, this affects flushing input denormals; @@ -3253,7 +3245,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ */ static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) { - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); float32 ret; set_flush_inputs_to_zero(false, fpst); @@ -3264,7 +3256,7 @@ static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) { - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); float64 ret; set_flush_inputs_to_zero(false, fpst); @@ -3275,7 +3267,7 @@ static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) { - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); float16 ret; set_flush_to_zero(false, fpst); @@ -3286,7 +3278,7 @@ static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) static inline float16 sve_f64_to_f16(float64 f, float_status *fpst) { - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); float16 ret; set_flush_to_zero(false, fpst); @@ -3369,78 +3361,66 @@ static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s) DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16) DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32) -DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) -DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) -DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) -DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) +DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) +DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) +DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) +DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz) DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh) DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs) -DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) -DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) -DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) -DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, , vfp_float64_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) +DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, , 
vfp_float64_to_int64_rtz) DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz) DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh) DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs) -DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) -DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) -DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) -DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) +DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth) DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints) -DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) +DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int) DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int) -DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) +DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16) DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32) -DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) +DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt) DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt) -DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) +DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16) DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16) DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32) -DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) -DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) -DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) -DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) +DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) +DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) +DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) +DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16) DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16) DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32) -DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) -DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) -DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) -DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) +DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) +DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) +DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) +DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) #undef DO_ZPZ_FP -/* 4-operand predicated multiply-add. This requires 7 operands to pass - * "properly", so we need to encode some of the registers into DESC. 
- */ -QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 20 > 32); - -static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc, - uint16_t neg1, uint16_t neg3) +static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint16_t neg1, + uint16_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3453,45 +3433,42 @@ static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint16_t *)((char *)vn + H1_2(i)) ^ neg1; e2 = *(uint16_t *)((char *)vm + H1_2(i)); e3 = *(uint16_t *)((char *)va + H1_2(i)) ^ neg3; - r = float16_muladd(e1, e2, e3, 0, &env->vfp.fp_status_f16); + r = float16_muladd(e1, e2, e3, 0, status); *(uint16_t *)((char *)vd + H1_2(i)) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); } -void HELPER(sve_fnmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); } -void HELPER(sve_fnmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); } -static void do_fmla_zpzzz_s(CPUARMState *env, void *vg, uint32_t desc, - uint32_t neg1, uint32_t neg3) +static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint32_t neg1, + uint32_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3504,45 +3481,42 @@ static void do_fmla_zpzzz_s(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint32_t *)((char *)vn + H1_4(i)) ^ neg1; e2 = *(uint32_t *)((char *)vm + H1_4(i)); e3 = *(uint32_t *)((char *)va + H1_4(i)) ^ neg3; - r = float32_muladd(e1, e2, e3, 0, &env->vfp.fp_status); + r = float32_muladd(e1, e2, e3, 0, status); *(uint32_t *)((char *)vd + H1_4(i)) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_s)(void *vd, 
void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); } -void HELPER(sve_fnmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); } -void HELPER(sve_fnmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); } -static void do_fmla_zpzzz_d(CPUARMState *env, void *vg, uint32_t desc, - uint64_t neg1, uint64_t neg3) +static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint64_t neg1, + uint64_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3555,31 +3529,35 @@ static void do_fmla_zpzzz_d(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint64_t *)((char *)vn + i) ^ neg1; e2 = *(uint64_t *)((char *)vm + i); e3 = *(uint64_t *)((char *)va + i) ^ neg3; - r = float64_muladd(e1, e2, e3, 0, &env->vfp.fp_status); + r = float64_muladd(e1, e2, e3, 0, status); *(uint64_t *)((char *)vd + i) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, 0, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); } -void HELPER(sve_fnmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); } -void HELPER(sve_fnmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, 0, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); } /* Two operand floating-point comparison controlled by a predicate. 
@@ -3587,49 +3565,46 @@ void HELPER(sve_fnmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) * compare operands, since the comparison may have side effects wrt * the FPSR. */ -#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ - uint64_t *d = vd, *g = vg; \ - do { \ - uint64_t out = 0, pg = g[j]; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - out |= OP(TYPE, nn, mm, status); \ - } \ - } while (i & 63); \ - d[j--] = out; \ - } while (i > 0); \ -} - -#define DO_FPCMP_PPZZ_H(NAME, OP) \ - DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) -#define DO_FPCMP_PPZZ_S(NAME, OP) \ - DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) -#define DO_FPCMP_PPZZ_D(NAME, OP) \ - DO_FPCMP_PPZZ(NAME##_d, float64, , OP) - -#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ - DO_FPCMP_PPZZ_H(NAME, OP) \ - DO_FPCMP_PPZZ_S(NAME, OP) \ +#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= OP(TYPE, nn, mm, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ + } + +#define DO_FPCMP_PPZZ_H(NAME, OP) DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZZ_S(NAME, OP) DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZZ_D(NAME, OP) DO_FPCMP_PPZZ(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ + DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ_S(NAME, OP) \ DO_FPCMP_PPZZ_D(NAME, OP) -#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 -#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 -#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0 -#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0 -#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 -#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 -#define DO_FCMUO(TYPE, X, Y, ST) \ +#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 +#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 +#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0 +#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0 +#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 +#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 +#define DO_FCMUO(TYPE, X, Y, ST) \ TYPE##_compare_quiet(X, Y, ST) == float_relation_unordered -#define DO_FACGE(TYPE, X, Y, ST) \ +#define DO_FACGE(TYPE, X, Y, ST) \ TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) <= 0 -#define DO_FACGT(TYPE, X, Y, ST) \ +#define DO_FACGT(TYPE, X, Y, ST) \ TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) < 0 DO_FPCMP_PPZZ_ALL(sve_fcmge, DO_FCMGE) @@ -3649,35 +3624,32 @@ DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT) /* One operand floating-point comparison against zero, controlled * by a predicate. 
*/ -#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ - uint64_t *d = vd, *g = vg; \ - do { \ - uint64_t out = 0, pg = g[j]; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - if ((pg >> (i & 63)) & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - out |= OP(TYPE, nn, 0, status); \ - } \ - } while (i & 63); \ - d[j--] = out; \ - } while (i > 0); \ -} - -#define DO_FPCMP_PPZ0_H(NAME, OP) \ - DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP) -#define DO_FPCMP_PPZ0_S(NAME, OP) \ - DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) -#define DO_FPCMP_PPZ0_D(NAME, OP) \ - DO_FPCMP_PPZ0(NAME##_d, float64, , OP) - -#define DO_FPCMP_PPZ0_ALL(NAME, OP) \ - DO_FPCMP_PPZ0_H(NAME, OP) \ - DO_FPCMP_PPZ0_S(NAME, OP) \ +#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if ((pg >> (i & 63)) & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= OP(TYPE, nn, 0, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ + } + +#define DO_FPCMP_PPZ0_H(NAME, OP) DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZ0_S(NAME, OP) DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZ0_D(NAME, OP) DO_FPCMP_PPZ0(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZ0_ALL(NAME, OP) \ + DO_FPCMP_PPZ0_H(NAME, OP) \ + DO_FPCMP_PPZ0_S(NAME, OP) \ DO_FPCMP_PPZ0_D(NAME, OP) DO_FPCMP_PPZ0_ALL(sve_fcmge0, DO_FCMGE) @@ -3712,9 +3684,8 @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) { static const float32 coeff[16] = { - 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, - 0x36369d6d, 0x00000000, 0x00000000, 0x00000000, - 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705, + 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, 0x36369d6d, 0x00000000, + 0x00000000, 0x00000000, 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705, 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, }; intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); @@ -3734,14 +3705,12 @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) { static const float64 coeff[16] = { - 0x3ff0000000000000ull, 0xbfc5555555555543ull, - 0x3f8111111110f30cull, 0xbf2a01a019b92fc6ull, - 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull, - 0x3de5d8408868552full, 0x0000000000000000ull, - 0x3ff0000000000000ull, 0xbfe0000000000000ull, - 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull, - 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull, - 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, + 0x3ff0000000000000ull, 0xbfc5555555555543ull, 0x3f8111111110f30cull, + 0xbf2a01a019b92fc6ull, 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull, + 0x3de5d8408868552full, 0x0000000000000000ull, 0x3ff0000000000000ull, + 0xbfe0000000000000ull, 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull, + 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull, 0x3e21ee96d2641b13ull, + 0xbda8f76380fbb401ull, }; intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); intptr_t x = simd_data(desc); @@ -3761,8 +3730,8 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) * FP Complex 
Add */ -void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) +void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, void *vs, + uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; @@ -3793,8 +3762,8 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, } while (i != 0); } -void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) +void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, void *vs, + uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; @@ -3825,8 +3794,8 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, } while (i != 0); } -void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) +void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, void *vs, + uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; @@ -3861,22 +3830,13 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, * FP Complex Multiply */ -QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 22 > 32); - -void HELPER(sve_fcmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float16 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float16_set_sign(0, (rot & 2) != 0); @@ -3903,32 +3863,25 @@ void HELPER(sve_fcmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float16 *)((char *)va + H1_2(i)); - d = float16_muladd(e2, e1, d, 0, &env->vfp.fp_status_f16); + d = float16_muladd(e2, e1, d, 0, status); *(float16 *)((char *)vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float16 *)((char *)va + H1_2(j)); - d = float16_muladd(e4, e3, d, 0, &env->vfp.fp_status_f16); + d = float16_muladd(e4, e3, d, 0, status); *(float16 *)((char *)vd + H1_2(j)) = d; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fcmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float32 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float32_set_sign(0, (rot & 2) != 0); @@ -3955,32 +3908,25 @@ void HELPER(sve_fcmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float32 *)((char *)va + H1_2(i)); - d = float32_muladd(e2, e1, d, 0, &env->vfp.fp_status); + d = float32_muladd(e2, e1, d, 0, status); 
*(float32 *)((char *)vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float32 *)((char *)va + H1_2(j)); - d = float32_muladd(e4, e3, d, 0, &env->vfp.fp_status); + d = float32_muladd(e4, e3, d, 0, status); *(float32 *)((char *)vd + H1_2(j)) = d; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float64 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float64_set_sign(0, (rot & 2) != 0); @@ -4007,12 +3953,12 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float64 *)((char *)va + H1_2(i)); - d = float64_muladd(e2, e1, d, 0, &env->vfp.fp_status); + d = float64_muladd(e2, e1, d, 0, status); *(float64 *)((char *)vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float64 *)((char *)va + H1_2(j)); - d = float64_muladd(e4, e3, d, 0, &env->vfp.fp_status); + d = float64_muladd(e4, e3, d, 0, status); *(float64 *)((char *)vd + H1_2(j)) = d; } } while (i & 63); @@ -4024,103 +3970,111 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) */ /* - * Load elements into @vd, controlled by @vg, from @host + @mem_ofs. - * Memory is valid through @host + @mem_max. The register element - * indicies are inferred from @mem_ofs, as modified by the types for - * which the helper is built. Return the @mem_ofs of the first element - * not loaded (which is @mem_max if they are all loaded). - * - * For softmmu, we have fully validated the guest page. For user-only, - * we cannot fully validate without taking the mmap lock, but since we - * know the access is within one host page, if any access is valid they - * all must be valid. However, when @vg is all false, it may be that - * no access is valid. + * Load one element into @vd + @reg_off from @host. + * The controlling predicate is known to be true. */ -typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host, - intptr_t mem_ofs, intptr_t mem_max); +typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); /* * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). * The controlling predicate is known to be true. */ -typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, - target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra); -typedef sve_ld1_tlb_fn sve_st1_tlb_fn; +typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, uintptr_t retaddr); /* * Generate the above primitives. 
*/ -#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ -static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \ - intptr_t mem_off, const intptr_t mem_max) \ -{ \ - intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM)); \ - uint64_t *pg = vg; \ - while (mem_off + sizeof(TYPEM) <= mem_max) { \ - TYPEM val = 0; \ - if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) { \ - val = HOST((char *)host + mem_off); \ - } \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ - mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE); \ - } \ - return mem_off; \ -} - -#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ -{ \ - TYPEM val = TLB(env, addr, oi, ra); \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ -} - -#define DO_LD_PRIM_1(NAME, H, TE, TM) \ - DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ - DO_LD_TLB(NAME, H, TE, TM, ldub_p, 0, helper_ret_ldub_mmu) - -DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) -DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) -DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) -DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) -DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) -DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) -DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) - -#define DO_LD_PRIM_2(NAME, end, MOEND, H, TE, TM, PH, PT) \ - DO_LD_HOST(NAME##_##end, H, TE, TM, PH##_##end##_p) \ - DO_LD_TLB(NAME##_##end, H, TE, TM, PH##_##end##_p, \ - MOEND, helper_##end##_##PT##_mmu) - -DO_LD_PRIM_2(ld1hh, le, MO_LE, H1_2, uint16_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hsu, le, MO_LE, H1_4, uint32_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hss, le, MO_LE, H1_4, uint32_t, int16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hdu, le, MO_LE, , uint64_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hds, le, MO_LE, , uint64_t, int16_t, lduw, lduw) +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ + static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ + { \ + TYPEM val = HOST(host); \ + *(TYPEE *)((char*)vd + H(reg_off)) = val; \ + } -DO_LD_PRIM_2(ld1ss, le, MO_LE, H1_4, uint32_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sdu, le, MO_LE, , uint64_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sds, le, MO_LE, , uint64_t, int32_t, ldl, ldul) +#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ + static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ + { \ + HOST(host, (TYPEM) * (TYPEE *)((char*)vd + H(reg_off))); \ + } -DO_LD_PRIM_2(ld1dd, le, MO_LE, , uint64_t, uint64_t, ldq, ldq) +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ + static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ + { \ + *(TYPEE *)((char*)vd + H(reg_off)) = \ + (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ + } -DO_LD_PRIM_2(ld1hh, be, MO_BE, H1_2, uint16_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hsu, be, MO_BE, H1_4, uint32_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hss, be, MO_BE, H1_4, uint32_t, int16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hdu, be, MO_BE, , uint64_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hds, be, MO_BE, , uint64_t, int16_t, lduw, lduw) +#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ + static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ + { \ + TLB(env, useronly_clean_ptr(addr), \ + (TYPEM) * (TYPEE *)((char*)vd + H(reg_off)), ra); \ + } -DO_LD_PRIM_2(ld1ss, be, MO_BE, H1_4, uint32_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sdu, be, MO_BE, , uint64_t, uint32_t, ldl, 
ldul) -DO_LD_PRIM_2(ld1sds, be, MO_BE, , uint64_t, int32_t, ldl, ldul) +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ + DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) -DO_LD_PRIM_2(ld1dd, be, MO_BE, , uint64_t, uint64_t, ldq, ldq) +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) +DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) +DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) + +#define DO_ST_PRIM_1(NAME, H, TE, TM) \ + DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ + DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) + +DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) +DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) +DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) +DO_ST_PRIM_1(bd, , uint64_t, uint8_t) + +#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ + DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ + DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ + DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ + DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) + +#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ + DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ + DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ + DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ + DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) + +DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) +DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) +DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) +DO_LD_PRIM_2(hdu, , uint64_t, uint16_t, lduw) +DO_LD_PRIM_2(hds, , uint64_t, int16_t, lduw) + +DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) +DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) +DO_ST_PRIM_2(hd, , uint64_t, uint16_t, stw) + +DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) +DO_LD_PRIM_2(sdu, , uint64_t, uint32_t, ldl) +DO_LD_PRIM_2(sds, , uint64_t, int32_t, ldl) + +DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) +DO_ST_PRIM_2(sd, , uint64_t, uint32_t, stl) + +DO_LD_PRIM_2(dd, , uint64_t, uint64_t, ldq) +DO_ST_PRIM_2(dd, , uint64_t, uint64_t, stq) #undef DO_LD_TLB +#undef DO_ST_TLB #undef DO_LD_HOST #undef DO_LD_PRIM_1 +#undef DO_ST_PRIM_1 #undef DO_LD_PRIM_2 +#undef DO_ST_PRIM_2 /* * Skip through a sequence of inactive elements in the guarding predicate @vg, @@ -4157,297 +4111,687 @@ static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, } /* - * Return the maximum offset <= @mem_max which is still within the page - * referenced by @base + @mem_off. + * Resolve the guest virtual address to info->host and info->flags. + * If @nofault, return false if the page is invalid, otherwise + * exit via page fault exception. */ -static intptr_t max_for_page(struct uc_struct *uc, target_ulong base, intptr_t mem_off, - intptr_t mem_max) -{ - target_ulong addr = base + mem_off; - intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK); - return MIN(split, mem_max - mem_off) + mem_off; -} -/* These are normally defined only for CONFIG_USER_ONLY in */ -static inline void set_helper_retaddr(uintptr_t ra) { } -static inline void clear_helper_retaddr(void) { } +typedef struct { + void *host; + int flags; + MemTxAttrs attrs; +} SVEHostPage; -/* - * The result of tlb_vaddr_to_host for user-only is just g2h(x), - * which is always non-null. Elide the useless test. 
- */ -static inline bool test_host_page(void *host) +static bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env, + target_ulong addr, int mem_off, + MMUAccessType access_type, int mmu_idx, + uintptr_t retaddr) { - return likely(host != NULL); + int flags; + + addr += mem_off; + + /* + * User-only currently always issues with TBI. See the comment + * above useronly_clean_ptr. Usually we clean this top byte away + * during translation, but we can't do that for e.g. vector + imm + * addressing modes. + * + * We currently always enable TBI for user-only, and do not provide + * a way to turn it off. So clean the pointer unconditionally here, + * rather than look it up here, or pass it down from above. + */ + addr = useronly_clean_ptr(addr); + + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, + &info->host, retaddr); + info->flags = flags; + + if (flags & TLB_INVALID_MASK) { + g_assert(nofault); + return false; + } + + /* Ensure that info->host[] is relative to addr, not addr + mem_off. */ + info->host = (void*)((char*)(info->host) - mem_off); + + /* + * Find the iotlbentry for addr and return the transaction attributes. + * This *must* be present in the TLB because we just found the mapping. + */ + { + uintptr_t index = tlb_index(env, mmu_idx, addr); + +#ifdef CONFIG_DEBUG_TCG + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong comparator = + (access_type == MMU_DATA_LOAD ? entry->addr_read + : tlb_addr_write(entry)); + g_assert(tlb_hit(comparator, addr)); +#endif + + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + info->attrs = iotlbentry->attrs; + } + + return true; } /* - * Common helper for all contiguous one-register predicated loads. + * Analyse contiguous data, protected by a governing predicate. */ -static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, - sve_ld1_host_fn *host_fn, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; - const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - ARMVectorReg scratch; - void *host; - intptr_t split, reg_off, mem_off; - /* Find the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { - /* The entire predicate was false; no load occurs. */ - memset(vd, 0, reg_max); - return; - } - mem_off = reg_off >> diffsz; - set_helper_retaddr(retaddr); +typedef enum { + FAULT_NO, + FAULT_FIRST, + FAULT_ALL, +} SVEContFault; +typedef struct { /* - * If the (remaining) load is entirely within a single page, then: - * For softmmu, and the tlb hits, then no faults will occur; - * For user-only, either the first load will fault or none will. - * We can thus perform the load directly to the destination and - * Vd will be unmodified on any exception path. + * First and last element wholly contained within the two pages. + * mem_off_first[0] and reg_off_first[0] are always set >= 0. + * reg_off_last[0] may be < 0 if the first element crosses pages. + * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] + * are set >= 0 only if there are complete elements on a second page. + * + * The reg_off_* offsets are relative to the internal vector register. 
+ * The mem_off_first offset is relative to the memory address; the + * two offsets are different when a load operation extends, a store + * operation truncates, or for multi-register operations. */ - split = max_for_page(env->uc, addr, mem_off, mem_max); - if (likely(split == mem_max)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (test_host_page(host)) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); - tcg_debug_assert(mem_off == mem_max); - clear_helper_retaddr(); - /* After having taken any fault, zero leading inactive elements. */ - swap_memzero(vd, reg_off); - return; + int16_t mem_off_first[2]; + int16_t reg_off_first[2]; + int16_t reg_off_last[2]; + + /* + * One element that is misaligned and spans both pages, + * or -1 if there is no such active element. + */ + int16_t mem_off_split; + int16_t reg_off_split; + + /* + * The byte offset at which the entire operation crosses a page boundary. + * Set >= 0 if and only if the entire operation spans two pages. + */ + int16_t page_split; + + /* TLB data for the two pages. */ + SVEHostPage page[2]; +} SVEContLdSt; + +/* + * Find first active element on each page, and a loose bound for the + * final element on each page. Identify any single element that spans + * the page boundary. Return true if there are any active elements. + */ +static bool sve_cont_ldst_elements(CPUARMState *env, SVEContLdSt *info, + target_ulong addr, uint64_t *vg, + intptr_t reg_max, int esz, int msize) +{ + uc_engine *uc = env->uc; + const int esize = 1 << esz; + const uint64_t pg_mask = pred_esz_masks[esz]; + intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split; + intptr_t mem_off_last, mem_off_split; + intptr_t page_split, elt_split; + intptr_t i; + + /* Set all of the element indices to -1, and the TLB data to 0. */ + memset(info, -1, offsetof(SVEContLdSt, page)); + memset(info->page, 0, sizeof(info->page)); + + /* Gross scan over the entire predicate to find bounds. */ + i = 0; + do { + uint64_t pg = vg[i] & pg_mask; + if (pg) { + reg_off_last = i * 64 + 63 - clz64(pg); + if (reg_off_first < 0) { + reg_off_first = i * 64 + ctz64(pg); + } } + } while (++i * 64 < reg_max); + + if (unlikely(reg_off_first < 0)) { + /* No active elements, no pages touched. */ + return false; } + tcg_debug_assert(reg_off_last >= 0 && reg_off_last < reg_max); + + info->reg_off_first[0] = reg_off_first; + info->mem_off_first[0] = (reg_off_first >> esz) * msize; + mem_off_last = (reg_off_last >> esz) * msize; + + page_split = -(addr | TARGET_PAGE_MASK); + if (likely(mem_off_last + msize <= page_split)) { + /* The entire operation fits within a single page. */ + info->reg_off_last[0] = reg_off_last; + return true; + } + + info->page_split = page_split; + elt_split = page_split / msize; + reg_off_split = elt_split << esz; + mem_off_split = elt_split * msize; /* - * Perform the predicated read into a temporary, thus ensuring - * if the load of the last element faults, Vd is not modified. + * This is the last full element on the first page, but it is not + * necessarily active. If there is no full element, i.e. the first + * active element is the one that's split, this value remains -1. + * It is useful as iteration bounds. 
*/ - memset(&scratch, 0, reg_max); - goto start; - while (1) { - reg_off = find_next_active(vg, reg_off, reg_max, esz); - if (reg_off >= reg_max) { - break; - } - mem_off = reg_off >> diffsz; - split = max_for_page(env->uc, addr, mem_off, mem_max); - - start: - if (split - mem_off >= (1ULL << msz)) { - /* At least one whole element on this page. */ - host = tlb_vaddr_to_host(env, addr + mem_off, - MMU_DATA_LOAD, mmu_idx); - if (host) { - mem_off = host_fn(&scratch, vg, (char *)host - mem_off, - mem_off, split); - reg_off = mem_off << diffsz; - continue; + if (elt_split != 0) { + info->reg_off_last[0] = reg_off_split - esize; + } + + /* Determine if an unaligned element spans the pages. */ + if (page_split % msize != 0) { + /* It is helpful to know if the split element is active. */ + if ((vg[reg_off_split >> 6] >> (reg_off_split & 63)) & 1) { + info->reg_off_split = reg_off_split; + info->mem_off_split = mem_off_split; + + if (reg_off_split == reg_off_last) { + /* The page crossing element is last. */ + return true; } } + reg_off_split += esize; + mem_off_split += msize; + } + + /* + * We do want the first active element on the second page, because + * this may affect the address reported in an exception. + */ + reg_off_split = find_next_active(vg, reg_off_split, reg_max, esz); + tcg_debug_assert(reg_off_split <= reg_off_last); + info->reg_off_first[1] = reg_off_split; + info->mem_off_first[1] = (reg_off_split >> esz) * msize; + info->reg_off_last[1] = reg_off_last; + return true; +} + +/* + * Resolve the guest virtual addresses to info->page[]. + * Control the generation of page faults with @fault. Return false if + * there is no work to do, which can only happen with @fault == FAULT_NO. + */ +static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, + CPUARMState *env, target_ulong addr, + MMUAccessType access_type, uintptr_t retaddr) +{ + int mmu_idx = cpu_mmu_index(env, false); + int mem_off = info->mem_off_first[0]; + bool nofault = fault == FAULT_NO; + bool have_work = true; + + if (!sve_probe_page(&info->page[0], nofault, env, addr, mem_off, + access_type, mmu_idx, retaddr)) { + /* No work to be done. */ + return false; + } + + if (likely(info->page_split < 0)) { + /* The entire operation was on the one page. */ + return true; + } + /* + * If the second page is invalid, then we want the fault address to be + * the first byte on that page which is accessed. + */ + if (info->mem_off_split >= 0) { + /* + * There is an element split across the pages. The fault address + * should be the first byte of the second page. + */ + mem_off = info->page_split; /* - * Perform one normal read. This may fault, longjmping out to the - * main loop in order to raise an exception. It may succeed, and - * as a side-effect load the TLB entry for the next round. Finally, - * in the extremely unlikely case we're performing this operation - * on I/O memory, it may succeed but not bring in the TLB entry. - * But even then we have still made forward progress. + * If the split element is also the first active element + * of the vector, then: For first-fault we should continue + * to generate faults for the second page. For no-fault, + * we have work only if the second page is valid. */ - tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr); - reg_off += 1ULL << esz; + if (info->mem_off_first[0] < info->mem_off_split) { + nofault = FAULT_FIRST; + have_work = false; + } + } else { + /* + * There is no element split across the pages. 
The fault address + * should be the first active element on the second page. + */ + mem_off = info->mem_off_first[1]; + /* + * There must have been one active element on the first page, + * so we're out of first-fault territory. + */ + nofault = fault != FAULT_ALL; } - clear_helper_retaddr(); - memcpy(vd, &scratch, reg_max); + have_work |= sve_probe_page(&info->page[1], nofault, env, addr, mem_off, + access_type, mmu_idx, retaddr); + return have_work; } -#define DO_LD1_1(NAME, ESZ) \ -void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ - sve_##NAME##_host, sve_##NAME##_tlb); \ -} - -#define DO_LD1_2(NAME, ESZ, MSZ) \ -void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ -} \ -void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ -} - -DO_LD1_1(ld1bb, 0) -DO_LD1_1(ld1bhu, 1) -DO_LD1_1(ld1bhs, 1) -DO_LD1_1(ld1bsu, 2) -DO_LD1_1(ld1bss, 2) -DO_LD1_1(ld1bdu, 3) -DO_LD1_1(ld1bds, 3) - -DO_LD1_2(ld1hh, 1, 1) -DO_LD1_2(ld1hsu, 2, 1) -DO_LD1_2(ld1hss, 2, 1) -DO_LD1_2(ld1hdu, 3, 1) -DO_LD1_2(ld1hds, 3, 1) - -DO_LD1_2(ld1ss, 2, 2) -DO_LD1_2(ld1sdu, 3, 2) -DO_LD1_2(ld1sds, 3, 2) - -DO_LD1_2(ld1dd, 3, 3) +static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, int wp_access, + uintptr_t retaddr) +{ + intptr_t mem_off, reg_off, reg_last; + int flags0 = info->page[0].flags; + int flags1 = info->page[1].flags; -#undef DO_LD1_1 -#undef DO_LD1_2 + if (likely(!((flags0 | flags1) & TLB_WATCHPOINT))) { + return; + } -/* - * Common helpers for all contiguous 2,3,4-register predicated loads. - */ -static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[2] = { 0 }; + /* Indicate that watchpoints are handled. */ + info->page[0].flags = flags0 & ~TLB_WATCHPOINT; + info->page[1].flags = flags1 & ~TLB_WATCHPOINT; + + if (flags0 & TLB_WATCHPOINT) { + mem_off = info->mem_off_first[0]; + reg_off = info->reg_off_first[0]; + reg_last = info->reg_off_last[0]; + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[0].attrs, wp_access, + retaddr); + } + reg_off += esize; + mem_off += msize; + } while (reg_off <= reg_last && (reg_off & 63)); + } + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - } - i += size, pg >>= size; - addr += 2 * size; - } while (i & 15); + mem_off = info->mem_off_split; + if (mem_off >= 0) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[0].attrs, wp_access, retaddr); } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + mem_off = info->mem_off_first[1]; + if ((flags1 & TLB_WATCHPOINT) && mem_off >= 0) { + reg_off = info->reg_off_first[1]; + reg_last = info->reg_off_last[1]; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[1].attrs, wp_access, + retaddr); + } + reg_off += esize; + mem_off += msize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } -static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) +typedef uint64_t mte_check_fn(CPUARMState *, uint32_t, uint64_t, uintptr_t); + +static inline QEMU_ALWAYS_INLINE void +sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env, uint64_t *vg, + target_ulong addr, int esize, int msize, + uint32_t mtedesc, uintptr_t ra, mte_check_fn *check) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[3] = { 0 }; + intptr_t mem_off, reg_off, reg_last; + + /* Process the page only if MemAttr == Tagged. */ + if (arm_tlb_mte_tagged(&info->page[0].attrs)) { + mem_off = info->mem_off_first[0]; + reg_off = info->reg_off_first[0]; + reg_last = info->reg_off_split; + if (reg_last < 0) { + reg_last = info->reg_off_last[0]; + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); - } - i += size, pg >>= size; - addr += 3 * size; - } while (i & 15); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off <= reg_last && (reg_off & 63)); + } while (reg_off <= reg_last); } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); - memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); + mem_off = info->mem_off_first[1]; + if (mem_off >= 0 && arm_tlb_mte_tagged(&info->page[1].attrs)) { + reg_off = info->reg_off_first[1]; + reg_last = info->reg_off_last[1]; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } -static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) +typedef void sve_cont_ldst_mte_check_fn(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, uint32_t mtedesc, + uintptr_t ra); + +static void sve_cont_ldst_mte_check1(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[4] = { 0 }; + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, mtedesc, ra, + mte_check1); +} + +static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra) +{ + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, mtedesc, ra, + mte_checkN); +} + +/* + * Common helper for all contiguous 1,2,3,4-register predicated stores. + */ +static inline QEMU_ALWAYS_INLINE void +sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, const int esz, const int msz, + const int N, uint32_t mtedesc, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn, sve_cont_ldst_mte_check_fn *mte_check_fn) +{ + const unsigned rd = simd_data(desc); + const intptr_t reg_max = simd_oprsz(desc); + intptr_t reg_off, reg_last, mem_off; + SVEContLdSt info; + void *host; + int flags, i; + + /* Find the active elements. */ + if (!sve_cont_ldst_elements(env, &info, addr, vg, reg_max, esz, N << msz)) { + /* The entire predicate was false; no load occurs. */ + for (i = 0; i < N; ++i) { + memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max); + } + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, retaddr); + + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, + BP_MEM_READ, retaddr); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, mtedesc, + retaddr); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. Perform the load + * into scratch memory to preserve register state until the end. 
+ */ + ARMVectorReg scratch[4] = {}; + + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; + } + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); - tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + tlb_fn(env, &scratch[i], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + + for (i = 0; i < N; ++i) { + memcpy(&env->vfp.zregs[(rd + i) & 31], &scratch[i], reg_max); + } + return; +#endif + } + + /* The entire operation is in RAM, on valid pages. */ + + for (i = 0; i < N; ++i) { + memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max); + } + + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } } - i += size, pg >>= size; - addr += 4 * size; - } while (i & 15); + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off <= reg_last && (reg_off & 63)); } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. */ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); - memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); - memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz); -} + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. 
+ */ + mem_off = info.mem_off_split; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_split; + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } -#define DO_LDN_1(N) \ -void QEMU_FLATTEN HELPER(sve_ld##N##bb_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \ -} + mem_off = info.mem_off_first[1]; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_first[1]; + reg_last = info.reg_off_last[1]; + host = info.page[1].host; -#define DO_LDN_2(N, SUFF, SIZE) \ -void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_le_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ - sve_ld1##SUFF##_le_tlb); \ -} \ -void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_be_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ - sve_ld1##SUFF##_be_tlb); \ + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } +static inline QEMU_ALWAYS_INLINE void +sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t ra, const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? 
sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); +} + +#define DO_LD1_1(NAME, ESZ) \ + void HELPER(sve_##NAME##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb, NULL); \ + } \ + void HELPER(sve_##NAME##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ + } + +#define DO_LD1_2(NAME, ESZ, MSZ) \ + void HELPER(sve_##NAME##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb, NULL); \ + } \ + void HELPER(sve_##NAME##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb, NULL); \ + } \ + void HELPER(sve_##NAME##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ + } \ + void HELPER(sve_##NAME##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ + } + +DO_LD1_1(ld1bb, MO_8) +DO_LD1_1(ld1bhu, MO_16) +DO_LD1_1(ld1bhs, MO_16) +DO_LD1_1(ld1bsu, MO_32) +DO_LD1_1(ld1bss, MO_32) +DO_LD1_1(ld1bdu, MO_64) +DO_LD1_1(ld1bds, MO_64) + +DO_LD1_2(ld1hh, MO_16, MO_16) +DO_LD1_2(ld1hsu, MO_32, MO_16) +DO_LD1_2(ld1hss, MO_32, MO_16) +DO_LD1_2(ld1hdu, MO_64, MO_16) +DO_LD1_2(ld1hds, MO_64, MO_16) + +DO_LD1_2(ld1ss, MO_32, MO_32) +DO_LD1_2(ld1sdu, MO_64, MO_32) +DO_LD1_2(ld1sds, MO_64, MO_32) + +DO_LD1_2(ld1dd, MO_64, MO_64) + +#undef DO_LD1_1 +#undef DO_LD1_2 + +#define DO_LDN_1(N) \ + void HELPER(sve_ld##N##bb_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \ + sve_ld1bb_host, sve_ld1bb_tlb, NULL); \ + } \ + void HELPER(sve_ld##N##bb_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ + sve_ld1bb_host, sve_ld1bb_tlb); \ + } + +#define DO_LDN_2(N, SUFF, ESZ) \ + void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb, NULL); \ + } \ + void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb, NULL); \ + } \ + void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ + } \ + void HELPER(sve_ld##N##SUFF##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ + } + DO_LDN_1(2) DO_LDN_1(3) DO_LDN_1(4) -DO_LDN_2(2, hh, 2) -DO_LDN_2(3, hh, 2) -DO_LDN_2(4, hh, 2) +DO_LDN_2(2, hh, MO_16) +DO_LDN_2(3, 
hh, MO_16) +DO_LDN_2(4, hh, MO_16) -DO_LDN_2(2, ss, 4) -DO_LDN_2(3, ss, 4) -DO_LDN_2(4, ss, 4) +DO_LDN_2(2, ss, MO_32) +DO_LDN_2(3, ss, MO_32) +DO_LDN_2(4, ss, MO_32) -DO_LDN_2(2, dd, 8) -DO_LDN_2(3, dd, 8) -DO_LDN_2(4, dd, 8) +DO_LDN_2(2, dd, MO_64) +DO_LDN_2(3, dd, MO_64) +DO_LDN_2(4, dd, MO_64) #undef DO_LDN_1 #undef DO_LDN_2 @@ -4484,385 +4828,524 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) } /* - * Common helper for all contiguous first-fault loads. + * Common helper for all contiguous no-fault and first-fault loads. */ -static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, - sve_ld1_host_fn *host_fn, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); +static inline QEMU_ALWAYS_INLINE void +sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, uint32_t mtedesc, + const int esz, const int msz, const SVEContFault fault, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) +{ + const unsigned rd = simd_data(desc); void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - intptr_t split, reg_off, mem_off; + intptr_t reg_off, mem_off, reg_last; + SVEContLdSt info; + int flags; void *host; - /* Skip to the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { + /* Find the active elements. */ + if (!sve_cont_ldst_elements(env, &info, addr, vg, reg_max, esz, 1 << msz)) { /* The entire predicate was false; no load occurs. */ memset(vd, 0, reg_max); return; } - mem_off = reg_off >> diffsz; - set_helper_retaddr(retaddr); + reg_off = info.reg_off_first[0]; + + /* Probe the page(s). */ + if (!sve_cont_ldst_pages(&info, fault, env, addr, MMU_DATA_LOAD, retaddr)) { + /* Fault on first element. */ + tcg_debug_assert(fault == FAULT_NO); + memset(vd, 0, reg_max); + goto do_fault; + } + + mem_off = info.mem_off_first[0]; + flags = info.page[0].flags; /* - * If the (remaining) load is entirely within a single page, then: - * For softmmu, and the tlb hits, then no faults will occur; - * For user-only, either the first load will fault or none will. - * We can thus perform the load directly to the destination and - * Vd will be unmodified on any exception path. + * Disable MTE checking if the Tagged bit is not set. Since TBI must + * be set within MTEDESC for MTE, !mtedesc => !mte_active. */ - split = max_for_page(env->uc, addr, mem_off, mem_max); - if (likely(split == mem_max)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (test_host_page(host)) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); - tcg_debug_assert(mem_off == mem_max); - clear_helper_retaddr(); - /* After any fault, zero any leading inactive elements. */ + if (arm_tlb_mte_tagged(&info.page[0].attrs)) { + mtedesc = 0; + } + + if (fault == FAULT_FIRST) { + /* Trapping mte check for the first-fault element. */ + if (mtedesc) { + mte_check1(env, mtedesc, addr + mem_off, retaddr); + } + + /* + * Special handling of the first active element, + * if it crosses a page boundary or is MMIO. 
+ */ + bool is_split = mem_off == info.mem_off_split; + if (unlikely(flags != 0) || unlikely(is_split)) { + /* + * Use the slow path for cross-page handling. + * Might trap for MMIO or watchpoints. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, retaddr); + + /* After any fault, zero the other elements. */ swap_memzero(vd, reg_off); - return; + reg_off += 1 << esz; + mem_off += 1 << msz; + swap_memzero((char*)vd + reg_off, reg_max - reg_off); + + if (is_split) { + goto second_page; + } + } else { + memset(vd, 0, reg_max); + } + } else { + memset(vd, 0, reg_max); + if (unlikely(mem_off == info.mem_off_split)) { + /* The first active element crosses a page boundary. */ + flags |= info.page[1].flags; + if (unlikely(flags & TLB_MMIO)) { + /* Some page is MMIO, see below. */ + goto do_fault; + } + if (unlikely(flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches(env_cpu(env), addr + mem_off, + 1 << msz) & + BP_MEM_READ)) { + /* Watchpoint hit, see below. */ + goto do_fault; + } + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } + /* + * Use the slow path for cross-page handling. + * This is RAM, without a watchpoint, and will not trap. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, retaddr); + goto second_page; } } /* - * Perform one normal read, which will fault or not. - * But it is likely to bring the page into the tlb. + * From this point on, all memory operations are MemSingleNF. + * + * Per the MemSingleNF pseudocode, a no-fault load from Device memory + * must not actually hit the bus -- it returns (UNKNOWN, FAULT) instead. + * + * Unfortuately we do not have access to the memory attributes from the + * PTE to tell Device memory from Normal memory. So we make a mostly + * correct check, and indicate (UNKNOWN, FAULT) for any MMIO. + * This gives the right answer for the common cases of "Normal memory, + * backed by host RAM" and "Device memory, backed by MMIO". + * The architecture allows us to suppress an NF load and return + * (UNKNOWN, FAULT) for any reason, so our behaviour for the corner + * case of "Normal memory, backed by MMIO" is permitted. The case we + * get wrong is "Device memory, backed by host RAM", for which we + * should return (UNKNOWN, FAULT) for but do not. + * + * Similarly, CPU_BP breakpoints would raise exceptions, and so + * return (UNKNOWN, FAULT). For simplicity, we consider gdb and + * architectural breakpoints the same. */ - tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr); - - /* After any fault, zero any leading predicated false elts. */ - swap_memzero(vd, reg_off); - mem_off += 1ULL << msz; - reg_off += 1ULL << esz; - - /* Try again to read the balance of the page. */ - split = max_for_page(env->uc, addr, mem_off - 1, mem_max); - if (split >= (1ULL << msz)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (host) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); - reg_off = mem_off << diffsz; - } + if (unlikely(flags & TLB_MMIO)) { + goto do_fault; } - clear_helper_retaddr(); - record_fault(env, reg_off, reg_max); -} + reg_last = info.reg_off_last[0]; + host = info.page[0].host; -/* - * Common helper for all contiguous no-fault loads. 
- */ -static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const int esz, const int msz, - sve_ld1_host_fn *host_fn) -{ - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; - const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - const int mmu_idx = cpu_mmu_index(env, false); - intptr_t split, reg_off, mem_off; - void *host; + do { + uint64_t pg = *(uint64_t *)((char*)vg + (reg_off >> 3)); + do { + if ((pg >> (reg_off & 63)) & 1) { + if (unlikely(flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches(env_cpu(env), + addr + mem_off, 1 << msz) & + BP_MEM_READ)) { + goto do_fault; + } + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } + host_fn(vd, reg_off, (char*)host + mem_off); + } + reg_off += 1 << esz; + mem_off += 1 << msz; + } while (reg_off <= reg_last && (reg_off & 63)); + } while (reg_off <= reg_last); - /* There will be no fault, so we may modify in advance. */ - memset(vd, 0, reg_max); + /* + * MemSingleNF is allowed to fail for any reason. We have special + * code above to handle the first element crossing a page boundary. + * As an implementation choice, decline to handle a cross-page element + * in any other position. + */ + reg_off = info.reg_off_split; + if (reg_off >= 0) { + goto do_fault; + } - /* Skip to the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { - /* The entire predicate was false; no load occurs. */ +second_page: + reg_off = info.reg_off_first[1]; + if (likely(reg_off < 0)) { + /* No active elements on the second page. All done. */ return; } - mem_off = reg_off >> diffsz; /* - * If the address is not in the TLB, we have no way to bring the - * entry into the TLB without also risking a fault. Note that - * the corollary is that we never load from an address not in RAM. - * - * This last is out of spec, in a weird corner case. - * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory - * must not actually hit the bus -- it returns UNKNOWN data instead. - * But if you map non-RAM with Normal memory attributes and do a NF - * load then it should access the bus. (Nobody ought actually do this - * in the real world, obviously.) - * - * Then there are the annoying special cases with watchpoints... - * TODO: Add a form of non-faulting loads using cc->tlb_fill(probe=true). + * MemSingleNF is allowed to fail for any reason. As an implementation + * choice, decline to handle elements on the second page. This should + * be low frequency as the guest walks through memory -- the next + * iteration of the guest's loop should be aligned on the page boundary, + * and then all following iterations will stay aligned. 
*/ - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - split = max_for_page(env->uc, addr, mem_off, mem_max); - if (host && split >= (1ULL << msz)) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); - reg_off = mem_off << diffsz; - } +do_fault: record_fault(env, reg_off, reg_max); } -#define DO_LDFF1_LDNF1_1(PART, ESZ) \ -void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ - sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ -} \ -void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ -} - -#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ -void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ -} \ -void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ -} \ -void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ -} \ -void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ -} - -DO_LDFF1_LDNF1_1(bb, 0) -DO_LDFF1_LDNF1_1(bhu, 1) -DO_LDFF1_LDNF1_1(bhs, 1) -DO_LDFF1_LDNF1_1(bsu, 2) -DO_LDFF1_LDNF1_1(bss, 2) -DO_LDFF1_LDNF1_1(bdu, 3) -DO_LDFF1_LDNF1_1(bds, 3) - -DO_LDFF1_LDNF1_2(hh, 1, 1) -DO_LDFF1_LDNF1_2(hsu, 2, 1) -DO_LDFF1_LDNF1_2(hss, 2, 1) -DO_LDFF1_LDNF1_2(hdu, 3, 1) -DO_LDFF1_LDNF1_2(hds, 3, 1) - -DO_LDFF1_LDNF1_2(ss, 2, 2) -DO_LDFF1_LDNF1_2(sdu, 3, 2) -DO_LDFF1_LDNF1_2(sds, 3, 2) - -DO_LDFF1_LDNF1_2(dd, 3, 3) - -#undef DO_LDFF1_LDNF1_1 -#undef DO_LDFF1_LDNF1_2 - -/* - * Store contiguous data, protected by a governing predicate. - */ +static inline QEMU_ALWAYS_INLINE void +sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, uint32_t desc, + const uintptr_t retaddr, const int esz, const int msz, + const SVEContFault fault, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); -#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ -{ \ - TLB(env, addr, *(TYPEM *)((char *)vd + H(reg_off)), oi, ra); \ -} + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); -DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu) + /* Perform gross MTE suppression early. 
*/ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } -DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu) + sve_ldnfff1_r(env, vg, addr, desc, retaddr, mtedesc, esz, msz, fault, + host_fn, tlb_fn); +} + +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ + void HELPER(sve_ldff1##PART##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_FIRST, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_NO, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, \ + FAULT_FIRST, sve_ld1##PART##_host, \ + sve_ld1##PART##_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + } -DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu) -DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu) +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ + void HELPER(sve_ldff1##PART##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + 
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } -DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu) +DO_LDFF1_LDNF1_1(bb, MO_8) +DO_LDFF1_LDNF1_1(bhu, MO_16) +DO_LDFF1_LDNF1_1(bhs, MO_16) +DO_LDFF1_LDNF1_1(bsu, MO_32) +DO_LDFF1_LDNF1_1(bss, MO_32) +DO_LDFF1_LDNF1_1(bdu, MO_64) +DO_LDFF1_LDNF1_1(bds, MO_64) -DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_LDFF1_LDNF1_2(hh, MO_16, MO_16) +DO_LDFF1_LDNF1_2(hsu, MO_32, MO_16) +DO_LDFF1_LDNF1_2(hss, MO_32, MO_16) +DO_LDFF1_LDNF1_2(hdu, MO_64, MO_16) +DO_LDFF1_LDNF1_2(hds, MO_64, MO_16) -DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu) -DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu) +DO_LDFF1_LDNF1_2(ss, MO_32, MO_32) +DO_LDFF1_LDNF1_2(sdu, MO_64, MO_32) +DO_LDFF1_LDNF1_2(sds, MO_64, MO_32) -DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu) +DO_LDFF1_LDNF1_2(dd, MO_64, MO_64) -#undef DO_ST_TLB +#undef DO_LDFF1_LDNF1_1 +#undef DO_LDFF1_LDNF1_2 /* - * Common helpers for all contiguous 1,2,3,4-register predicated stores. + * Common helper for all contiguous 1,2,3,4-register predicated stores. */ -static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) + +static inline QEMU_ALWAYS_INLINE void +sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t retaddr, const int esz, const int msz, const int N, + uint32_t mtedesc, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn, sve_cont_ldst_mte_check_fn *mte_check_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *vd = &env->vfp.zregs[rd]; + const unsigned rd = simd_data(desc); + const intptr_t reg_max = simd_oprsz(desc); + intptr_t reg_off, reg_last, mem_off; + SVEContLdSt info; + void *host; + int i, flags; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, vd, i, addr, oi, ra); + /* Find the active elements. */ + if (!sve_cont_ldst_elements(env, &info, addr, vg, reg_max, esz, N << msz)) { + /* The entire predicate was false; no store occurs. */ + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, retaddr); + + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, + BP_MEM_WRITE, retaddr); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, mtedesc, + retaddr); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. We cannot avoid + * this fault and will leave with the store incomplete. 
+ */ + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; } - i += esize, pg >>= esize; - addr += msize; - } while (i & 15); + } + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + return; +#endif } - clear_helper_retaddr(); -} -static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } } - i += esize, pg >>= esize; - addr += 2 * msize; - } while (i & 15); + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off <= reg_last && (reg_off & 63)); } - clear_helper_retaddr(); -} -static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. 
+ */ + mem_off = info.mem_off_split; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_split; + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + + mem_off = info.mem_off_first[1]; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_first[1]; + reg_last = info.reg_off_last[1]; + host = info.page[1].host; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); - tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); - } - i += esize, pg >>= esize; - addr += 3 * msize; - } while (i & 15); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); } - clear_helper_retaddr(); } -static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t ra, const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; - void *d4 = &env->vfp.zregs[(rd + 3) & 31]; + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); - tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); - tlb_fn(env, d4, i, addr + 3 * msize, oi, ra); - } - i += esize, pg >>= esize; - addr += 4 * msize; - } while (i & 15); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? 
sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); +} + +#define DO_STN_1(N, NAME, ESZ) \ + void HELPER(sve_st##N##NAME##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb, NULL); \ + } \ + void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ + } + +#define DO_STN_2(N, NAME, ESZ, MSZ) \ + void HELPER(sve_st##N##NAME##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb, NULL); \ + } \ + void HELPER(sve_st##N##NAME##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb, NULL); \ + } \ + void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ + } \ + void HELPER(sve_st##N##NAME##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ } - clear_helper_retaddr(); -} - -#define DO_STN_1(N, NAME, ESIZE) \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \ - sve_st1##NAME##_tlb); \ -} - -#define DO_STN_2(N, NAME, ESIZE, MSIZE) \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_le_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ - sve_st1##NAME##_le_tlb); \ -} \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_be_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ - sve_st1##NAME##_be_tlb); \ -} - -DO_STN_1(1, bb, 1) -DO_STN_1(1, bh, 2) -DO_STN_1(1, bs, 4) -DO_STN_1(1, bd, 8) -DO_STN_1(2, bb, 1) -DO_STN_1(3, bb, 1) -DO_STN_1(4, bb, 1) - -DO_STN_2(1, hh, 2, 2) -DO_STN_2(1, hs, 4, 2) -DO_STN_2(1, hd, 8, 2) -DO_STN_2(2, hh, 2, 2) -DO_STN_2(3, hh, 2, 2) -DO_STN_2(4, hh, 2, 2) - -DO_STN_2(1, ss, 4, 4) -DO_STN_2(1, sd, 8, 4) -DO_STN_2(2, ss, 4, 4) -DO_STN_2(3, ss, 4, 4) -DO_STN_2(4, ss, 4, 4) - -DO_STN_2(1, dd, 8, 8) -DO_STN_2(2, dd, 8, 8) -DO_STN_2(3, dd, 8, 8) -DO_STN_2(4, dd, 8, 8) + +DO_STN_1(1, bb, MO_8) +DO_STN_1(1, bh, MO_16) +DO_STN_1(1, bs, MO_32) +DO_STN_1(1, bd, MO_64) +DO_STN_1(2, bb, MO_8) +DO_STN_1(3, bb, MO_8) +DO_STN_1(4, bb, MO_8) + +DO_STN_2(1, hh, MO_16, MO_16) +DO_STN_2(1, hs, MO_32, MO_16) +DO_STN_2(1, hd, MO_64, MO_16) +DO_STN_2(2, hh, MO_16, MO_16) +DO_STN_2(3, hh, MO_16, MO_16) +DO_STN_2(4, hh, MO_16, MO_16) + +DO_STN_2(1, ss, MO_32, MO_32) +DO_STN_2(1, sd, MO_64, MO_32) +DO_STN_2(2, ss, MO_32, MO_32) +DO_STN_2(3, ss, MO_32, MO_32) +DO_STN_2(4, ss, MO_32, MO_32) + +DO_STN_2(1, dd, MO_64, MO_64) +DO_STN_2(2, dd, MO_64, MO_64) +DO_STN_2(3, dd, MO_64, MO_64) +DO_STN_2(4, dd, MO_64, MO_64) #undef DO_STN_1 #undef DO_STN_2 @@ -4878,497 +5361,578 @@ typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs); 
static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs) { - return *(uint32_t *)((char *)reg + H1_4(reg_ofs)); + return *(uint32_t *)((char*)reg + H1_4(reg_ofs)); } static target_ulong off_zss_s(void *reg, intptr_t reg_ofs) { - return *(int32_t *)((char *)reg + H1_4(reg_ofs)); + return *(int32_t *)((char*)reg + H1_4(reg_ofs)); } static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs) { - return (uint32_t)*(uint64_t *)((char *)reg + reg_ofs); + return (uint32_t)*(uint64_t *)((char*)reg + reg_ofs); } static target_ulong off_zss_d(void *reg, intptr_t reg_ofs) { - return (int32_t)*(uint64_t *)((char *)reg + reg_ofs); + return (int32_t)*(uint64_t *)((char*)reg + reg_ofs); } static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) { - return *(uint64_t *)((char *)reg + reg_ofs); + return *(uint64_t *)((char*)reg + reg_ofs); } -static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, + uint32_t desc, uintptr_t retaddr, uint32_t mtedesc, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch = { 0 }; + uc_engine *uc = env->uc; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + ARMVectorReg scratch; + intptr_t reg_off; + SVEHostPage info, info2; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + memset(&scratch, 0, reg_max); + reg_off = 0; + do { + uint64_t pg = vg[reg_off >> 6]; do { if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i); - tlb_fn(env, &scratch, i, base + (off << scale), oi, ra); + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + target_ulong in_page = -(addr | TARGET_PAGE_MASK); + + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_LOAD, + mmu_idx, retaddr); + + if (likely(in_page >= msize)) { + if (unlikely(info.flags & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, + info.attrs, BP_MEM_READ, retaddr); + } + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } + host_fn(&scratch, reg_off, info.host); + } else { + /* Element crosses the page boundary. */ + sve_probe_page(&info2, false, env, addr + in_page, 0, + MMU_DATA_LOAD, mmu_idx, retaddr); + if (unlikely((info.flags | info2.flags) & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, + info.attrs, BP_MEM_READ, retaddr); + } + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } + tlb_fn(env, &scratch, reg_off, addr, retaddr); + } } - i += 4, pg >>= 4; - } while (i & 15); - } - clear_helper_retaddr(); + reg_off += esize; + pg >>= esize; + } while (reg_off & 63); + } while (reg_off < reg_max); /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(vd, &scratch, oprsz); + memcpy(vd, &scratch, reg_max); } -static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc) / 8; - ARMVectorReg scratch = { 0 }; + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - set_helper_retaddr(ra); - for (i = 0; i < oprsz; i++) { - uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i * 8); - tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra); - } + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ld1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, esize, msize, + off_fn, host_fn, tlb_fn); +} + +#define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \ + void HELPER(sve_ld##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << (MSZ), \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << (MSZ), \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(vd, &scratch, oprsz * 8); -} - -#define DO_LD1_ZPZ_S(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_ld1##MEM##_tlb); \ -} - -#define DO_LD1_ZPZ_D(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_ld1##MEM##_tlb); \ -} - -DO_LD1_ZPZ_S(bsu, zsu) -DO_LD1_ZPZ_S(bsu, zss) -DO_LD1_ZPZ_D(bdu, zsu) -DO_LD1_ZPZ_D(bdu, zss) -DO_LD1_ZPZ_D(bdu, zd) - -DO_LD1_ZPZ_S(bss, zsu) -DO_LD1_ZPZ_S(bss, zss) -DO_LD1_ZPZ_D(bds, zsu) -DO_LD1_ZPZ_D(bds, zss) -DO_LD1_ZPZ_D(bds, zd) - -DO_LD1_ZPZ_S(hsu_le, zsu) -DO_LD1_ZPZ_S(hsu_le, zss) -DO_LD1_ZPZ_D(hdu_le, zsu) -DO_LD1_ZPZ_D(hdu_le, zss) -DO_LD1_ZPZ_D(hdu_le, zd) - -DO_LD1_ZPZ_S(hsu_be, zsu) -DO_LD1_ZPZ_S(hsu_be, zss) -DO_LD1_ZPZ_D(hdu_be, zsu) -DO_LD1_ZPZ_D(hdu_be, zss) -DO_LD1_ZPZ_D(hdu_be, zd) - -DO_LD1_ZPZ_S(hss_le, zsu) -DO_LD1_ZPZ_S(hss_le, zss) -DO_LD1_ZPZ_D(hds_le, zsu) -DO_LD1_ZPZ_D(hds_le, zss) -DO_LD1_ZPZ_D(hds_le, zd) - -DO_LD1_ZPZ_S(hss_be, zsu) -DO_LD1_ZPZ_S(hss_be, zss) -DO_LD1_ZPZ_D(hds_be, zsu) -DO_LD1_ZPZ_D(hds_be, zss) -DO_LD1_ZPZ_D(hds_be, zd) - -DO_LD1_ZPZ_S(ss_le, zsu) -DO_LD1_ZPZ_S(ss_le, zss) -DO_LD1_ZPZ_D(sdu_le, zsu) -DO_LD1_ZPZ_D(sdu_le, zss) -DO_LD1_ZPZ_D(sdu_le, zd) - -DO_LD1_ZPZ_S(ss_be, zsu) -DO_LD1_ZPZ_S(ss_be, zss) -DO_LD1_ZPZ_D(sdu_be, zsu) -DO_LD1_ZPZ_D(sdu_be, zss) -DO_LD1_ZPZ_D(sdu_be, zd) - -DO_LD1_ZPZ_D(sds_le, zsu) -DO_LD1_ZPZ_D(sds_le, zss) -DO_LD1_ZPZ_D(sds_le, zd) - -DO_LD1_ZPZ_D(sds_be, zsu) -DO_LD1_ZPZ_D(sds_be, zss) -DO_LD1_ZPZ_D(sds_be, zd) - -DO_LD1_ZPZ_D(dd_le, zsu) -DO_LD1_ZPZ_D(dd_le, zss) -DO_LD1_ZPZ_D(dd_le, zd) - -DO_LD1_ZPZ_D(dd_be, zsu) -DO_LD1_ZPZ_D(dd_be, zss) -DO_LD1_ZPZ_D(dd_be, zd) +#define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \ + void HELPER(sve_ld##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << (MSZ), \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << (MSZ), \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } + +DO_LD1_ZPZ_S(bsu, zsu, MO_8) +DO_LD1_ZPZ_S(bsu, zss, MO_8) +DO_LD1_ZPZ_D(bdu, zsu, MO_8) +DO_LD1_ZPZ_D(bdu, zss, MO_8) +DO_LD1_ZPZ_D(bdu, zd, MO_8) + +DO_LD1_ZPZ_S(bss, zsu, MO_8) +DO_LD1_ZPZ_S(bss, zss, MO_8) +DO_LD1_ZPZ_D(bds, zsu, MO_8) +DO_LD1_ZPZ_D(bds, zss, MO_8) +DO_LD1_ZPZ_D(bds, zd, MO_8) + +DO_LD1_ZPZ_S(hsu_le, zsu, MO_16) +DO_LD1_ZPZ_S(hsu_le, zss, MO_16) +DO_LD1_ZPZ_D(hdu_le, zsu, MO_16) +DO_LD1_ZPZ_D(hdu_le, zss, MO_16) +DO_LD1_ZPZ_D(hdu_le, zd, MO_16) + +DO_LD1_ZPZ_S(hsu_be, zsu, MO_16) +DO_LD1_ZPZ_S(hsu_be, zss, MO_16) +DO_LD1_ZPZ_D(hdu_be, zsu, MO_16) +DO_LD1_ZPZ_D(hdu_be, zss, MO_16) +DO_LD1_ZPZ_D(hdu_be, zd, MO_16) + +DO_LD1_ZPZ_S(hss_le, zsu, MO_16) +DO_LD1_ZPZ_S(hss_le, zss, MO_16) +DO_LD1_ZPZ_D(hds_le, zsu, MO_16) +DO_LD1_ZPZ_D(hds_le, zss, MO_16) +DO_LD1_ZPZ_D(hds_le, zd, MO_16) + +DO_LD1_ZPZ_S(hss_be, zsu, MO_16) +DO_LD1_ZPZ_S(hss_be, zss, MO_16) +DO_LD1_ZPZ_D(hds_be, zsu, MO_16) +DO_LD1_ZPZ_D(hds_be, zss, MO_16) +DO_LD1_ZPZ_D(hds_be, zd, MO_16) + 
+DO_LD1_ZPZ_S(ss_le, zsu, MO_32) +DO_LD1_ZPZ_S(ss_le, zss, MO_32) +DO_LD1_ZPZ_D(sdu_le, zsu, MO_32) +DO_LD1_ZPZ_D(sdu_le, zss, MO_32) +DO_LD1_ZPZ_D(sdu_le, zd, MO_32) + +DO_LD1_ZPZ_S(ss_be, zsu, MO_32) +DO_LD1_ZPZ_S(ss_be, zss, MO_32) +DO_LD1_ZPZ_D(sdu_be, zsu, MO_32) +DO_LD1_ZPZ_D(sdu_be, zss, MO_32) +DO_LD1_ZPZ_D(sdu_be, zd, MO_32) + +DO_LD1_ZPZ_D(sds_le, zsu, MO_32) +DO_LD1_ZPZ_D(sds_le, zss, MO_32) +DO_LD1_ZPZ_D(sds_le, zd, MO_32) + +DO_LD1_ZPZ_D(sds_be, zsu, MO_32) +DO_LD1_ZPZ_D(sds_be, zss, MO_32) +DO_LD1_ZPZ_D(sds_be, zd, MO_32) + +DO_LD1_ZPZ_D(dd_le, zsu, MO_64) +DO_LD1_ZPZ_D(dd_le, zss, MO_64) +DO_LD1_ZPZ_D(dd_le, zd, MO_64) + +DO_LD1_ZPZ_D(dd_be, zsu, MO_64) +DO_LD1_ZPZ_D(dd_be, zss, MO_64) +DO_LD1_ZPZ_D(dd_be, zd, MO_64) #undef DO_LD1_ZPZ_S #undef DO_LD1_ZPZ_D /* First fault loads with a vector index. */ -/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting. - * The controlling predicate is known to be true. Return true if the - * load was successful. - */ -typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off, - target_ulong vaddr, int mmu_idx); - -#ifdef _MSC_VER -#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ -static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, int mmu_idx) \ -{ \ - struct uc_struct *uc = env->uc; \ - target_ulong next_page = 0ULL - (addr | TARGET_PAGE_MASK); \ - if (likely(next_page - addr >= sizeof(TYPEM))) { \ - void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ - if (likely(host)) { \ - TYPEM val = HOST(host); \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ - return true; \ - } \ - } \ - return false; \ -} -#else -#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ -static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, int mmu_idx) \ -{ \ - struct uc_struct *uc = env->uc; \ - target_ulong next_page = -(addr | TARGET_PAGE_MASK); \ - if (likely(next_page - addr >= sizeof(TYPEM))) { \ - void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ - if (likely(host)) { \ - TYPEM val = HOST(host); \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ - return true; \ - } \ - } \ - return false; \ -} -#endif - -DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p) -DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p) -DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p) -DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p) - -DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p) -DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p) -DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p) -DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p) -DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p) -DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p) -DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p) -DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p) - -DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p) -DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p) -DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p) -DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p) -DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p) -DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p) - -DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p) -DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p) - /* - * Common helper for all gather first-faulting loads. + * Common helpers for all gather first-faulting loads. 
*/ -static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, - sve_ld1_nf_fn *nonfault_fn) + +static inline QEMU_ALWAYS_INLINE void +sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + uint32_t mtedesc, const int esz, const int msz, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t reg_off, reg_max = simd_oprsz(desc); - target_ulong addr; + uc_engine *uc = env->uc; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + const int esize = 1 << esz; + const int msize = 1 << msz; + intptr_t reg_off; + SVEHostPage info; + target_ulong addr, in_page; /* Skip to the first true predicate. */ - reg_off = find_next_active(vg, 0, reg_max, MO_32); - if (likely(reg_off < reg_max)) { - /* Perform one normal read, which will fault or not. */ - set_helper_retaddr(ra); - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - tlb_fn(env, vd, reg_off, addr, oi, ra); + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off >= reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } - /* The rest of the reads will be non-faulting. */ - clear_helper_retaddr(); + /* + * Probe the first element, allowing faults. + */ + addr = base + (off_fn(vm, reg_off) << scale); + if (mtedesc) { + mte_check1(env, mtedesc, addr, retaddr); } + tlb_fn(env, vd, reg_off, addr, retaddr); - /* After any fault, zero the leading predicated false elements. */ + /* After any fault, zero the other elements. */ swap_memzero(vd, reg_off); + reg_off += esize; + swap_memzero((char*)vd + reg_off, reg_max - reg_off); - while (likely((reg_off += 4) < reg_max)) { - uint64_t pg = *(uint64_t *)((char *)vg + (reg_off >> 6) * 8); - if (likely((pg >> (reg_off & 63)) & 1)) { - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { - record_fault(env, reg_off, reg_max); - break; + /* + * Probe the remaining elements, not allowing faults. + */ + while (reg_off < reg_max) { + uint64_t pg = vg[reg_off >> 6]; + do { + if (likely((pg >> (reg_off & 63)) & 1)) { + addr = base + (off_fn(vm, reg_off) << scale); + in_page = -(addr | TARGET_PAGE_MASK); + + if (unlikely(in_page < msize)) { + /* Stop if the element crosses a page boundary. 
*/ + goto fault; + } + + sve_probe_page(&info, true, env, addr, 0, MMU_DATA_LOAD, + mmu_idx, retaddr); + if (unlikely(info.flags & (TLB_INVALID_MASK | TLB_MMIO))) { + goto fault; + } + if (unlikely(info.flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches(env_cpu(env), addr, msize) & + BP_MEM_READ)) { + goto fault; + } + if (mtedesc && arm_tlb_mte_tagged(&info.attrs) && + !mte_probe1(env, mtedesc, addr)) { + goto fault; + } + + host_fn(vd, reg_off, info.host); } - } else { - *(uint32_t *)((char *)vd + H1_4(reg_off)) = 0; - } + reg_off += esize; + } while (reg_off & 63); } + return; + +fault: + record_fault(env, reg_off, reg_max); } -static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, - sve_ld1_nf_fn *nonfault_fn) +static inline QEMU_ALWAYS_INLINE void +sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + const int esz, const int msz, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t reg_off, reg_max = simd_oprsz(desc); - target_ulong addr; - - /* Skip to the first true predicate. */ - reg_off = find_next_active(vg, 0, reg_max, MO_64); - if (likely(reg_off < reg_max)) { - /* Perform one normal read, which will fault or not. */ - set_helper_retaddr(ra); - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - tlb_fn(env, vd, reg_off, addr, oi, ra); + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* The rest of the reads will be non-faulting. */ - clear_helper_retaddr(); + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ldff1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, esz, msz, off_fn, + host_fn, tlb_fn); +} + +#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ + void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ldff##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, \ + sve_ld1##MEM##_tlb); \ } - /* After any fault, zero the leading predicated false elements. 
*/ - swap_memzero(vd, reg_off); - - while (likely((reg_off += 8) < reg_max)) { - uint8_t pg = *(uint8_t *)((char *)vg + H1(reg_off >> 3)); - if (likely(pg & 1)) { - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { - record_fault(env, reg_off, reg_max); - break; - } - } else { - *(uint64_t *)((char *)vd + reg_off) = 0; - } +#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ + void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ldff##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, \ + sve_ld1##MEM##_tlb); \ } -} -#define DO_LDFF1_ZPZ_S(MEM, OFS) \ -void HELPER(sve_ldff##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ -} - -#define DO_LDFF1_ZPZ_D(MEM, OFS) \ -void HELPER(sve_ldff##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ -} - -DO_LDFF1_ZPZ_S(bsu, zsu) -DO_LDFF1_ZPZ_S(bsu, zss) -DO_LDFF1_ZPZ_D(bdu, zsu) -DO_LDFF1_ZPZ_D(bdu, zss) -DO_LDFF1_ZPZ_D(bdu, zd) - -DO_LDFF1_ZPZ_S(bss, zsu) -DO_LDFF1_ZPZ_S(bss, zss) -DO_LDFF1_ZPZ_D(bds, zsu) -DO_LDFF1_ZPZ_D(bds, zss) -DO_LDFF1_ZPZ_D(bds, zd) - -DO_LDFF1_ZPZ_S(hsu_le, zsu) -DO_LDFF1_ZPZ_S(hsu_le, zss) -DO_LDFF1_ZPZ_D(hdu_le, zsu) -DO_LDFF1_ZPZ_D(hdu_le, zss) -DO_LDFF1_ZPZ_D(hdu_le, zd) - -DO_LDFF1_ZPZ_S(hsu_be, zsu) -DO_LDFF1_ZPZ_S(hsu_be, zss) -DO_LDFF1_ZPZ_D(hdu_be, zsu) -DO_LDFF1_ZPZ_D(hdu_be, zss) -DO_LDFF1_ZPZ_D(hdu_be, zd) - -DO_LDFF1_ZPZ_S(hss_le, zsu) -DO_LDFF1_ZPZ_S(hss_le, zss) -DO_LDFF1_ZPZ_D(hds_le, zsu) -DO_LDFF1_ZPZ_D(hds_le, zss) -DO_LDFF1_ZPZ_D(hds_le, zd) - -DO_LDFF1_ZPZ_S(hss_be, zsu) -DO_LDFF1_ZPZ_S(hss_be, zss) -DO_LDFF1_ZPZ_D(hds_be, zsu) -DO_LDFF1_ZPZ_D(hds_be, zss) -DO_LDFF1_ZPZ_D(hds_be, zd) - -DO_LDFF1_ZPZ_S(ss_le, zsu) -DO_LDFF1_ZPZ_S(ss_le, zss) -DO_LDFF1_ZPZ_D(sdu_le, zsu) -DO_LDFF1_ZPZ_D(sdu_le, zss) -DO_LDFF1_ZPZ_D(sdu_le, zd) - -DO_LDFF1_ZPZ_S(ss_be, zsu) -DO_LDFF1_ZPZ_S(ss_be, zss) -DO_LDFF1_ZPZ_D(sdu_be, zsu) -DO_LDFF1_ZPZ_D(sdu_be, zss) -DO_LDFF1_ZPZ_D(sdu_be, zd) - -DO_LDFF1_ZPZ_D(sds_le, zsu) -DO_LDFF1_ZPZ_D(sds_le, zss) -DO_LDFF1_ZPZ_D(sds_le, zd) - -DO_LDFF1_ZPZ_D(sds_be, zsu) -DO_LDFF1_ZPZ_D(sds_be, zss) -DO_LDFF1_ZPZ_D(sds_be, zd) - -DO_LDFF1_ZPZ_D(dd_le, zsu) -DO_LDFF1_ZPZ_D(dd_le, zss) -DO_LDFF1_ZPZ_D(dd_le, zd) - -DO_LDFF1_ZPZ_D(dd_be, zsu) -DO_LDFF1_ZPZ_D(dd_be, zss) -DO_LDFF1_ZPZ_D(dd_be, zd) +DO_LDFF1_ZPZ_S(bsu, zsu, MO_8) +DO_LDFF1_ZPZ_S(bsu, zss, MO_8) +DO_LDFF1_ZPZ_D(bdu, zsu, MO_8) +DO_LDFF1_ZPZ_D(bdu, zss, MO_8) +DO_LDFF1_ZPZ_D(bdu, zd, MO_8) + +DO_LDFF1_ZPZ_S(bss, zsu, MO_8) +DO_LDFF1_ZPZ_S(bss, zss, MO_8) +DO_LDFF1_ZPZ_D(bds, zsu, MO_8) +DO_LDFF1_ZPZ_D(bds, zss, MO_8) +DO_LDFF1_ZPZ_D(bds, zd, MO_8) + +DO_LDFF1_ZPZ_S(hsu_le, zsu, MO_16) +DO_LDFF1_ZPZ_S(hsu_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zsu, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zd, MO_16) + 
+DO_LDFF1_ZPZ_S(hsu_be, zsu, MO_16) +DO_LDFF1_ZPZ_S(hsu_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zsu, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zd, MO_16) + +DO_LDFF1_ZPZ_S(hss_le, zsu, MO_16) +DO_LDFF1_ZPZ_S(hss_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zsu, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zd, MO_16) + +DO_LDFF1_ZPZ_S(hss_be, zsu, MO_16) +DO_LDFF1_ZPZ_S(hss_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zsu, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zd, MO_16) + +DO_LDFF1_ZPZ_S(ss_le, zsu, MO_32) +DO_LDFF1_ZPZ_S(ss_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zsu, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zd, MO_32) + +DO_LDFF1_ZPZ_S(ss_be, zsu, MO_32) +DO_LDFF1_ZPZ_S(ss_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zsu, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zd, MO_32) + +DO_LDFF1_ZPZ_D(sds_le, zsu, MO_32) +DO_LDFF1_ZPZ_D(sds_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sds_le, zd, MO_32) + +DO_LDFF1_ZPZ_D(sds_be, zsu, MO_32) +DO_LDFF1_ZPZ_D(sds_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sds_be, zd, MO_32) + +DO_LDFF1_ZPZ_D(dd_le, zsu, MO_64) +DO_LDFF1_ZPZ_D(dd_le, zss, MO_64) +DO_LDFF1_ZPZ_D(dd_le, zd, MO_64) + +DO_LDFF1_ZPZ_D(dd_be, zsu, MO_64) +DO_LDFF1_ZPZ_D(dd_be, zss, MO_64) +DO_LDFF1_ZPZ_D(dd_be, zd, MO_64) /* Stores with a vector index. */ -static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, + uint32_t desc, uintptr_t retaddr, uint32_t mtedesc, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc); + uc_engine *uc = env->uc; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + void *host[ARM_MAX_VQ * 4]; + intptr_t reg_off, i; + SVEHostPage info, info2; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + /* + * Probe all of the elements for host addresses and flags. + */ + i = reg_off = 0; + do { + uint64_t pg = vg[reg_off >> 6]; do { - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i); - tlb_fn(env, vd, i, base + (off << scale), oi, ra); + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + target_ulong in_page = -(addr | TARGET_PAGE_MASK); + + host[i] = NULL; + if (likely((pg >> (reg_off & 63)) & 1)) { + if (likely(in_page >= msize)) { + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE, + mmu_idx, retaddr); + host[i] = info.host; + } else { + /* + * Element crosses the page boundary. + * Probe both pages, but do not record the host address, + * so that we use the slow path. 
+ */ + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE, + mmu_idx, retaddr); + sve_probe_page(&info2, false, env, addr + in_page, 0, + MMU_DATA_STORE, mmu_idx, retaddr); + info.flags |= info2.flags; + } + + if (unlikely(info.flags & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, info.attrs, + BP_MEM_WRITE, retaddr); + } + + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } } - i += 4, pg >>= 4; - } while (i & 15); - } - clear_helper_retaddr(); + i += 1; + reg_off += esize; + } while (reg_off & 63); + } while (reg_off < reg_max); + + /* + * Now that we have recognized all exceptions except SyncExternal + * (from TLB_MMIO), which we cannot avoid, perform all of the stores. + * + * Note for the common case of an element in RAM, not crossing a page + * boundary, we have stored the host address in host[]. This doubles + * as a first-level check against the predicate, since only enabled + * elements have non-null host addresses. + */ + i = reg_off = 0; + do { + void *h = host[i]; + if (likely(h != NULL)) { + host_fn(vd, reg_off, h); + } else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) { + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + tlb_fn(env, vd, reg_off, addr, retaddr); + } + i += 1; + reg_off += esize; + } while (reg_off < reg_max); } -static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc) / 8; + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - set_helper_retaddr(ra); - for (i = 0; i < oprsz; i++) { - uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i * 8); - tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra); - } + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). 
+ */ + sve_st1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, esize, msize, + off_fn, host_fn, tlb_fn); +} + +#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ + void HELPER(sve_st##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ + } \ + void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } - clear_helper_retaddr(); -} - -#define DO_ST1_ZPZ_S(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_st1##MEM##_tlb); \ -} - -#define DO_ST1_ZPZ_D(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_st1##MEM##_tlb); \ -} - -DO_ST1_ZPZ_S(bs, zsu) -DO_ST1_ZPZ_S(hs_le, zsu) -DO_ST1_ZPZ_S(hs_be, zsu) -DO_ST1_ZPZ_S(ss_le, zsu) -DO_ST1_ZPZ_S(ss_be, zsu) - -DO_ST1_ZPZ_S(bs, zss) -DO_ST1_ZPZ_S(hs_le, zss) -DO_ST1_ZPZ_S(hs_be, zss) -DO_ST1_ZPZ_S(ss_le, zss) -DO_ST1_ZPZ_S(ss_be, zss) - -DO_ST1_ZPZ_D(bd, zsu) -DO_ST1_ZPZ_D(hd_le, zsu) -DO_ST1_ZPZ_D(hd_be, zsu) -DO_ST1_ZPZ_D(sd_le, zsu) -DO_ST1_ZPZ_D(sd_be, zsu) -DO_ST1_ZPZ_D(dd_le, zsu) -DO_ST1_ZPZ_D(dd_be, zsu) - -DO_ST1_ZPZ_D(bd, zss) -DO_ST1_ZPZ_D(hd_le, zss) -DO_ST1_ZPZ_D(hd_be, zss) -DO_ST1_ZPZ_D(sd_le, zss) -DO_ST1_ZPZ_D(sd_be, zss) -DO_ST1_ZPZ_D(dd_le, zss) -DO_ST1_ZPZ_D(dd_be, zss) - -DO_ST1_ZPZ_D(bd, zd) -DO_ST1_ZPZ_D(hd_le, zd) -DO_ST1_ZPZ_D(hd_be, zd) -DO_ST1_ZPZ_D(sd_le, zd) -DO_ST1_ZPZ_D(sd_be, zd) -DO_ST1_ZPZ_D(dd_le, zd) -DO_ST1_ZPZ_D(dd_be, zd) + +#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ + void HELPER(sve_st##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ + } \ + void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ + } + +DO_ST1_ZPZ_S(bs, zsu, MO_8) +DO_ST1_ZPZ_S(hs_le, zsu, MO_16) +DO_ST1_ZPZ_S(hs_be, zsu, MO_16) +DO_ST1_ZPZ_S(ss_le, zsu, MO_32) +DO_ST1_ZPZ_S(ss_be, zsu, MO_32) + +DO_ST1_ZPZ_S(bs, zss, MO_8) +DO_ST1_ZPZ_S(hs_le, zss, MO_16) +DO_ST1_ZPZ_S(hs_be, zss, MO_16) +DO_ST1_ZPZ_S(ss_le, zss, MO_32) +DO_ST1_ZPZ_S(ss_be, zss, MO_32) + +DO_ST1_ZPZ_D(bd, zsu, MO_8) +DO_ST1_ZPZ_D(hd_le, zsu, MO_16) +DO_ST1_ZPZ_D(hd_be, zsu, MO_16) +DO_ST1_ZPZ_D(sd_le, zsu, MO_32) +DO_ST1_ZPZ_D(sd_be, zsu, MO_32) +DO_ST1_ZPZ_D(dd_le, zsu, MO_64) +DO_ST1_ZPZ_D(dd_be, zsu, MO_64) + +DO_ST1_ZPZ_D(bd, zss, MO_8) +DO_ST1_ZPZ_D(hd_le, zss, MO_16) +DO_ST1_ZPZ_D(hd_be, zss, MO_16) +DO_ST1_ZPZ_D(sd_le, zss, MO_32) +DO_ST1_ZPZ_D(sd_be, zss, MO_32) +DO_ST1_ZPZ_D(dd_le, zss, MO_64) +DO_ST1_ZPZ_D(dd_be, zss, MO_64) + +DO_ST1_ZPZ_D(bd, zd, MO_8) +DO_ST1_ZPZ_D(hd_le, zd, MO_16) +DO_ST1_ZPZ_D(hd_be, zd, MO_16) +DO_ST1_ZPZ_D(sd_le, zd, 
MO_32) +DO_ST1_ZPZ_D(sd_be, zd, MO_32) +DO_ST1_ZPZ_D(dd_le, zd, MO_64) +DO_ST1_ZPZ_D(dd_be, zd, MO_64) #undef DO_ST1_ZPZ_S #undef DO_ST1_ZPZ_D diff --git a/qemu/target/arm/tlb_helper.c b/qemu/target/arm/tlb_helper.c index e19d6c17a3..c3335f75ac 100644 --- a/qemu/target/arm/tlb_helper.c +++ b/qemu/target/arm/tlb_helper.c @@ -31,7 +31,7 @@ static inline uint32_t merge_syn_data_abort(uint32_t template_syn, * ISV field. */ if (!(template_syn & ARM_EL_ISV) || target_el != 2 || s1ptw) { - syn = syn_data_abort_no_iss(same_el, + syn = syn_data_abort_no_iss(same_el, 0, ea, 0, s1ptw, is_write, fsc); } else { /* @@ -154,6 +154,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, int prot, ret; MemTxAttrs attrs = { 0 }; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; /* * Walk the page table and (if the mapping exists) add the page @@ -163,7 +164,8 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, */ ret = get_phys_addr(&cpu->env, address, access_type, core_to_arm_mmu_idx(&cpu->env, mmu_idx), - &phys_addr, &attrs, &prot, &page_size, &fi, NULL); + &phys_addr, &attrs, &prot, &page_size, + &fi, &cacheattrs); if (likely(!ret)) { /* * Map a single [sub]page. Regions smaller than our declared diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 922976536e..9d8cc18836 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -38,11 +38,9 @@ #include "kvm-consts.h" static const char *regnames[] = { - "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", - "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", - "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" -}; + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", + "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"}; enum a64_shift_type { A64_SHIFT_TYPE_LSL = 0, @@ -62,40 +60,23 @@ typedef struct AArch64DecodeTable { AArch64DecodeFn *disas_fn; } AArch64DecodeTable; -/* Function prototype for gen_ functions for calling Neon helpers */ -typedef void NeonGenOneOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32); -typedef void NeonGenTwoOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32); -typedef void NeonGenTwoOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); -typedef void NeonGenTwo64OpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64); -typedef void NeonGenTwo64OpEnvFn(TCGContext *, TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); -typedef void NeonGenNarrowFn(TCGContext *, TCGv_i32, TCGv_i64); -typedef void NeonGenNarrowEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i64); -typedef void NeonGenWidenFn(TCGContext *, TCGv_i64, TCGv_i32); -typedef void NeonGenTwoSingleOPFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); -typedef void NeonGenTwoDoubleOPFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); -typedef void NeonGenOneOpFn(TCGContext *, TCGv_i64, TCGv_i64); -typedef void CryptoTwoOpFn(TCGContext *, TCGv_ptr, TCGv_ptr); -typedef void CryptoThreeOpIntFn(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_i32); -typedef void CryptoThreeOpFn(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr); -typedef void AtomicThreeOpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); - /* initialize TCG globals. 
*/ void a64_translate_init(struct uc_struct *uc) { int i; TCGContext *tcg_ctx = uc->tcg_ctx; - tcg_ctx->cpu_pc_arm64 = tcg_global_mem_new_i64(tcg_ctx, tcg_ctx->cpu_env, - offsetof(CPUARMState, pc), - "pc"); + tcg_ctx->cpu_pc_arm64 = tcg_global_mem_new_i64( + tcg_ctx, tcg_ctx->cpu_env, offsetof(CPUARMState, pc), "pc"); for (i = 0; i < 32; i++) { - tcg_ctx->cpu_X[i] = tcg_global_mem_new_i64(tcg_ctx, tcg_ctx->cpu_env, - offsetof(CPUARMState, xregs[i]), - regnames[i]); + tcg_ctx->cpu_X[i] = tcg_global_mem_new_i64( + tcg_ctx, tcg_ctx->cpu_env, offsetof(CPUARMState, xregs[i]), + regnames[i]); } - tcg_ctx->cpu_exclusive_high = tcg_global_mem_new_i64(tcg_ctx, tcg_ctx->cpu_env, - offsetof(CPUARMState, exclusive_high), "exclusive_high"); + tcg_ctx->cpu_exclusive_high = tcg_global_mem_new_i64( + tcg_ctx, tcg_ctx->cpu_env, offsetof(CPUARMState, exclusive_high), + "exclusive_high"); } /* @@ -140,7 +121,8 @@ static void reset_btype(DisasContext *s) TCGContext *tcg_ctx = s->uc->tcg_ctx; if (s->btype != 0) { TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); - tcg_gen_st_i32(tcg_ctx, zero, tcg_ctx->cpu_env, offsetof(CPUARMState, btype)); + tcg_gen_st_i32(tcg_ctx, zero, tcg_ctx->cpu_env, + offsetof(CPUARMState, btype)); tcg_temp_free_i32(tcg_ctx, zero); s->btype = 0; } @@ -155,7 +137,8 @@ static void set_btype(DisasContext *s, int val) tcg_debug_assert(val >= 1 && val <= 3); tcg_val = tcg_const_i32(tcg_ctx, val); - tcg_gen_st_i32(tcg_ctx, tcg_val, tcg_ctx->cpu_env, offsetof(CPUARMState, btype)); + tcg_gen_st_i32(tcg_ctx, tcg_val, tcg_ctx->cpu_env, + offsetof(CPUARMState, btype)); tcg_temp_free_i32(tcg_ctx, tcg_val); s->btype = -1; } @@ -178,8 +161,8 @@ void gen_a64_set_pc_im(TCGContext *tcg_ctx, uint64_t val) * * Here We have concatenated TBI{1,0} into tbi. */ -static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, - TCGv_i64 src, int tbi) +static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, TCGv_i64 src, + int tbi) { TCGContext *tcg_ctx = s->uc->tcg_ctx; if (tbi == 0) { @@ -217,25 +200,118 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) } /* - * Return a "clean" address for ADDR according to TBID. - * This is always a fresh temporary, as we need to be able to - * increment this independently of a dirty write-back address. + * Handle MTE and/or TBI. + * + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode we do not have a TLB with which to implement this, so we must + * remove the top byte now. + * + * Always return a fresh temporary that we can increment independently + * of the write-back address. */ -static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 clean = new_tmp_a64(s); - /* - * In order to get the correct value in the FAR_ELx register, - * we must present the memory subsystem with the "dirty" address - * including the TBI. In system mode we can make this work via - * the TLB, dropping the TBI during translation. But for user-only - * mode we don't have that option, and must remove the top byte now. - */ tcg_gen_mov_i64(tcg_ctx, clean, addr); return clean; } +/* Insert a zero tag into src, with the result at dst. 
*/ +static void gen_address_with_allocation_tag0(TCGContext *tcg_ctx, TCGv_i64 dst, + TCGv_i64 src) +{ + tcg_gen_andi_i64(tcg_ctx, dst, src, ~MAKE_64BIT_MASK(56, 4)); +} + +static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, MMUAccessType acc, + int log2_size) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i32 t_acc = tcg_const_i32(tcg_ctx, acc); + TCGv_i32 t_idx = tcg_const_i32(tcg_ctx, get_mem_index(s)); + TCGv_i32 t_size = tcg_const_i32(tcg_ctx, 1 << log2_size); + + glue(gen_helper_probe_access, UNICORN_ARCH_POSTFIX)(tcg_ctx, tcg_ctx->cpu_env, ptr, t_acc, t_idx, + t_size); + tcg_temp_free_i32(tcg_ctx, t_acc); + tcg_temp_free_i32(tcg_ctx, t_idx); + tcg_temp_free_i32(tcg_ctx, t_size); +} + +/* + * For MTE, check a single logical or atomic access. This probes a single + * address, the exact one specified. The size and alignment of the access + * is not relevant to MTE, per se, but watchpoints do require the size, + * and we want to recognize those before making any other changes to state. + */ +static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, + bool is_write, bool tag_checked, + int log2_size, bool is_unpriv, + int core_idx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (tag_checked && s->mte_active[is_unpriv]) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + FIELD_DP32(desc, MTEDESC, MIDX, core_idx, desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size, desc); + tcg_desc = tcg_const_i32(tcg_ctx, desc); + + ret = new_tmp_a64(s); + gen_helper_mte_check1(tcg_ctx, ret, tcg_ctx->cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_ctx, tcg_desc); + + return ret; + } + return clean_data_tbi(s, addr); +} + +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size) +{ + return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size, + false, get_mem_index(s)); +} + +/* + * For MTE, check multiple logical sequential accesses. + */ +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_esize, int total_size) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize, desc); + FIELD_DP32(desc, MTEDESC, TSIZE, total_size, desc); + tcg_desc = tcg_const_i32(tcg_ctx, desc); + + ret = new_tmp_a64(s); + gen_helper_mte_checkN(tcg_ctx, ret, tcg_ctx->cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_ctx, tcg_desc); + + return ret; + } + return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -248,7 +324,7 @@ static void a64_test_cc(TCGContext *tcg_ctx, DisasCompare64 *c64, int cc) arm_test_cc(tcg_ctx, &c32, cc); /* Sign-extend the 32-bit value so that the GE/LT comparisons work - * properly. The NE/EQ comparisons are also fine with this choice. */ + * properly. The NE/EQ comparisons are also fine with this choice. 
*/ c64->cond = c32.cond; c64->value = tcg_temp_new_i64(tcg_ctx); tcg_gen_ext_i32_i64(tcg_ctx, c64->value, c32.value); @@ -390,6 +466,13 @@ TCGv_i64 new_tmp_a64(DisasContext *s) return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64(tcg_ctx); } +TCGv_i64 new_tmp_a64_local(DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + assert(s->tmp_a64_count < TMP_A64_MAX); + return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64(tcg_ctx); +} + TCGv_i64 new_tmp_a64_zero(DisasContext *s) { TCGContext *tcg_ctx = s->uc->tcg_ctx; @@ -505,7 +588,8 @@ static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 v = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ld16u_i32(tcg_ctx, v, tcg_ctx->cpu_env, fp_reg_offset(s, reg, MO_16)); + tcg_gen_ld16u_i32(tcg_ctx, v, tcg_ctx->cpu_env, + fp_reg_offset(s, reg, MO_16)); return v; } @@ -518,14 +602,10 @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd) unsigned ofs = fp_reg_offset(s, rd, MO_64); unsigned vsz = vec_full_reg_size(s); - if (!is_q) { - TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); - tcg_gen_st_i64(tcg_ctx, tcg_zero, tcg_ctx->cpu_env, ofs + 8); - tcg_temp_free_i64(tcg_ctx, tcg_zero); - } - if (vsz > 16) { - tcg_gen_gvec_dup8i(tcg_ctx, ofs + 16, vsz - 16, vsz - 16, 0); - } + TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); + tcg_temp_free_i64(tcg_ctx, tcg_zero); + /* Nop move, with side effect of clearing the tail. */ + tcg_gen_gvec_mov(tcg_ctx, MO_64, ofs, ofs, is_q ? 16 : 8, vsz); } void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) @@ -571,8 +651,8 @@ static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, GVecGen2Fn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - is_q ? 16 : 8, vec_full_reg_size(s)); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), is_q ? 16 : 8, vec_full_reg_size(s)); } /* Expand a 2-operand + immediate AdvSIMD vector operation using @@ -582,8 +662,9 @@ static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, int64_t imm, GVecGen2iFn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - imm, is_q ? 16 : 8, vec_full_reg_size(s)); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), imm, is_q ? 16 : 8, + vec_full_reg_size(s)); } /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ @@ -591,8 +672,9 @@ static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, GVecGen3Fn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s)); } /* Expand a 4-operand AdvSIMD vector operation using an expander function. */ @@ -600,56 +682,31 @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, int rx, GVecGen4Fn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), - is_q ? 16 : 8, vec_full_reg_size(s)); -} - -/* Expand a 2-operand + immediate AdvSIMD vector operation using - * an op descriptor. 
- */ -static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd, - int rn, int64_t imm, const GVecGen2i *gvec_op) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_gvec_2i(tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rx), is_q ? 16 : 8, vec_full_reg_size(s)); } -/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */ -static void gen_gvec_op3(DisasContext *s, bool is_q, int rd, - int rn, int rm, const GVecGen3 *gvec_op) +/* Expand a 2-operand operation using an out-of-line helper. */ +static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, int rn, + int data, gen_helper_gvec_2 *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_gvec_3(tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), is_q ? 16 : 8, - vec_full_reg_size(s), gvec_op); + tcg_gen_gvec_2_ool(tcg_ctx, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), is_q ? 16 : 8, + vec_full_reg_size(s), data, fn); } /* Expand a 3-operand operation using an out-of-line helper. */ -static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, - int rn, int rm, int data, gen_helper_gvec_3 *fn) +static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, int rn, int rm, + int data, gen_helper_gvec_3 *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s), data, fn); } -/* Expand a 3-operand + env pointer operation using - * an out-of-line helper. - */ -static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd, - int rn, int rm, gen_helper_gvec_3_ptr *fn) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, - is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); -} - /* Expand a 3-operand + fpstatus pointer + simd data value operation using * an out-of-line helper. */ @@ -660,9 +717,8 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, is_fp16); tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), fpst, - is_q ? 16 : 8, vec_full_reg_size(s), data, fn); + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), + fpst, is_q ? 
16 : 8, vec_full_reg_size(s), data, fn); tcg_temp_free_ptr(tcg_ctx, fpst); } @@ -689,7 +745,8 @@ static inline void gen_logic_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 result) } /* dest = T0 + T1; compute C, N, V and Z flags */ -static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { if (sf) { TCGv_i64 result, flag, tmp; @@ -722,7 +779,8 @@ static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, tcg_gen_movi_i32(tcg_ctx, tmp, 0); tcg_gen_extrl_i64_i32(tcg_ctx, t0_32, t0); tcg_gen_extrl_i64_i32(tcg_ctx, t1_32, t1); - tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, t1_32, tmp); + tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, + t1_32, tmp); tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF); tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32); tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32); @@ -736,7 +794,8 @@ static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, } /* dest = T0 - T1; compute C, N, V and Z flags */ -static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { if (sf) { /* 64 bit arithmetic */ @@ -770,7 +829,8 @@ static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, tcg_gen_extrl_i64_i32(tcg_ctx, t1_32, t1); tcg_gen_sub_i32(tcg_ctx, tcg_ctx->cpu_NF, t0_32, t1_32); tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF); - tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GEU, tcg_ctx->cpu_CF, t0_32, t1_32); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GEU, tcg_ctx->cpu_CF, t0_32, + t1_32); tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32); tmp = tcg_temp_new_i32(tcg_ctx); tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32); @@ -783,7 +843,8 @@ static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, } /* dest = T0 + T1 + CF; do not compute flags. */ -static void gen_adc(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_adc(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { TCGv_i64 flag = tcg_temp_new_i64(tcg_ctx); tcg_gen_extu_i32_i64(tcg_ctx, flag, tcg_ctx->cpu_CF); @@ -797,7 +858,8 @@ static void gen_adc(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCG } /* dest = T0 + T1 + CF; compute C, N, V and Z flags. 
*/ -static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { if (sf) { TCGv_i64 result, cf_64, vf_64, tmp; @@ -831,8 +893,10 @@ static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, tcg_gen_extrl_i64_i32(tcg_ctx, t0_32, t0); tcg_gen_extrl_i64_i32(tcg_ctx, t1_32, t1); - tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, tcg_ctx->cpu_CF, tmp); - tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t1_32, tmp); + tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, + tcg_ctx->cpu_CF, tmp); + tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, + tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t1_32, tmp); tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF); tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32); @@ -855,9 +919,8 @@ static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, */ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, TCGv_i64 tcg_addr, int size, int memidx, - bool iss_valid, - unsigned int iss_srt, - bool iss_sf, bool iss_ar) + bool iss_valid, unsigned int iss_srt, bool iss_sf, + bool iss_ar) { TCGContext *tcg_ctx = s->uc->tcg_ctx; g_assert(size <= 3); @@ -866,36 +929,27 @@ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, if (iss_valid) { uint32_t syn; - syn = syn_data_abort_with_iss(0, - size, - false, - iss_srt, - iss_sf, - iss_ar, + syn = syn_data_abort_with_iss(0, size, false, iss_srt, iss_sf, iss_ar, 0, 0, 0, 0, 0, false); disas_set_insn_syndrome(s, syn); } } -static void do_gpr_st(DisasContext *s, TCGv_i64 source, - TCGv_i64 tcg_addr, int size, - bool iss_valid, - unsigned int iss_srt, +static void do_gpr_st(DisasContext *s, TCGv_i64 source, TCGv_i64 tcg_addr, + int size, bool iss_valid, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s), - iss_valid, iss_srt, iss_sf, iss_ar); + do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s), iss_valid, + iss_srt, iss_sf, iss_ar); } /* * Load from memory to GPR register */ -static void do_gpr_ld_memidx(DisasContext *s, - TCGv_i64 dest, TCGv_i64 tcg_addr, - int size, bool is_signed, - bool extend, int memidx, - bool iss_valid, unsigned int iss_srt, - bool iss_sf, bool iss_ar) +static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend, int memidx, + bool iss_valid, unsigned int iss_srt, bool iss_sf, + bool iss_ar) { TCGContext *tcg_ctx = s->uc->tcg_ctx; MemOp memop = s->be_data + size; @@ -916,26 +970,18 @@ static void do_gpr_ld_memidx(DisasContext *s, if (iss_valid) { uint32_t syn; - syn = syn_data_abort_with_iss(0, - size, - is_signed, - iss_srt, - iss_sf, - iss_ar, - 0, 0, 0, 0, 0, false); + syn = syn_data_abort_with_iss(0, size, is_signed, iss_srt, iss_sf, + iss_ar, 0, 0, 0, 0, 0, false); disas_set_insn_syndrome(s, syn); } } -static void do_gpr_ld(DisasContext *s, - TCGv_i64 dest, TCGv_i64 tcg_addr, - int size, bool is_signed, bool extend, - bool iss_valid, unsigned int iss_srt, - bool iss_sf, bool iss_ar) +static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend, bool iss_valid, + unsigned int iss_srt, bool iss_sf, bool iss_ar) { do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend, - get_mem_index(s), - iss_valid, iss_srt, iss_sf, 
iss_ar); + get_mem_index(s), iss_valid, iss_srt, iss_sf, iss_ar); } /* @@ -946,7 +992,8 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) TCGContext *tcg_ctx = s->uc->tcg_ctx; /* This writes the bottom N bits of a 128 bit wide vector to memory */ TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_offset(s, srcidx, MO_64)); + tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, + fp_reg_offset(s, srcidx, MO_64)); if (size < 4) { tcg_gen_qemu_st_i64(tcg_ctx, tmp, tcg_addr, get_mem_index(s), s->be_data + size); @@ -955,11 +1002,12 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s), - s->be_data | MO_Q); - tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_hi_offset(s, srcidx)); - tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s), - s->be_data | MO_Q); + tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_hiaddr : tcg_addr, + get_mem_index(s), s->be_data | MO_Q); + tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, srcidx)); + tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_addr : tcg_hiaddr, + get_mem_index(s), s->be_data | MO_Q); tcg_temp_free_i64(tcg_ctx, tcg_hiaddr); } @@ -974,11 +1022,10 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) TCGContext *tcg_ctx = s->uc->tcg_ctx; /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(tcg_ctx); - TCGv_i64 tmphi; + TCGv_i64 tmphi = NULL; if (size < 4) { MemOp memop = s->be_data + size; - tmphi = tcg_const_i64(tcg_ctx, 0); tcg_gen_qemu_ld_i64(tcg_ctx, tmplo, tcg_addr, get_mem_index(s), memop); } else { bool be = s->be_data == MO_BE; @@ -988,20 +1035,24 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) tcg_hiaddr = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_ld_i64(tcg_ctx, tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s), - s->be_data | MO_Q); - tcg_gen_qemu_ld_i64(tcg_ctx, tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s), - s->be_data | MO_Q); + tcg_gen_qemu_ld_i64(tcg_ctx, tmplo, be ? tcg_hiaddr : tcg_addr, + get_mem_index(s), s->be_data | MO_Q); + tcg_gen_qemu_ld_i64(tcg_ctx, tmphi, be ? 
tcg_addr : tcg_hiaddr, + get_mem_index(s), s->be_data | MO_Q); tcg_temp_free_i64(tcg_ctx, tcg_hiaddr); } - tcg_gen_st_i64(tcg_ctx, tmplo, tcg_ctx->cpu_env, fp_reg_offset(s, destidx, MO_64)); - tcg_gen_st_i64(tcg_ctx, tmphi, tcg_ctx->cpu_env, fp_reg_hi_offset(s, destidx)); + tcg_gen_st_i64(tcg_ctx, tmplo, tcg_ctx->cpu_env, + fp_reg_offset(s, destidx, MO_64)); tcg_temp_free_i64(tcg_ctx, tmplo); - tcg_temp_free_i64(tcg_ctx, tmphi); - clear_vec_high(s, true, destidx); + if (tmphi) { + tcg_gen_st_i64(tcg_ctx, tmphi, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, destidx)); + tcg_temp_free_i64(tcg_ctx, tmphi); + } + clear_vec_high(s, tmphi != NULL, destidx); } /* @@ -1032,17 +1083,17 @@ static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, case MO_32: tcg_gen_ld32u_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_8|MO_SIGN: + case MO_8 | MO_SIGN: tcg_gen_ld8s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_16|MO_SIGN: + case MO_16 | MO_SIGN: tcg_gen_ld16s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_32|MO_SIGN: + case MO_32 | MO_SIGN: tcg_gen_ld32s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; case MO_64: - case MO_64|MO_SIGN: + case MO_64 | MO_SIGN: tcg_gen_ld_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; default: @@ -1062,14 +1113,14 @@ static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, case MO_16: tcg_gen_ld16u_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_8|MO_SIGN: + case MO_8 | MO_SIGN: tcg_gen_ld8s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_16|MO_SIGN: + case MO_16 | MO_SIGN: tcg_gen_ld16s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; case MO_32: - case MO_32|MO_SIGN: + case MO_32 | MO_SIGN: tcg_gen_ld_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; default: @@ -1129,7 +1180,8 @@ static void do_vec_st(DisasContext *s, int srcidx, int element, TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); read_vec_element(s, tcg_tmp, srcidx, element, size); - tcg_gen_qemu_st_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), endian | size); + tcg_gen_qemu_st_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), + endian | size); tcg_temp_free_i64(tcg_ctx, tcg_tmp); } @@ -1141,7 +1193,8 @@ static void do_vec_ld(DisasContext *s, int destidx, int element, TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), endian | size); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), + endian | size); write_vec_element(s, tcg_tmp, destidx, element, size); tcg_temp_free_i64(tcg_ctx, tcg_tmp); @@ -1186,8 +1239,8 @@ bool sve_access_check(DisasContext *s) * optional shift. You will likely want to pass a temporary for the * destination register. See DecodeRegExtend() in the ARM ARM. */ -static void ext_and_shift_reg(TCGContext *tcg_ctx, TCGv_i64 tcg_out, TCGv_i64 tcg_in, - int option, unsigned int shift) +static void ext_and_shift_reg(TCGContext *tcg_ctx, TCGv_i64 tcg_out, + TCGv_i64 tcg_in, int option, unsigned int shift) { int extsize = extract32(option, 0, 2); bool is_signed = extract32(option, 2, 1); @@ -1319,8 +1372,8 @@ static void disas_comp_b_imm(DisasContext *s, uint32_t insn) label_match = gen_new_label(tcg_ctx); reset_btype(s); - tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ, - tcg_cmp, 0, label_match); + tcg_gen_brcondi_i64(tcg_ctx, op ? 
TCG_COND_NE : TCG_COND_EQ, tcg_cmp, 0, + label_match); gen_goto_tb(s, 0, s->base.pc_next); gen_set_label(tcg_ctx, label_match); @@ -1351,8 +1404,8 @@ static void disas_test_b_imm(DisasContext *s, uint32_t insn) label_match = gen_new_label(tcg_ctx); reset_btype(s); - tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ, - tcg_cmp, 0, label_match); + tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ, tcg_cmp, 0, + label_match); tcg_temp_free_i64(tcg_ctx, tcg_cmp); gen_goto_tb(s, 0, s->base.pc_next); gen_set_label(tcg_ctx, label_match); @@ -1393,8 +1446,8 @@ static void disas_cond_b_imm(DisasContext *s, uint32_t insn) } /* HINT instruction group, including various allocated HINTs */ -static void handle_hint(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void handle_hint(DisasContext *s, uint32_t insn, unsigned int op1, + unsigned int op2, unsigned int crm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned int selector = crm << 3 | op2; @@ -1431,71 +1484,80 @@ static void handle_hint(DisasContext *s, uint32_t insn, break; case 7: // 0b00111: /* XPACLRI */ if (s->pauth_active) { - gen_helper_xpaci(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30]); + gen_helper_xpaci(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30]); } break; case 8: // 0b01000: /* PACIA1716 */ if (s->pauth_active) { - gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0xa: // 0b01010: /* PACIB1716 */ if (s->pauth_active) { - gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0xc: // 0b01100: /* AUTIA1716 */ if (s->pauth_active) { - gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0xe: // 0b01110: /* AUTIB1716 */ if (s->pauth_active) { - gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0x18: // 0b11000: /* PACIAZ */ if (s->pauth_active) { - gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x19: // 0b11001: /* PACIASP */ if (s->pauth_active) { - gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; case 0x1a: // 0b11010: /* PACIBZ */ if (s->pauth_active) { - gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x1b: // 0b11011: /* PACIBSP */ if (s->pauth_active) { - gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_pacib(tcg_ctx, 
tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; case 0x1c: // 0b11100: /* AUTIAZ */ if (s->pauth_active) { - gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x1d: // 0b11101: /* AUTIASP */ if (s->pauth_active) { - gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; case 0x1e: // 0b11110: /* AUTIBZ */ if (s->pauth_active) { - gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x1f: // 0b11111: /* AUTIBSP */ if (s->pauth_active) { - gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; default: @@ -1511,8 +1573,8 @@ static void gen_clrex(DisasContext *s, uint32_t insn) } /* CLREX, DSB, DMB, ISB */ -static void handle_sync(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void handle_sync(DisasContext *s, uint32_t insn, unsigned int op1, + unsigned int op2, unsigned int crm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGBar bar; @@ -1601,19 +1663,22 @@ static void gen_xaflag(TCGContext *tcg_ctx) static void gen_axflag(TCGContext *tcg_ctx) { - tcg_gen_sari_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, 31); /* V ? -1 : 0 */ - tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, tcg_ctx->cpu_VF); /* C & !V */ + tcg_gen_sari_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, + 31); /* V ? -1 : 0 */ + tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, + tcg_ctx->cpu_VF); /* C & !V */ /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ - tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, tcg_ctx->cpu_VF); + tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, + tcg_ctx->cpu_VF); tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_NF, 0); tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_VF, 0); } /* MSR (immediate) - move immediate to processor state field */ -static void handle_msr_i(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void handle_msr_i(DisasContext *s, uint32_t insn, unsigned int op1, + unsigned int op2, unsigned int crm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 t1; @@ -1695,7 +1760,28 @@ static void handle_msr_i(DisasContext *s, uint32_t insn, gen_helper_msr_i_daifclear(tcg_ctx, tcg_ctx->cpu_env, t1); tcg_temp_free_i32(tcg_ctx, t1); /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; + break; + + case 0x1c: /* TCO */ + if (dc_isar_feature(aa64_mte, s)) { + /* Full MTE is enabled -- set the TCO bit as directed. */ + if (crm & 1) { + set_pstate_bits(tcg_ctx, PSTATE_TCO); + } else { + clear_pstate_bits(tcg_ctx, PSTATE_TCO); + } + t1 = tcg_const_i32(tcg_ctx, s->current_el); + gen_helper_rebuild_hflags_a64(tcg_ctx, tcg_ctx->cpu_env, t1); + tcg_temp_free_i32(tcg_ctx, t1); + /* Many factors, including TCO, go into MTE_ACTIVE. 
*/ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; + } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { + /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ + s->base.is_jmp = DISAS_NEXT; + } else { + goto do_unallocated; + } break; default: @@ -1738,7 +1824,8 @@ static void gen_set_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt) tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_NF, nzcv, (1U << 31)); /* bit 30, Z */ tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_ZF, nzcv, (1 << 30)); - tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, 0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, + 0); /* bit 29, C */ tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_CF, nzcv, (1 << 29)); tcg_gen_shri_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, 29); @@ -1748,7 +1835,6 @@ static void gen_set_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt) tcg_temp_free_i32(tcg_ctx, nzcv); } - static TCGLabel *gen_hook_sys(DisasContext *s, uint32_t insn, struct hook *hk) { uc_engine *uc = s->uc; @@ -1759,17 +1845,18 @@ static TCGLabel *gen_hook_sys(DisasContext *s, uint32_t insn, struct hook *hk) tcg_skip = tcg_temp_new_i32(tcg_ctx); tcg_insn = tcg_const_i32(tcg_ctx, insn); - tcg_hk = tcg_const_ptr(tcg_ctx, (void*)hk); + tcg_hk = tcg_const_ptr(tcg_ctx, (void *)hk); // Sync pc in advance. gen_a64_set_pc_im(tcg_ctx, s->pc_curr); // Only one hook per instruction for SYS/SYSL/MRS/MSR is allowed. // This is intended and may be extended if it's really necessary. - gen_helper_uc_hooksys64(tcg_ctx, tcg_skip, tcg_ctx->cpu_env, tcg_insn, tcg_hk); + gen_helper_uc_hooksys64(tcg_ctx, tcg_skip, tcg_ctx->cpu_env, tcg_insn, + tcg_hk); tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, tcg_skip, 0, label); - + tcg_temp_free_i32(tcg_ctx, tcg_skip); tcg_temp_free_i32(tcg_ctx, tcg_insn); tcg_temp_free_ptr(tcg_ctx, tcg_hk); @@ -1777,7 +1864,8 @@ static TCGLabel *gen_hook_sys(DisasContext *s, uint32_t insn, struct hook *hk) return label; } -static void may_gen_set_label(DisasContext *s, TCGLabel *label) { +static void may_gen_set_label(DisasContext *s, TCGLabel *label) +{ if (label) { gen_set_label(s->uc->tcg_ctx, label); } @@ -1802,7 +1890,8 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, struct hook *hook; HOOK_FOREACH_VAR_DECLARE; - HOOK_FOREACH(uc, hook, UC_HOOK_INSN) { + HOOK_FOREACH(uc, hook, UC_HOOK_INSN) + { if (hook->to_delete) continue; @@ -1811,32 +1900,32 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, } switch (hook->insn) { - case UC_ARM64_INS_MRS: { - if (isread && (op0 == 2 || op0 == 3)) { - label = gen_hook_sys(s, insn, hook); - } - break; + case UC_ARM64_INS_MRS: { + if (isread && (op0 == 2 || op0 == 3)) { + label = gen_hook_sys(s, insn, hook); } - case UC_ARM64_INS_MSR: { - if (!isread && (op0 == 2 || op0 == 3)) { - label = gen_hook_sys(s, insn, hook); - } - break; + break; + } + case UC_ARM64_INS_MSR: { + if (!isread && (op0 == 2 || op0 == 3)) { + label = gen_hook_sys(s, insn, hook); } - case UC_ARM64_INS_SYSL: { - if (isread && op0 == 1) { - label = gen_hook_sys(s, insn, hook); - } - break; + break; + } + case UC_ARM64_INS_SYSL: { + if (isread && op0 == 1) { + label = gen_hook_sys(s, insn, hook); } - case UC_ARM64_INS_SYS: { - if (!isread && op0 == 1) { - label = gen_hook_sys(s, insn, hook); - } - break; + break; + } + case UC_ARM64_INS_SYS: { + if (!isread && op0 == 1) { + label = gen_hook_sys(s, insn, hook); } - default: - break; + break; + } + default: + break; } if (label) { @@ -1844,15 +1933,16 @@ static void handle_sys(DisasContext *s, uint32_t 
insn, bool isread, } } - ri = get_arm_cp_reginfo(s->cp_regs, - ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, - crn, crm, op0, op1, op2)); + ri = get_arm_cp_reginfo( + s->cp_regs, + ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)); if (!ri) { /* Unknown register; this might be a guest error or a QEMU * unimplemented feature. */ - qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " + qemu_log_mask(LOG_UNIMP, + "%s access to unsupported AArch64 " "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", isread ? "read" : "write", op0, op1, crn, crm, op2); unallocated_encoding(s); @@ -1880,7 +1970,8 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); tcg_syn = tcg_const_i32(tcg_ctx, syndrome); tcg_isread = tcg_const_i32(tcg_ctx, isread); - gen_helper_access_check_cp_reg(tcg_ctx, tcg_ctx->cpu_env, tmpptr, tcg_syn, tcg_isread); + gen_helper_access_check_cp_reg(tcg_ctx, tcg_ctx->cpu_env, tmpptr, + tcg_syn, tcg_isread); tcg_temp_free_ptr(tcg_ctx, tmpptr); tcg_temp_free_i32(tcg_ctx, tcg_syn); tcg_temp_free_i32(tcg_ctx, tcg_isread); @@ -1916,10 +2007,62 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, return; case ARM_CP_DC_ZVA: /* Writes clear the aligned block of memory which rt points into. */ - tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + if (s->mte_active[0]) { + TCGv_i32 t_desc; + int desc = 0; + + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + t_desc = tcg_const_i32(tcg_ctx, desc); + + tcg_rt = new_tmp_a64(s); + gen_helper_mte_check_zva(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, t_desc, + cpu_reg(s, rt)); + tcg_temp_free_i32(tcg_ctx, t_desc); + } else { + tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + } gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, tcg_rt); may_gen_set_label(s, label); return; + case ARM_CP_DC_GVA: { + TCGv_i64 clean_addr, tag; + + /* + * DC_GVA, like DC_ZVA, requires that we supply the original + * pointer for an invalid page. Probe that address first. + */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. */ + tag = tcg_temp_new_i64(tcg_ctx); + tcg_gen_shri_i64(tcg_ctx, tag, tcg_rt, 56); + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, clean_addr, tag); + tcg_temp_free_i64(tcg_ctx, tag); + } + } + return; + case ARM_CP_DC_GZVA: { + TCGv_i64 clean_addr, tag; + + /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, clean_addr); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. 
*/ + tag = tcg_temp_new_i64(tcg_ctx); + tcg_gen_shri_i64(tcg_ctx, tag, tcg_rt, 56); + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, clean_addr, tag); + tcg_temp_free_i64(tcg_ctx, tag); + } + } + return; default: break; } @@ -1961,7 +2104,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { /* I/O operations must end the TB here (whether read or write) */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { /* @@ -1976,7 +2119,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, * but allow this to be suppressed by the register definition * (usually only necessary to work around guest bugs). */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } may_gen_set_label(s, label); @@ -2046,12 +2189,12 @@ static void disas_exc(DisasContext *s, uint32_t insn) * instruction works properly. */ switch (op2_ll) { - case 1: /* SVC */ + case 1: /* SVC */ gen_ss_advance(s); gen_exception_insn(s, s->base.pc_next, EXCP_SWI, syn_aa64_svc(imm16), default_exception_el(s)); break; - case 2: /* HVC */ + case 2: /* HVC */ if (s->current_el == 0) { unallocated_encoding(s); break; @@ -2065,7 +2208,7 @@ static void disas_exc(DisasContext *s, uint32_t insn) gen_exception_insn(s, s->base.pc_next, EXCP_HVC, syn_aa64_hvc(imm16), 2); break; - case 3: /* SMC */ + case 3: /* SMC */ if (s->current_el == 0) { unallocated_encoding(s); break; @@ -2144,7 +2287,7 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned int opc, op2, op3, rn, op4; - unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ + unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ TCGv_i64 dst; TCGv_i64 modifier; @@ -2194,9 +2337,11 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) if (s->pauth_active) { dst = new_tmp_a64(s); if (op3 == 2) { - gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, + cpu_reg(s, rn), modifier); } else { - gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, + cpu_reg(s, rn), modifier); } } else { dst = cpu_reg(s, rn); @@ -2226,9 +2371,11 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) dst = new_tmp_a64(s); modifier = cpu_reg_sp(s, op4); if (op3 == 2) { - gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), + modifier); } else { - gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), + modifier); } } else { dst = cpu_reg(s, rn); @@ -2268,9 +2415,11 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) if (s->pauth_active) { modifier = tcg_ctx->cpu_X[31]; if (op3 == 2) { - gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, dst, modifier); + gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, dst, + modifier); } else { - gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, dst, modifier); + gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, dst, + modifier); } } break; @@ -2326,14 +2475,18 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) static void disas_b_exc_sys(DisasContext *s, uint32_t insn) { switch (extract32(insn, 25, 7)) { - case 0x0a: case 0x0b: - case 0x4a: case 0x4b: /* 
Unconditional branch (immediate) */ + case 0x0a: + case 0x0b: + case 0x4a: + case 0x4b: /* Unconditional branch (immediate) */ disas_uncond_b_imm(s, insn); break; - case 0x1a: case 0x5a: /* Compare & branch (immediate) */ + case 0x1a: + case 0x5a: /* Compare & branch (immediate) */ disas_comp_b_imm(s, insn); break; - case 0x1b: case 0x5b: /* Test & branch (immediate) */ + case 0x1b: + case 0x5b: /* Test & branch (immediate) */ disas_test_b_imm(s, insn); break; case 0x2a: /* Conditional branch (immediate) */ @@ -2370,8 +2523,8 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn) * races in multi-threaded linux-user and when MTTCG softmmu is * enabled. */ -static void gen_load_exclusive(DisasContext *s, int rt, int rt2, - TCGv_i64 addr, int size, bool is_pair) +static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr, + int size, bool is_pair) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int idx = get_mem_index(s); @@ -2383,13 +2536,18 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, if (size == 2) { /* The pair must be single-copy atomic for the doubleword. */ memop |= MO_64 | MO_ALIGN; - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, + memop); if (s->be_data == MO_LE) { - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val, 0, 32); - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), tcg_ctx->cpu_exclusive_val, 32, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), + tcg_ctx->cpu_exclusive_val, 0, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), + tcg_ctx->cpu_exclusive_val, 32, 32); } else { - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val, 32, 32); - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), tcg_ctx->cpu_exclusive_val, 0, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), + tcg_ctx->cpu_exclusive_val, 32, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), + tcg_ctx->cpu_exclusive_val, 0, 32); } } else { /* The pair must be single-copy atomic for *each* doubleword, not @@ -2400,15 +2558,19 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr2 = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, addr2, addr, 8); - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, addr2, idx, memop); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, addr2, + idx, memop); tcg_temp_free_i64(tcg_ctx, addr2); - tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val); - tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), tcg_ctx->cpu_exclusive_high); + tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), + tcg_ctx->cpu_exclusive_val); + tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), + tcg_ctx->cpu_exclusive_high); } } else { memop |= size | MO_ALIGN; - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, + memop); tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val); } tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, addr); @@ -2434,48 +2596,54 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGLabel *done_label = gen_new_label(tcg_ctx); TCGv_i64 tmp; - tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, addr, tcg_ctx->cpu_exclusive_addr, fail_label); + tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, addr, tcg_ctx->cpu_exclusive_addr, + fail_label); tmp = tcg_temp_new_i64(tcg_ctx); if (is_pair) { if (size == 2) { if (s->be_data == MO_LE) { - 
tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); + tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt), + cpu_reg(s, rt2)); } else { - tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); - } - tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, - tcg_ctx->cpu_exclusive_val, tmp, - get_mem_index(s), - MO_64 | MO_ALIGN | s->be_data); - tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, tcg_ctx->cpu_exclusive_val); + tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt2), + cpu_reg(s, rt)); + } + tcg_gen_atomic_cmpxchg_i64( + tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, + tcg_ctx->cpu_exclusive_val, tmp, get_mem_index(s), + MO_64 | MO_ALIGN | s->be_data); + tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, + tcg_ctx->cpu_exclusive_val); } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { if (!HAVE_CMPXCHG128) { gen_helper_exit_atomic(tcg_ctx, tcg_ctx->cpu_env); s->base.is_jmp = DISAS_NORETURN; } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env, - tcg_ctx->cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); + gen_helper_paired_cmpxchg64_le_parallel( + tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_be_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env, - tcg_ctx->cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); + gen_helper_paired_cmpxchg64_be_parallel( + tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, + tcg_ctx->cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, + tcg_ctx->cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); } } else { - tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, tcg_ctx->cpu_exclusive_val, - cpu_reg(s, rt), get_mem_index(s), + tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, + tcg_ctx->cpu_exclusive_val, cpu_reg(s, rt), + get_mem_index(s), size | MO_ALIGN | s->be_data); - tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, tcg_ctx->cpu_exclusive_val); + tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, + tcg_ctx->cpu_exclusive_val); } tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rd), tmp); tcg_temp_free_i64(tcg_ctx, tmp); @@ -2487,8 +2655,8 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, -1); } -static void gen_compare_and_swap(DisasContext *s, int rs, int rt, - int rn, int size) +static void gen_compare_and_swap(DisasContext *s, int rs, int rt, int rn, + int size) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rs = cpu_reg(s, rs); @@ -2499,13 +2667,13 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); - tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, - size | MO_ALIGN | s->be_data); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); + tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tcg_rs, clean_addr, tcg_rs, tcg_rt, + memidx, size | MO_ALIGN | s->be_data); } -static void 
gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, - int rn, int size) +static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, int rn, + int size) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 s1 = cpu_reg(s, rs); @@ -2518,7 +2686,9 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + + /* This is a single atomic access, despite the "pair". */ + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); if (size == 2) { TCGv_i64 cmp = tcg_temp_new_i64(tcg_ctx); @@ -2579,7 +2749,8 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, /* If compare equal, write back new data, else write back old data. */ tcg_gen_movcond_i64(tcg_ctx, TCG_COND_NE, c1, c2, zero, t1, d1); tcg_gen_movcond_i64(tcg_ctx, TCG_COND_NE, c2, c2, zero, t2, d2); - tcg_gen_qemu_st_i64(tcg_ctx, c1, clean_addr, memidx, MO_64 | s->be_data); + tcg_gen_qemu_st_i64(tcg_ctx, c1, clean_addr, memidx, + MO_64 | s->be_data); tcg_gen_qemu_st_i64(tcg_ctx, c2, a2, memidx, MO_64 | s->be_data); tcg_temp_free_i64(tcg_ctx, a2); tcg_temp_free_i64(tcg_ctx, c1); @@ -2644,7 +2815,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); return; @@ -2653,7 +2824,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, false); if (is_lasr) { @@ -2673,7 +2845,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_STRL); - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); return; @@ -2689,13 +2861,15 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); return; - case 0x2: case 0x3: /* CASP / STXP */ + case 0x2: + case 0x3: /* CASP / STXP */ if (size & 2) { /* STXP / STLXP */ if (rn == 31) { gen_check_sp_alignment(s); @@ -2703,25 +2877,27 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); return; } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { + if (rt2 == 31 && ((rt | rs) & 1) == 0 && + dc_isar_feature(aa64_atomics, s)) { /* CASP / CASPL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; } break; - case 0x6: case 0x7: /* CASPA / LDXP */ + 
case 0x6: + case 0x7: /* CASPA / LDXP */ if (size & 2) { /* LDXP / LDAXP */ if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, true); if (is_lasr) { @@ -2729,9 +2905,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } return; } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { + if (rt2 == 31 && ((rt | rs) & 1) == 0 && + dc_isar_feature(aa64_atomics, s)) { /* CASPA / CASPAL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; @@ -2802,8 +2977,8 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) /* Only unsigned 32bit loads target 32bit registers. */ bool iss_sf = opc != 0; - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false, true, rt, + iss_sf, false); } tcg_temp_free_i64(tcg_ctx, clean_addr); } @@ -2825,7 +3000,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) * +-----+-------+---+---+-------+---+-------+-------+------+------+ * * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit - * LDPSW 01 + * LDPSW/STGP 01 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit * V: 0 -> GPR, 1 -> Vector * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, @@ -2851,6 +3026,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) bool is_signed = false; bool postindex = false; bool wback = false; + bool set_tag = false; TCGv_i64 clean_addr, dirty_addr; @@ -2863,6 +3039,14 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (is_vector) { size = 2 + opc; + } else if (opc == 1 && !is_load) { + /* STGP */ + if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { + unallocated_encoding(s); + return; + } + size = 3; + set_tag = true; } else { size = 2 + extract32(opc, 1, 1); is_signed = extract32(opc, 0, 1); @@ -2903,7 +3087,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) return; } - offset <<= size; + offset <<= (set_tag ? LOG2_TAG_GRANULE : size); if (rn == 31) { gen_check_sp_alignment(s); @@ -2913,7 +3097,25 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (!postindex) { tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, offset); } - clean_addr = clean_data_tbi(s, dirty_addr); + + if (set_tag) { + if (!s->ata) { + /* + * TODO: We could rely on the stores below, at least for + * system mode, if we arrange to add MO_ALIGN_16. + */ + gen_helper_stg_stub(tcg_ctx, tcg_ctx->cpu_env, dirty_addr); + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_stg_parallel(tcg_ctx, tcg_ctx->cpu_env, dirty_addr, + dirty_addr); + } else { + gen_helper_stg(tcg_ctx, tcg_ctx->cpu_env, dirty_addr, dirty_addr); + } + } + + clean_addr = + gen_mte_checkN(s, dirty_addr, !is_load, (wback || rn != 31) && !set_tag, + size, 2 << size); if (is_vector) { if (is_load) { @@ -2937,20 +3139,18 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) /* Do not modify tcg_rt before recognizing any exception * from the second load. 
*/ - do_gpr_ld(s, tmp, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tmp, clean_addr, size, is_signed, false, false, 0, + false, false); tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 1ULL << size); - do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false, false, 0, + false, false); tcg_gen_mov_i64(tcg_ctx, tcg_rt, tmp); tcg_temp_free_i64(tcg_ctx, tmp); } else { - do_gpr_st(s, tcg_rt, clean_addr, size, - false, 0, false, false); + do_gpr_st(s, tcg_rt, clean_addr, size, false, 0, false, false); tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 1ULL << size); - do_gpr_st(s, tcg_rt2, clean_addr, size, - false, 0, false, false); + do_gpr_st(s, tcg_rt2, clean_addr, size, false, 0, false, false); } } @@ -2978,11 +3178,8 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 */ -static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, int opc, + int size, int rt, bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -2995,6 +3192,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, bool iss_valid = !is_vector; bool post_index; bool writeback; + int memidx; TCGv_i64 clean_addr, dirty_addr; @@ -3052,7 +3250,11 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, if (!post_index) { tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, imm9); } - clean_addr = clean_data_tbi(s, dirty_addr); + + memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + clean_addr = + gen_mte_check1_mmuidx(s, dirty_addr, is_store, writeback || rn != 31, + size, is_unpriv, memidx); if (is_vector) { if (is_store) { @@ -3062,16 +3264,14 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); - int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, - iss_valid, rt, iss_sf, false); + do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, iss_valid, rt, + iss_sf, false); } else { - do_gpr_ld_memidx(s, tcg_rt, clean_addr, size, - is_signed, is_extended, memidx, - iss_valid, rt, iss_sf, false); + do_gpr_ld_memidx(s, tcg_rt, clean_addr, size, is_signed, + is_extended, memidx, iss_valid, rt, iss_sf, false); } } @@ -3105,11 +3305,8 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, * Rn: address register or SP for base * Rm: offset register or ZR for offset */ -static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, int opc, + int size, int rt, bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -3160,7 +3357,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, ext_and_shift_reg(tcg_ctx, tcg_rm, tcg_rm, opt, shift ? 
size : 0); tcg_gen_add_i64(tcg_ctx, dirty_addr, dirty_addr, tcg_rm); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); if (is_vector) { if (is_store) { @@ -3172,12 +3369,10 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, TCGv_i64 tcg_rt = cpu_reg(s, rt); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, - true, rt, iss_sf, false); + do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, - is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, true, + rt, iss_sf, false); } } } @@ -3199,11 +3394,8 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, * Rn: base address register (inc SP) * Rt: target register */ -static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, int opc, + int size, int rt, bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -3246,7 +3438,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, dirty_addr = read_cpu_reg_sp(s, rn, 1); offset = imm12 << size; tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, offset); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); if (is_vector) { if (is_store) { @@ -3258,11 +3450,10 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, TCGv_i64 tcg_rt = cpu_reg(s, rt); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, - true, rt, iss_sf, false); + do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, true, + rt, iss_sf, false); } } } @@ -3281,8 +3472,8 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, * A: acquire flag * R: release flag */ -static void disas_ldst_atomic(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +static void disas_ldst_atomic(DisasContext *s, uint32_t insn, int size, int rt, + bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rs = extract32(insn, 16, 5); @@ -3291,7 +3482,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, bool r = extract32(insn, 22, 1); bool a = extract32(insn, 23, 1); TCGv_i64 tcg_rs, clean_addr; - AtomicThreeOpFn *fn; + AtomicThreeOpFn *fn = NULL; if (is_vector || !dc_isar_feature(aa64_atomics, s)) { unallocated_encoding(s); @@ -3326,8 +3517,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, fn = tcg_gen_atomic_xchg_i64; break; case 014: /* LDAPR, LDAPRH, LDAPRB */ - if (!dc_isar_feature(aa64_rcpc_8_3, s) || - rs != 31 || a != 1 || r != 0) { + if (!dc_isar_feature(aa64_rcpc_8_3, s) || rs != 31 || a != 1 || + r != 0) { unallocated_encoding(s); return; } @@ -3340,7 +3531,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); if (o3_opc == 014) { /* @@ -3350,8 +3541,8 @@ static void disas_ldst_atomic(DisasContext *s, 
uint32_t insn, * full load-acquire (we only need "load-acquire processor consistent"), * but we choose to implement them as full LDAQ. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, - true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, + disas_ldst_compute_iss_sf(size, false, 0), true); tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); return; } @@ -3384,8 +3575,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, * W: pre-indexing flag * S: sign for imm9. */ -static void disas_ldst_pac(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +static void disas_ldst_pac(DisasContext *s, uint32_t insn, int size, int rt, + bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -3406,9 +3597,11 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, if (s->pauth_active) { if (use_key_a) { - gen_helper_autda(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, tcg_ctx->cpu_X[31]); + gen_helper_autda(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, + new_tmp_a64_zero(s)); } else { - gen_helper_autdb(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, tcg_ctx->cpu_X[31]); + gen_helper_autdb(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, + new_tmp_a64_zero(s)); } } @@ -3418,7 +3611,8 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, offset); /* Note that "clean" and "dirty" here refer to TBI not PAC. */ - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = + gen_mte_check1(s, dirty_addr, false, is_wback || rn != 31, size); tcg_rt = cpu_reg(s, rt); do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false, @@ -3507,8 +3701,8 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) * Load-AcquirePC semantics; we implement as the slightly more * restrictive Load-Acquire. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend, - true, rt, iss_sf, true); + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend, true, + rt, iss_sf, true); tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); } } @@ -3582,10 +3776,10 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; MemOp endian = s->be_data; - int ebytes; /* bytes per element */ + int total; /* bytes per element */ int elements; /* elements per vector */ - int rpt; /* num iterations */ - int selem; /* structure elements */ + int rpt; /* num iterations */ + int selem; /* structure elements */ int r; if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { @@ -3652,19 +3846,26 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) endian = MO_LE; } - /* Consecutive little-endian elements from a single register + total = rpt * selem * (is_q ? 16 : 8); + tcg_rn = cpu_reg_sp(s, rn); + + /* + * Issue the MTE check vs the logical repeat count, before we + * promote consecutive little-endian elements below. + */ + clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, + size, total); + + /* + * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ if (selem == 1 && endian == MO_LE) { size = 3; } - ebytes = 1 << size; - elements = (is_q ? 16 : 8) / ebytes; - - tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes); + elements = (is_q ? 
16 : 8) >> size; + tcg_ebytes = tcg_const_i64(tcg_ctx, 1 << size); for (r = 0; r < rpt; r++) { int e; for (e = 0; e < elements; e++) { @@ -3698,7 +3899,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, rpt * elements * selem * ebytes); + tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm)); } @@ -3745,7 +3946,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) int selem = (extract32(opc, 0, 1) << 1 | R) + 1; bool replicate = false; int index = is_q << 3 | S << 2 | size; - int ebytes, xs; + int xs, total; TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; if (extract32(insn, 31, 1)) { @@ -3799,26 +4000,26 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) return; } - ebytes = 1 << scale; - if (rn == 31) { gen_check_sp_alignment(s); } + total = selem << scale; tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes); + clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, + scale, total); + + tcg_ebytes = tcg_const_i64(tcg_ctx, 1 << scale); for (xs = 0; xs < selem; xs++) { if (replicate) { /* Load and replicate to all elements */ TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, clean_addr, - get_mem_index(s), s->be_data + scale); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, clean_addr, get_mem_index(s), + s->be_data + scale); tcg_gen_gvec_dup_i64(tcg_ctx, scale, vec_full_reg_offset(s, rt), - (is_q + 1) * 8, vec_full_reg_size(s), - tcg_tmp); + (is_q + 1) * 8, vec_full_reg_size(s), tcg_tmp); tcg_temp_free_i64(tcg_ctx, tcg_tmp); } else { /* Load/store one element per register */ @@ -3835,19 +4036,235 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, selem * ebytes); + tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm)); } } } +/* + * Load/Store memory tags + * + * 31 30 29 24 22 21 12 10 5 0 + * +-----+-------------+-----+---+------+-----+------+------+ + * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | + * +-----+-------------+-----+---+------+-----+------+------+ + */ +static void disas_ldst_tag(DisasContext *s, uint32_t insn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_READ, s->pc_curr) || + HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_WRITE, s->pc_curr)) { + // sync PC if there are memory hooks. + // TODO: Better granularity by checking ldst type and corresponding hook + // type + gen_a64_set_pc_im(s->uc->tcg_ctx, s->pc_curr); + } + + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; + int op2 = extract32(insn, 10, 2); + int op1 = extract32(insn, 22, 2); + bool is_load = false, is_pair = false, is_zero = false, is_mult = false; + int index = 0; + TCGv_i64 addr, clean_addr, tcg_rt; + + /* We checked insn bits [29:24,21] in the caller. 
*/ + if (extract32(insn, 30, 2) != 3) { + goto do_unallocated; + } + + /* + * @index is a tri-state variable which has 3 states: + * < 0 : post-index, writeback + * = 0 : signed offset + * > 0 : pre-index, writeback + */ + switch (op1) { + case 0: + if (op2 != 0) { + /* STG */ + index = op2 - 2; + } else { + /* STZGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_zero = true; + } + break; + case 1: + if (op2 != 0) { + /* STZG */ + is_zero = true; + index = op2 - 2; + } else { + /* LDG */ + is_load = true; + } + break; + case 2: + if (op2 != 0) { + /* ST2G */ + is_pair = true; + index = op2 - 2; + } else { + /* STGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = true; + } + break; + case 3: + if (op2 != 0) { + /* STZ2G */ + is_pair = is_zero = true; + index = op2 - 2; + } else { + /* LDGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_load = true; + } + break; + + default: + do_unallocated: + unallocated_encoding(s); + return; + } + + if (is_mult ? !dc_isar_feature(aa64_mte, s) + : !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + addr = read_cpu_reg_sp(s, rn, true); + if (index >= 0) { + /* pre-index or signed offset */ + tcg_gen_addi_i64(tcg_ctx, addr, addr, offset); + } + + if (is_mult) { + tcg_rt = cpu_reg(s, rt); + + if (is_zero) { + int size = 4 << s->dcz_blocksize; + + if (s->ata) { + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + /* + * The non-tags portion of STZGM is mostly like DC_ZVA, + * except the alignment happens before the access. + */ + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(tcg_ctx, clean_addr, clean_addr, -size); + gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, clean_addr); + } else if (s->ata) { + if (is_load) { + gen_helper_ldgm(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, addr); + } else { + gen_helper_stgm(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + } else { + MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; + int size = 4 << GMID_EL1_BS; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(tcg_ctx, clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + + if (is_load) { + /* The result tags are zeros. */ + tcg_gen_movi_i64(tcg_ctx, tcg_rt, 0); + } + } + return; + } + + if (is_load) { + tcg_gen_andi_i64(tcg_ctx, addr, addr, -TAG_GRANULE); + tcg_rt = cpu_reg(s, rt); + if (s->ata) { + gen_helper_ldg(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, addr, tcg_rt); + } else { + clean_addr = clean_data_tbi(s, addr); + gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); + gen_address_with_allocation_tag0(tcg_ctx, tcg_rt, addr); + } + } else { + tcg_rt = cpu_reg_sp(s, rt); + if (!s->ata) { + /* + * For STG and ST2G, we need to check alignment and probe memory. + * TODO: For STZG and STZ2G, we could rely on the stores below, + * at least for system mode; user-only won't enforce alignment. 
+ */ + if (is_pair) { + gen_helper_st2g_stub(tcg_ctx, tcg_ctx->cpu_env, addr); + } else { + gen_helper_stg_stub(tcg_ctx, tcg_ctx->cpu_env, addr); + } + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (is_pair) { + gen_helper_st2g_parallel(tcg_ctx, tcg_ctx->cpu_env, addr, + tcg_rt); + } else { + gen_helper_stg_parallel(tcg_ctx, tcg_ctx->cpu_env, addr, + tcg_rt); + } + } else { + if (is_pair) { + gen_helper_st2g(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } else { + gen_helper_stg(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + } + } + + if (is_zero) { + TCGv_i64 clean_addr = clean_data_tbi(s, addr); + TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); + int mem_index = get_mem_index(s); + int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; + + tcg_gen_qemu_st_i64(tcg_ctx, tcg_zero, clean_addr, mem_index, + MO_Q | MO_ALIGN_16); + for (i = 8; i < n; i += 8) { + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tcg_gen_qemu_st_i64(tcg_ctx, tcg_zero, clean_addr, mem_index, MO_Q); + } + tcg_temp_free_i64(tcg_ctx, tcg_zero); + } + + if (index != 0) { + /* pre-index or post-index */ + if (index < 0) { + /* post-index */ + tcg_gen_addi_i64(tcg_ctx, addr, addr, offset); + } + tcg_gen_mov_i64(tcg_ctx, cpu_reg_sp(s, rn), addr); + } +} + /* Loads and stores */ static void disas_ldst(DisasContext *s, uint32_t insn) { - if (HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_READ, s->pc_curr) || HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_WRITE, s->pc_curr)) { + if (HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_READ, s->pc_curr) || + HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_WRITE, s->pc_curr)) { // sync PC if there are memory hooks. - // TODO: Better granularity by checking ldst type and corresponding hook type + // TODO: Better granularity by checking ldst type and corresponding hook + // type gen_a64_set_pc_im(s->uc->tcg_ctx, s->pc_curr); } @@ -3855,15 +4272,20 @@ static void disas_ldst(DisasContext *s, uint32_t insn) case 0x08: /* Load/store exclusive */ disas_ldst_excl(s, insn); break; - case 0x18: case 0x1c: /* Load register (literal) */ + case 0x18: + case 0x1c: /* Load register (literal) */ disas_ld_lit(s, insn); break; - case 0x28: case 0x29: - case 0x2c: case 0x2d: /* Load/store pair (all forms) */ + case 0x28: + case 0x29: + case 0x2c: + case 0x2d: /* Load/store pair (all forms) */ disas_ldst_pair(s, insn); break; - case 0x38: case 0x39: - case 0x3c: case 0x3d: /* Load/store register (all forms) */ + case 0x38: + case 0x39: + case 0x3c: + case 0x3d: /* Load/store register (all forms) */ disas_ldst_reg(s, insn); break; case 0x0c: /* AdvSIMD load/store multiple structures */ @@ -3872,13 +4294,14 @@ static void disas_ldst(DisasContext *s, uint32_t insn) case 0x0d: /* AdvSIMD load/store single structure */ disas_ldst_single_struct(s, insn); break; - case 0x19: /* LDAPR/STLR (unscaled immediate) */ - if (extract32(insn, 10, 2) != 0 || - extract32(insn, 21, 1) != 0) { + case 0x19: + if (extract32(insn, 21, 1) != 0) { + disas_ldst_tag(s, insn); + } else if (extract32(insn, 10, 2) == 0) { + disas_ldst_ldapr_stlr(s, insn); + } else { unallocated_encoding(s); - break; } - disas_ldst_ldapr_stlr(s, insn); break; default: unallocated_encoding(s); @@ -3919,14 +4342,14 @@ static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) * Add/subtract (immediate) * * 31 30 29 28 24 23 22 21 10 9 5 4 0 - * +--+--+--+-----------+-----+-------------+-----+-----+ - * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd | - * +--+--+--+-----------+-----+-------------+-----+-----+ + * +--+--+--+-------------+--+-------------+-----+-----+ + * |sf|op| 
S| 1 0 0 0 1 0 |sh| imm12 | Rn | Rd | + * +--+--+--+-------------+--+-------------+-----+-----+ * * sf: 0 -> 32bit, 1 -> 64bit * op: 0 -> add , 1 -> sub * S: 1 -> set flags - * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12 + * sh: 1 -> LSL imm by 12 */ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) { @@ -3934,7 +4357,7 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) int rd = extract32(insn, 0, 5); int rn = extract32(insn, 5, 5); uint64_t imm = extract32(insn, 10, 12); - int shift = extract32(insn, 22, 2); + bool shift = extract32(insn, 22, 1); bool setflags = extract32(insn, 29, 1); bool sub_op = extract32(insn, 30, 1); bool is_64bit = extract32(insn, 31, 1); @@ -3943,13 +4366,8 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd); TCGv_i64 tcg_result; - switch (shift) { - case 0x0: - break; - case 0x1: + if (shift) { imm <<= 12; - break; - default: unallocated_encoding(s); return; } @@ -3980,6 +4398,57 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) tcg_temp_free_i64(tcg_ctx, tcg_result); } +/* + * Add/subtract (immediate, with tags) + * + * 31 30 29 28 23 22 21 16 14 10 9 5 4 0 + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * |sf|op| S| 1 0 0 0 1 1 |o2| uimm6 |o3| uimm4 | Rn | Rd | + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * + * op: 0 -> add, 1 -> sub + */ +static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int uimm4 = extract32(insn, 10, 4); + int uimm6 = extract32(insn, 16, 6); + bool sub_op = extract32(insn, 30, 1); + TCGv_i64 tcg_rn, tcg_rd; + int imm; + + /* Test all of sf=1, S=0, o2=0, o3=0. */ + if ((insn & 0xa040c000u) != 0x80000000u || + !dc_isar_feature(aa64_mte_insn_reg, s)) { + unallocated_encoding(s); + return; + } + + imm = uimm6 << LOG2_TAG_GRANULE; + if (sub_op) { + imm = -imm; + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_rd = cpu_reg_sp(s, rd); + + if (s->ata) { + TCGv_i32 offset = tcg_const_i32(tcg_ctx, imm); + TCGv_i32 tag_offset = tcg_const_i32(tcg_ctx, uimm4); + + gen_helper_addsubg(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, offset, + tag_offset); + tcg_temp_free_i32(tcg_ctx, tag_offset); + tcg_temp_free_i32(tcg_ctx, offset); + } else { + tcg_gen_addi_i64(tcg_ctx, tcg_rd, tcg_rn, imm); + gen_address_with_allocation_tag0(tcg_ctx, tcg_rd, tcg_rd); + } +} + /* The input should be a value in the bottom e bits (with higher * bits zero); returns that value replicated into every element * of size e in a 64 bit integer. @@ -4267,7 +4736,7 @@ static void disas_bitfield(DisasContext *s, uint32_t insn) return; } - done: +done: if (!sf) { /* zero extend final result */ tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd); } @@ -4340,12 +4809,16 @@ static void disas_extract(DisasContext *s, uint32_t insn) static void disas_data_proc_imm(DisasContext *s, uint32_t insn) { switch (extract32(insn, 23, 6)) { - case 0x20: case 0x21: /* PC-rel. addressing */ + case 0x20: + case 0x21: /* PC-rel. 
addressing */ disas_pc_rel_adr(s, insn); break; - case 0x22: case 0x23: /* Add/subtract (immediate) */ + case 0x22: /* Add/subtract (immediate) */ disas_add_sub_imm(s, insn); break; + case 0x23: /* Add/subtract (immediate, with tags) */ + disas_add_sub_imm_with_tags(s, insn); + break; case 0x24: /* Logical (immediate) */ disas_logic_imm(s, insn); break; @@ -4414,8 +4887,9 @@ static void shift_reg(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf, * The shift amount must be in range (this should always be true as the * relevant instructions will UNDEF on bad shift immediates). */ -static void shift_reg_imm(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf, - enum a64_shift_type shift_type, unsigned int shift_i) +static void shift_reg_imm(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, + int sf, enum a64_shift_type shift_type, + unsigned int shift_i) { assert(shift_i < (sf ? 64 : 32)); @@ -4675,9 +5149,8 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int ra = extract32(insn, 10, 5); int rm = extract32(insn, 16, 5); - int op_id = (extract32(insn, 29, 3) << 4) | - (extract32(insn, 21, 3) << 1) | - extract32(insn, 15, 1); + int op_id = (extract32(insn, 29, 3) << 4) | (extract32(insn, 21, 3) << 1) | + extract32(insn, 15, 1); bool sf = extract32(insn, 31, 1); bool is_sub = extract32(op_id, 0, 1); bool is_high = extract32(op_id, 2, 1); @@ -4693,8 +5166,8 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn) case 0x44: /* SMULH */ is_signed = true; break; - case 0x0: /* MADD (32bit) */ - case 0x1: /* MSUB (32bit) */ + case 0x0: /* MADD (32bit) */ + case 0x1: /* MSUB (32bit) */ case 0x40: /* MADD (64bit) */ case 0x41: /* MSUB (64bit) */ case 0x4a: /* UMADDL */ @@ -4866,7 +5339,7 @@ static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - shift = sz ? 16 : 24; /* SETF16 or SETF8 */ + shift = sz ? 16 : 24; /* SETF16 or SETF8 */ tmp = tcg_temp_new_i32(tcg_ctx); tcg_gen_extrl_i64_i32(tcg_ctx, tmp, cpu_reg(s, rn)); @@ -5016,7 +5489,8 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { /* CSET & CSETM. 
*/ - tcg_gen_setcond_i64(tcg_ctx, tcg_invert_cond(c.cond), tcg_rd, c.value, zero); + tcg_gen_setcond_i64(tcg_ctx, tcg_invert_cond(c.cond), tcg_rd, c.value, + zero); if (else_inv) { tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rd); } @@ -5030,7 +5504,8 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) } else if (else_inc) { tcg_gen_addi_i64(tcg_ctx, t_false, t_false, 1); } - tcg_gen_movcond_i64(tcg_ctx, c.cond, tcg_rd, c.value, zero, t_true, t_false); + tcg_gen_movcond_i64(tcg_ctx, c.cond, tcg_rd, c.value, zero, t_true, + t_false); } tcg_temp_free_i64(tcg_ctx, zero); @@ -5041,8 +5516,8 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) } } -static void handle_clz(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_clz(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd, tcg_rn; @@ -5060,8 +5535,8 @@ static void handle_clz(DisasContext *s, unsigned int sf, } } -static void handle_cls(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_cls(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd, tcg_rn; @@ -5079,8 +5554,8 @@ static void handle_cls(DisasContext *s, unsigned int sf, } } -static void handle_rbit(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rbit(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd, tcg_rn; @@ -5099,8 +5574,8 @@ static void handle_rbit(DisasContext *s, unsigned int sf, } /* REV with sf==1, opcode==3 ("REV64") */ -static void handle_rev64(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rev64(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; if (!sf) { @@ -5113,8 +5588,8 @@ static void handle_rev64(DisasContext *s, unsigned int sf, /* REV with sf==0, opcode==2 * REV32 (sf==1, opcode==2) */ -static void handle_rev32(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rev32(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd = cpu_reg(s, rd); @@ -5138,14 +5613,15 @@ static void handle_rev32(DisasContext *s, unsigned int sf, } /* REV16 (opcode==1) */ -static void handle_rev16(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rev16(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd = cpu_reg(s, rd); TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); - TCGv_i64 mask = tcg_const_i64(tcg_ctx, sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); + TCGv_i64 mask = + tcg_const_i64(tcg_ctx, sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 8); tcg_gen_and_i64(tcg_ctx, tcg_rd, tcg_rn, mask); @@ -5209,7 +5685,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x00): /* PACIA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5217,7 +5694,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x01): /* PACIB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5225,7 +5703,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x02): /* PACDA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5233,7 +5712,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x03): /* PACDB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5241,7 +5721,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x04): /* AUTIA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5249,7 +5730,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x05): /* AUTIB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5257,7 +5739,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x06): /* AUTDA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5265,7 +5748,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x07): /* AUTDB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5275,7 +5759,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - 
gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x09): /* PACIZB */ @@ -5283,7 +5768,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0a): /* PACDZA */ @@ -5291,7 +5777,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0b): /* PACDZB */ @@ -5299,7 +5786,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0c): /* AUTIZA */ @@ -5307,7 +5795,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0d): /* AUTIZB */ @@ -5315,7 +5804,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0e): /* AUTDZA */ @@ -5323,7 +5813,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0f): /* AUTDZB */ @@ -5331,7 +5822,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x10): /* XPACI */ @@ -5388,9 +5880,9 @@ static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, } /* LSLV, LSRV, ASRV, RORV */ -static void handle_shift_reg(DisasContext *s, - enum a64_shift_type shift_type, unsigned int sf, - unsigned int rm, unsigned int rn, unsigned int rd) +static void handle_shift_reg(DisasContext *s, enum a64_shift_type shift_type, + unsigned int sf, unsigned int rm, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_shift = tcg_temp_new_i64(tcg_ctx); @@ -5403,17 +5895,16 @@ static void handle_shift_reg(DisasContext *s, } /* CRC32[BHWX], CRC32C[BHWX] */ -static void handle_crc32(DisasContext *s, - unsigned int 
sf, unsigned int sz, bool crc32c, - unsigned int rm, unsigned int rn, unsigned int rd) +static void handle_crc32(DisasContext *s, unsigned int sf, unsigned int sz, + bool crc32c, unsigned int rm, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_acc, tcg_val; TCGv_i32 tcg_bytes; - if (!dc_isar_feature(aa64_crc32, s) - || (sf == 1 && sz != 3) - || (sf == 0 && sz == 3)) { + if (!dc_isar_feature(aa64_crc32, s) || (sf == 1 && sz != 3) || + (sf == 0 && sz == 3)) { unallocated_encoding(s); return; } @@ -5443,9 +5934,11 @@ static void handle_crc32(DisasContext *s, tcg_bytes = tcg_const_i32(tcg_ctx, 1 << sz); if (crc32c) { - gen_helper_crc32c_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + gen_helper_crc32c_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, + tcg_bytes); } else { - gen_helper_crc32_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + gen_helper_crc32_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, + tcg_bytes); } tcg_temp_free_i32(tcg_ctx, tcg_bytes); @@ -5460,25 +5953,72 @@ static void handle_crc32(DisasContext *s, static void disas_data_proc_2src(DisasContext *s, uint32_t insn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - unsigned int sf, rm, opcode, rn, rd; + unsigned int sf, rm, opcode, rn, rd, setflag; sf = extract32(insn, 31, 1); + setflag = extract32(insn, 29, 1); rm = extract32(insn, 16, 5); opcode = extract32(insn, 10, 6); rn = extract32(insn, 5, 5); rd = extract32(insn, 0, 5); - if (extract32(insn, 29, 1)) { + if (setflag && opcode != 0) { unallocated_encoding(s); return; } switch (opcode) { + case 0: /* SUBP(S) */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 tcg_n, tcg_m, tcg_d; + + tcg_n = read_cpu_reg_sp(s, rn, true); + tcg_m = read_cpu_reg_sp(s, rm, true); + tcg_gen_sextract_i64(tcg_ctx, tcg_n, tcg_n, 0, 56); + tcg_gen_sextract_i64(tcg_ctx, tcg_m, tcg_m, 0, 56); + tcg_d = cpu_reg(s, rd); + + if (setflag) { + gen_sub_CC(tcg_ctx, true, tcg_d, tcg_n, tcg_m); + } else { + tcg_gen_sub_i64(tcg_ctx, tcg_d, tcg_n, tcg_m); + } + } + break; case 2: /* UDIV */ handle_div(s, false, sf, rm, rn, rd); break; case 3: /* SDIV */ handle_div(s, true, sf, rm, rn, rd); break; + case 4: /* IRG */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + if (s->ata) { + gen_helper_irg(tcg_ctx, cpu_reg_sp(s, rd), tcg_ctx->cpu_env, + cpu_reg_sp(s, rn), cpu_reg(s, rm)); + } else { + gen_address_with_allocation_tag0(tcg_ctx, cpu_reg_sp(s, rd), + cpu_reg_sp(s, rn)); + } + break; + case 5: /* GMI */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 t1 = tcg_const_i64(tcg_ctx, 1); + TCGv_i64 t2 = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_extract_i64(tcg_ctx, t2, cpu_reg_sp(s, rn), 56, 4); + tcg_gen_shl_i64(tcg_ctx, t1, t1, t2); + tcg_gen_or_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, rm), t1); + + tcg_temp_free_i64(tcg_ctx, t1); + tcg_temp_free_i64(tcg_ctx, t2); + } + break; case 8: /* LSLV */ handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); break; @@ -5573,7 +6113,7 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) } break; - case 0x2: /* Conditional compare */ + case 0x2: /* Conditional compare */ disas_cc(s, insn); /* both imm and reg forms */ break; @@ -5581,10 +6121,10 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) disas_cond_select(s, insn); break; - case 0x6: /* Data-processing */ - if (op0) { /* (1 source) */ + case 0x6: /* Data-processing */ + if (op0) { /* (1 
source) */ disas_data_proc_1src(s, insn); - } else { /* (2 source) */ + } else { /* (2 source) */ disas_data_proc_2src(s, insn); } break; @@ -5606,9 +6146,9 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) } } -static void handle_fp_compare(DisasContext *s, int size, - unsigned int rn, unsigned int rm, - bool cmp_with_zero, bool signal_all_nans) +static void handle_fp_compare(DisasContext *s, int size, unsigned int rn, + unsigned int rm, bool cmp_with_zero, + bool signal_all_nans) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_flags = tcg_temp_new_i64(tcg_ctx); @@ -5644,16 +6184,20 @@ static void handle_fp_compare(DisasContext *s, int size, switch (size) { case MO_32: if (signal_all_nans) { - gen_helper_vfp_cmpes_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmpes_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } else { - gen_helper_vfp_cmps_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmps_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } break; case MO_16: if (signal_all_nans) { - gen_helper_vfp_cmpeh_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmpeh_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } else { - gen_helper_vfp_cmph_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmph_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } break; default: @@ -5844,7 +6388,8 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) a64_test_cc(tcg_ctx, &c, cond); t_zero = tcg_const_i64(tcg_ctx, 0); - tcg_gen_movcond_i64(tcg_ctx, c.cond, t_true, c.value, t_zero, t_true, t_false); + tcg_gen_movcond_i64(tcg_ctx, c.cond, t_true, c.value, t_zero, t_true, + t_false); tcg_temp_free_i64(tcg_ctx, t_zero); tcg_temp_free_i64(tcg_ctx, t_false); a64_free_cc(tcg_ctx, &c); @@ -5883,7 +6428,8 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) case 0xb: /* FRINTZ */ case 0xc: /* FRINTA */ { - TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7)); + TCGv_i32 tcg_rmode = + tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7)); fpst = get_fpstatus_ptr(tcg_ctx, true); gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst); @@ -5983,7 +6529,7 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) } tcg_temp_free_ptr(tcg_ctx, fpst); - done: +done: write_fp_sreg(s, rd, tcg_res); tcg_temp_free_i32(tcg_ctx, tcg_op); tcg_temp_free_i32(tcg_ctx, tcg_res); @@ -6061,19 +6607,18 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) } tcg_temp_free_ptr(tcg_ctx, fpst); - done: +done: write_fp_dreg(s, rd, tcg_res); tcg_temp_free_i64(tcg_ctx, tcg_op); tcg_temp_free_i64(tcg_ctx, tcg_res); } -static void handle_fp_fcvt(DisasContext *s, int opcode, - int rd, int rn, int dtype, int ntype) +static void handle_fp_fcvt(DisasContext *s, int opcode, int rd, int rn, + int dtype, int ntype) { TCGContext *tcg_ctx = s->uc->tcg_ctx; switch (ntype) { - case 0x0: - { + case 0x0: { TCGv_i32 tcg_rn = read_fp_sreg(s, rn); if (dtype == 1) { /* Single to double */ @@ -6097,8 +6642,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, tcg_temp_free_i32(tcg_ctx, tcg_rn); break; } - case 0x1: - { + case 0x1: { TCGv_i64 tcg_rn = read_fp_dreg(s, rn); TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx); if (dtype == 0) { @@ -6117,9 +6661,8 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, tcg_temp_free_i32(tcg_ctx, tcg_rd); tcg_temp_free_i64(tcg_ctx, tcg_rn); break; - } - case 0x3: - { + } + case 0x3: { TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 
TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx, false); TCGv_i32 tcg_ahp = get_ahp_flag(tcg_ctx); @@ -6127,13 +6670,15 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, if (dtype == 0) { /* Half to single */ TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, + tcg_ahp); write_fp_sreg(s, rd, tcg_rd); tcg_temp_free_i32(tcg_ctx, tcg_rd); } else { /* Half to double */ TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx); - gen_helper_vfp_fcvt_f16_to_f64(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); + gen_helper_vfp_fcvt_f16_to_f64(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, + tcg_ahp); write_fp_dreg(s, rd, tcg_rd); tcg_temp_free_i64(tcg_ctx, tcg_rd); } @@ -6167,8 +6712,9 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } switch (opcode) { - case 0x4: case 0x5: case 0x7: - { + case 0x4: + case 0x5: + case 0x7: { /* FCVT between half, single and double precision */ int dtype = extract32(opcode, 0, 2); if (type == 2 || dtype == type) { @@ -6242,8 +6788,8 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } /* Floating-point data-processing (2 source) - single precision */ -static void handle_fp_2src_single(DisasContext *s, int opcode, - int rd, int rn, int rm) +static void handle_fp_2src_single(DisasContext *s, int opcode, int rd, int rn, + int rm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1; @@ -6296,8 +6842,8 @@ static void handle_fp_2src_single(DisasContext *s, int opcode, } /* Floating-point data-processing (2 source) - double precision */ -static void handle_fp_2src_double(DisasContext *s, int opcode, - int rd, int rn, int rm) +static void handle_fp_2src_double(DisasContext *s, int opcode, int rd, int rn, + int rm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_op1; @@ -6350,8 +6896,8 @@ static void handle_fp_2src_double(DisasContext *s, int opcode, } /* Floating-point data-processing (2 source) - half precision */ -static void handle_fp_2src_half(DisasContext *s, int opcode, - int rd, int rn, int rm) +static void handle_fp_2src_half(DisasContext *s, int opcode, int rd, int rn, + int rm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1; @@ -6454,8 +7000,8 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn) } /* Floating-point data-processing (3 source) - single precision */ -static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) +static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, int rd, + int rn, int rm, int ra) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1, tcg_op2, tcg_op3; @@ -6493,8 +7039,8 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, } /* Floating-point data-processing (3 source) - double precision */ -static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) +static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, int rd, + int rn, int rm, int ra) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_op1, tcg_op2, tcg_op3; @@ -6532,8 +7078,8 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, } /* Floating-point data-processing (3 source) - half precision */ -static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) +static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, int rd, + int rn, int rm, int ra) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1, tcg_op2, 
tcg_op3; @@ -6559,7 +7105,8 @@ static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, tcg_gen_xori_i32(tcg_ctx, tcg_op1, tcg_op1, 0x8000); } - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, + fpst); write_fp_sreg(s, rd, tcg_res); @@ -6708,11 +7255,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, case 1: /* float64 */ tcg_double = tcg_temp_new_i64(tcg_ctx); if (is_signed) { - gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int, tcg_shift, + tcg_fpstatus); } write_fp_dreg(s, rd, tcg_double); tcg_temp_free_i64(tcg_ctx, tcg_double); @@ -6721,11 +7268,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, case 0: /* float32 */ tcg_single = tcg_temp_new_i32(tcg_ctx); if (is_signed) { - gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } write_fp_sreg(s, rd, tcg_single); tcg_temp_free_i32(tcg_ctx, tcg_single); @@ -6734,11 +7281,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, case 3: /* float16 */ tcg_single = tcg_temp_new_i32(tcg_ctx); if (is_signed) { - gen_helper_vfp_sqtoh(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_sqtoh(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_uqtoh(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_uqtoh(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } write_fp_sreg(s, rd, tcg_single); tcg_temp_free_i32(tcg_ctx, tcg_single); @@ -6933,7 +7480,8 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) break; case 2: /* 64 bit to top half. 
*/ - tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rd)); + tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, rd)); clear_vec_high(s, true, rd); break; case 3: @@ -6952,19 +7500,23 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) switch (type) { case 0: /* 32 bit */ - tcg_gen_ld32u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_32)); + tcg_gen_ld32u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_offset(s, rn, MO_32)); break; case 1: /* 64 bit */ - tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_64)); + tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_offset(s, rn, MO_64)); break; case 2: /* 64 bits from top half */ - tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rn)); + tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, rn)); break; case 3: /* 16 bit */ - tcg_gen_ld16u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_16)); + tcg_gen_ld16u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_offset(s, rn, MO_16)); break; default: g_assert_not_reached(); @@ -7053,8 +7605,8 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) goto do_unallocated; } /* fallthru */ - case 6: // 0b00000110: /* FMOV 32-bit */ - case 7: // 0b00000111: + case 6: // 0b00000110: /* FMOV 32-bit */ + case 7: // 0b00000111: case 0xa6: // 0b10100110: /* FMOV 64-bit */ case 0xa7: // 0b10100111: case 0xce: // 0b11001110: /* FMOV top half of 128-bit */ @@ -7198,14 +7750,13 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_resh, rm, 0, MO_64); do_ext64(s, tcg_resh, tcg_resl, pos); } - tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0); } else { TCGv_i64 tcg_hh; typedef struct { int reg; int elt; } EltPosns; - EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; + EltPosns eltposns[] = {{rn, 0}, {rn, 1}, {rm, 0}, {rm, 1}}; EltPosns *elt = eltposns; if (pos >= 64) { @@ -7228,9 +7779,11 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_resl); - write_vec_element(s, tcg_resh, rd, 1, MO_64); + if (is_q) { + write_vec_element(s, tcg_resh, rd, 1, MO_64); + } tcg_temp_free_i64(tcg_ctx, tcg_resh); - clear_vec_high(s, true, rd); + clear_vec_high(s, is_q, rd); } /* TBL/TBX @@ -7268,17 +7821,21 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) * the input. 
*/ tcg_resl = tcg_temp_new_i64(tcg_ctx); - tcg_resh = tcg_temp_new_i64(tcg_ctx); + tcg_resh = NULL; if (is_tblx) { read_vec_element(s, tcg_resl, rd, 0, MO_64); } else { tcg_gen_movi_i64(tcg_ctx, tcg_resl, 0); } - if (is_tblx && is_q) { - read_vec_element(s, tcg_resh, rd, 1, MO_64); - } else { - tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0); + + if (is_q) { + tcg_resh = tcg_temp_new_i64(tcg_ctx); + if (is_tblx) { + read_vec_element(s, tcg_resh, rd, 1, MO_64); + } else { + tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0); + } } tcg_idx = tcg_temp_new_i64(tcg_ctx); @@ -7289,8 +7846,8 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) tcg_regno, tcg_numregs); if (is_q) { read_vec_element(s, tcg_idx, rm, 1, MO_64); - gen_helper_simd_tbl(tcg_ctx, tcg_resh, tcg_ctx->cpu_env, tcg_resh, tcg_idx, - tcg_regno, tcg_numregs); + gen_helper_simd_tbl(tcg_ctx, tcg_resh, tcg_ctx->cpu_env, tcg_resh, + tcg_idx, tcg_regno, tcg_numregs); } tcg_temp_free_i64(tcg_ctx, tcg_idx); tcg_temp_free_i32(tcg_ctx, tcg_regno); @@ -7298,9 +7855,12 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_resl); - write_vec_element(s, tcg_resh, rd, 1, MO_64); - tcg_temp_free_i64(tcg_ctx, tcg_resh); - clear_vec_high(s, true, rd); + + if (is_q) { + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_ctx, tcg_resh); + } + clear_vec_high(s, is_q, rd); } /* ZIP/UZP/TRN @@ -7338,7 +7898,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) } tcg_resl = tcg_const_i64(tcg_ctx, 0); - tcg_resh = tcg_const_i64(tcg_ctx, 0); + tcg_resh = is_q ? tcg_const_i64(tcg_ctx, 0) : NULL; tcg_res = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < elements; i++) { @@ -7349,8 +7909,8 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) if (i < midpoint) { read_vec_element(s, tcg_res, rn, 2 * i + part, size); } else { - read_vec_element(s, tcg_res, rm, - 2 * (i - midpoint) + part, size); + read_vec_element(s, tcg_res, rm, 2 * (i - midpoint) + part, + size); } break; } @@ -7389,9 +7949,12 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_resl); - write_vec_element(s, tcg_resh, rd, 1, MO_64); - tcg_temp_free_i64(tcg_ctx, tcg_resh); - clear_vec_high(s, true, rd); + + if (is_q) { + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_ctx, tcg_resh); + } + clear_vec_high(s, is_q, rd); } /* @@ -7496,8 +8059,8 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) return; } /* fall through */ - case 0x3: /* SADDLV, UADDLV */ - case 0xa: /* SMAXV, UMAXV */ + case 0x3: /* SADDLV, UADDLV */ + case 0xa: /* SMAXV, UMAXV */ case 0x1a: /* SMINV, UMINV */ if (size == 3 || (size == 2 && !is_q)) { unallocated_encoding(s); @@ -7577,7 +8140,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) default: g_assert_not_reached(); } - } } else { /* Floating point vector reduction ops which work across 32 @@ -7650,8 +8212,8 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, index = imm5 >> (size + 1); tcg_gen_gvec_dup_mem(tcg_ctx, size, vec_full_reg_offset(s, rd), - vec_reg_offset(s, rn, index, size), - is_q ? 16 : 8, vec_full_reg_size(s)); + vec_reg_offset(s, rn, index, size), is_q ? 
16 : 8, + vec_full_reg_size(s)); } /* DUP (element, scalar) @@ -7660,8 +8222,7 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | * +-----------------------+--------+-------------+------+------+ */ -static void handle_simd_dupes(DisasContext *s, int rd, int rn, - int imm5) +static void handle_simd_dupes(DisasContext *s, int rd, int rn, int imm5) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = ctz32(imm5); @@ -7730,8 +8291,8 @@ static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, * size: encoded in imm5 (see ARM ARM LowestSetBit()) * index: encoded in imm5<4:size+1> */ -static void handle_simd_inse(DisasContext *s, int rd, int rn, - int imm4, int imm5) +static void handle_simd_inse(DisasContext *s, int rd, int rn, int imm4, + int imm5) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = ctz32(imm5); @@ -7747,7 +8308,7 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn, return; } - dst_index = extract32(imm5, 1+size, 5); + dst_index = extract32(imm5, 1 + size, 5); src_index = extract32(imm4, size, 4); tmp = tcg_temp_new_i64(tcg_ctx); @@ -7761,7 +8322,6 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn, clear_vec_high(s, true, rd); } - /* INS (General) * * 31 21 20 16 15 10 9 5 4 0 @@ -7820,9 +8380,7 @@ static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, return; } } else { - if (size > 3 - || (size < 3 && is_q) - || (size == 3 && !is_q)) { + if (size > 3 || (size < 3 && is_q) || (size == 3 && !is_q)) { unallocated_encoding(s); return; } @@ -7832,7 +8390,7 @@ static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, return; } - element = extract32(imm5, 1+size, 4); + element = extract32(imm5, 1 + size, 4); tcg_rd = cpu_reg(s, rd); read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0)); @@ -8014,8 +8572,8 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { /* MOVI or MVNI, with MVNI negation handled above. */ - tcg_gen_gvec_dup64i(tcg_ctx, vec_full_reg_offset(s, rd), is_q ? 16 : 8, - vec_full_reg_size(s), imm); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), imm); } else { /* ORR or BIC, with BIC negation to AND handled above. 
*/ if (is_neg) { @@ -8083,9 +8641,9 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) fpst = NULL; break; - case 0xc: /* FMAXNMP */ - case 0xd: /* FADDP */ - case 0xf: /* FMAXP */ + case 0xc: /* FMAXNMP */ + case 0xd: /* FADDP */ + case 0xf: /* FMAXP */ case 0x2c: /* FMINNMP */ case 0x2f: /* FMINP */ /* FP op, size[0] is 32 or 64 bit*/ @@ -8158,19 +8716,24 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) if (size == MO_16) { switch (opcode) { case 0xc: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xd: /* FADDP */ - gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xf: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x2c: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x2f: /* FMINP */ - gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -8178,7 +8741,8 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) } else { switch (opcode) { case 0xc: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xd: /* FADDP */ gen_helper_vfp_adds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -8187,7 +8751,8 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x2c: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x2f: /* FMINP */ gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -8215,9 +8780,10 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) * This code is handles the common shifting code and is used by both * the vector and scalar code. 
*/ -static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_i64 tcg_src, - TCGv_i64 tcg_rnd, bool accumulate, - bool is_u, int size, int shift) +static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, + TCGv_i64 tcg_src, TCGv_i64 tcg_rnd, + bool accumulate, bool is_u, int size, + int shift) { bool extended_result = false; bool round = tcg_rnd != NULL; @@ -8243,13 +8809,11 @@ static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_ if (!is_u) { /* take care of sign extending tcg_res */ tcg_gen_sari_i64(tcg_ctx, tcg_src_hi, tcg_src, 63); - tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, - tcg_src, tcg_src_hi, - tcg_rnd, tcg_zero); + tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, tcg_src, + tcg_src_hi, tcg_rnd, tcg_zero); } else { - tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, - tcg_src, tcg_zero, - tcg_rnd, tcg_zero); + tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, tcg_src, + tcg_zero, tcg_rnd, tcg_zero); } tcg_temp_free_i64(tcg_ctx, tcg_zero); } else { @@ -8298,9 +8862,8 @@ static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_ } /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ -static void handle_scalar_simd_shri(DisasContext *s, - bool is_u, int immh, int immb, - int opcode, int rn, int rd) +static void handle_scalar_simd_shri(DisasContext *s, bool is_u, int immh, + int immb, int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; const int size = 3; @@ -8345,7 +8908,8 @@ static void handle_scalar_simd_shri(DisasContext *s, } tcg_rn = read_fp_dreg(s, rn); - tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(tcg_ctx); + tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) + : tcg_temp_new_i64(tcg_ctx); if (insert) { /* shift count same as element size is valid but does nothing; @@ -8354,11 +8918,12 @@ static void handle_scalar_simd_shri(DisasContext *s, int esize = 8 << size; if (shift != esize) { tcg_gen_shri_i64(tcg_ctx, tcg_rn, tcg_rn, shift); - tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); + tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, 0, + esize - shift); } } else { - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); + handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, accumulate, + is_u, size, shift); } write_fp_dreg(s, rd, tcg_rd); @@ -8371,9 +8936,8 @@ static void handle_scalar_simd_shri(DisasContext *s, } /* SHL/SLI - Scalar shift left */ -static void handle_scalar_simd_shli(DisasContext *s, bool insert, - int immh, int immb, int opcode, - int rn, int rd) +static void handle_scalar_simd_shli(DisasContext *s, bool insert, int immh, + int immb, int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = 32 - clz32(immh) - 1; @@ -8409,9 +8973,8 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert, /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with * (signed/unsigned) narrowing */ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, - bool is_u_shift, bool is_u_narrow, - int immh, int immb, int opcode, - int rn, int rd) + bool is_u_shift, bool is_u_narrow, int immh, + int immb, int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; @@ -8425,21 +8988,15 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, TCGv_i32 tcg_rd_narrowed; TCGv_i64 tcg_final; - static NeonGenNarrowEnvFn * const 
signed_narrow_fns[4][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_unarrow_sat8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_unarrow_sat16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_unarrow_sat32 }, - { NULL, NULL }, - }; - static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { - gen_helper_neon_narrow_sat_u8, - gen_helper_neon_narrow_sat_u16, - gen_helper_neon_narrow_sat_u32, - NULL + static NeonGenNarrowEnvFn *const signed_narrow_fns[4][2] = { + {gen_helper_neon_narrow_sat_s8, gen_helper_neon_unarrow_sat8}, + {gen_helper_neon_narrow_sat_s16, gen_helper_neon_unarrow_sat16}, + {gen_helper_neon_narrow_sat_s32, gen_helper_neon_unarrow_sat32}, + {NULL, NULL}, }; + static NeonGenNarrowEnvFn *const unsigned_narrow_fns[4] = { + gen_helper_neon_narrow_sat_u8, gen_helper_neon_narrow_sat_u16, + gen_helper_neon_narrow_sat_u32, NULL}; NeonGenNarrowEnvFn *narrowfn; int i; @@ -8475,11 +9032,12 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, for (i = 0; i < elements; i++) { read_vec_element(s, tcg_rn, rn, i, ldop); - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - false, is_u_shift, size+1, shift); + handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, false, + is_u_shift, size + 1, shift); narrowfn(tcg_ctx, tcg_rd_narrowed, tcg_ctx->cpu_env, tcg_rd); tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_rd_narrowed); - tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, + esize); } if (!is_q) { @@ -8501,8 +9059,8 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, - bool src_unsigned, bool dst_unsigned, - int immh, int immb, int rn, int rd) + bool src_unsigned, bool dst_unsigned, int immh, + int immb, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; @@ -8544,9 +9102,9 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, if (size == 3) { TCGv_i64 tcg_shift = tcg_const_i64(tcg_ctx, shift); - static NeonGenTwo64OpEnvFn * const fns[2][2] = { - { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, - { NULL, gen_helper_neon_qshl_u64 }, + static NeonGenTwo64OpEnvFn *const fns[2][2] = { + {gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64}, + {NULL, gen_helper_neon_qshl_u64}, }; NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; int maxpass = is_q ? 2 : 1; @@ -8564,21 +9122,14 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, clear_vec_high(s, is_q, rd); } else { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, shift); - static NeonGenTwoOpEnvFn * const fns[2][2][3] = { - { - { gen_helper_neon_qshl_s8, - gen_helper_neon_qshl_s16, - gen_helper_neon_qshl_s32 }, - { gen_helper_neon_qshlu_s8, - gen_helper_neon_qshlu_s16, - gen_helper_neon_qshlu_s32 } - }, { - { NULL, NULL, NULL }, - { gen_helper_neon_qshl_u8, - gen_helper_neon_qshl_u16, - gen_helper_neon_qshl_u32 } - } - }; + static NeonGenTwoOpEnvFn *const fns[2][2][3] = { + {{gen_helper_neon_qshl_s8, gen_helper_neon_qshl_s16, + gen_helper_neon_qshl_s32}, + {gen_helper_neon_qshlu_s8, gen_helper_neon_qshlu_s16, + gen_helper_neon_qshlu_s32}}, + {{NULL, NULL, NULL}, + {gen_helper_neon_qshl_u8, gen_helper_neon_qshl_u16, + gen_helper_neon_qshl_u32}}}; NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; MemOp memop = scalar ? 
size : MO_32; int maxpass = scalar ? 1 : is_q ? 4 : 2; @@ -8618,8 +9169,8 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, /* Common vector code for handling integer to FP conversion */ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, - int elements, int is_signed, - int fracbits, int size) + int elements, int is_signed, int fracbits, + int size) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx, size == MO_16); @@ -8640,11 +9191,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, read_vec_element(s, tcg_int64, rn, pass, mop); if (is_signed) { - gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int64, - tcg_shift, tcg_fpst); + gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int64, tcg_shift, + tcg_fpst); } else { - gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int64, - tcg_shift, tcg_fpst); + gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int64, tcg_shift, + tcg_fpst); } if (elements == 1) { write_fp_dreg(s, rd, tcg_double); @@ -8675,9 +9226,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, } } else { if (is_signed) { - gen_helper_vfp_sitos(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_sitos(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } else { - gen_helper_vfp_uitos(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_uitos(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } } break; @@ -8692,9 +9245,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, } } else { if (is_signed) { - gen_helper_vfp_sitoh(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_sitoh(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } else { - gen_helper_vfp_uitoh(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_uitoh(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } } break; @@ -8723,9 +9278,8 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, /* UCVTF/SCVTF - Integer to FP conversion */ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, - bool is_q, bool is_u, - int immh, int immb, int opcode, - int rn, int rd) + bool is_q, bool is_u, int immh, + int immb, int opcode, int rn, int rd) { int size, elements, fracbits; int immhb = immh << 3 | immb; @@ -8767,8 +9321,8 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, - bool is_q, bool is_u, - int immh, int immb, int rn, int rd) + bool is_q, bool is_u, int immh, + int immb, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; @@ -8817,9 +9371,11 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, read_vec_element(s, tcg_op, rn, pass, MO_64); if (is_u) { - gen_helper_vfp_touqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + gen_helper_vfp_touqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_tosqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + gen_helper_vfp_tosqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, + tcg_fpstatus); } write_vec_element(s, tcg_op, rd, pass, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_op); @@ -8910,8 +9466,8 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); break; case 0x1c: /* SCVTF, UCVTF */ - handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, - opcode, rn, rd); + handle_simd_shift_intfp_conv(s, true, false, is_u, 
immh, immb, opcode, + rn, rd); break; case 0x10: /* SQSHRUN, SQSHRUN2 */ case 0x11: /* SQRSHRUN, SQRSHRUN2 */ @@ -8919,13 +9475,13 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - handle_vec_simd_sqshrn(s, true, false, false, true, - immh, immb, opcode, rn, rd); + handle_vec_simd_sqshrn(s, true, false, false, true, immh, immb, opcode, + rn, rd); break; case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ - handle_vec_simd_sqshrn(s, true, false, is_u, is_u, - immh, immb, opcode, rn, rd); + handle_vec_simd_sqshrn(s, true, false, is_u, is_u, immh, immb, opcode, + rn, rd); break; case 0xc: /* SQSHLU */ if (!is_u) { @@ -8994,7 +9550,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); tcg_gen_mul_i64(tcg_ctx, tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res); + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, + tcg_res, tcg_res); switch (opcode) { case 0xd: /* SQDMULL, SQDMULL2 */ @@ -9004,8 +9561,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) /* fall through */ case 0x9: /* SQDMLAL, SQDMLAL2 */ read_vec_element(s, tcg_op1, rd, 0, MO_64); - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_res, tcg_op1); + gen_helper_neon_addl_saturate_s64( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_op1); break; default: g_assert_not_reached(); @@ -9022,7 +9579,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx); gen_helper_neon_mull_s16(tcg_ctx, tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res); + gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, + tcg_res, tcg_res); switch (opcode) { case 0xd: /* SQDMULL, SQDMULL2 */ @@ -9034,8 +9592,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) { TCGv_i64 tcg_op3 = tcg_temp_new_i64(tcg_ctx); read_vec_element(s, tcg_op3, rd, 0, MO_32); - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_res, tcg_op3); + gen_helper_neon_addl_saturate_s32( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_op3); tcg_temp_free_i64(tcg_ctx, tcg_op3); break; } @@ -9066,16 +9624,20 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, switch (opcode) { case 0x1: /* SQADD */ if (u) { - gen_helper_neon_qadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0x5: /* SQSUB */ if (u) { - gen_helper_neon_qsub_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qsub_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qsub_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qsub_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0x6: /* CMGT, CMHI */ @@ -9106,9 +9668,11 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0x9: /* SQSHL, UQSHL */ if (u) { - gen_helper_neon_qshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + 
gen_helper_neon_qshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0xa: /* SRSHL, URSHL */ @@ -9120,9 +9684,11 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0xb: /* SQRSHL, UQRSHL */ if (u) { - gen_helper_neon_qrshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qrshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qrshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qrshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0x10: /* ADD, SUB */ @@ -9169,7 +9735,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, tcg_res, fpst); break; case 0x18: /* FMAXNM */ - gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1a: /* FADD */ gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9178,7 +9745,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mulxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_ceq_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1e: /* FMAX */ gen_helper_vfp_maxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9187,7 +9755,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_recpsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x38: /* FMINNM */ - gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x3a: /* FSUB */ gen_helper_vfp_subd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9196,16 +9765,19 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mind(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_rsqrtsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5b: /* FMUL */ gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5d: /* FACGE */ - gen_helper_neon_acge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5f: /* FDIV */ gen_helper_vfp_divd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9215,10 +9787,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_absd(tcg_ctx, tcg_res, tcg_res); break; case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -9255,7 +9829,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 
0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_ceq_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1e: /* FMAX */ gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9264,10 +9839,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_recpsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x18: /* FMAXNM */ - gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x38: /* FMINNM */ - gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x3a: /* FSUB */ gen_helper_vfp_subs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9276,16 +9853,19 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_rsqrtsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5b: /* FMUL */ gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5d: /* FACGE */ - gen_helper_neon_acge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5f: /* FDIV */ gen_helper_vfp_divs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9295,10 +9875,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_abss(tcg_ctx, tcg_res, tcg_res); break; case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -9376,10 +9958,10 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) case 0x9: /* SQSHL, UQSHL */ case 0xb: /* SQRSHL, UQRSHL */ break; - case 0x8: /* SSHL, USHL */ - case 0xa: /* SRSHL, URSHL */ - case 0x6: /* CMGT, CMHI */ - case 0x7: /* CMGE, CMHS */ + case 0x8: /* SSHL, USHL */ + case 0xa: /* SRSHL, URSHL */ + case 0x6: /* CMGT, CMHI */ + case 0x7: /* CMGE, CMHS */ case 0x11: /* CMTST, CMEQ */ case 0x10: /* ADD, SUB (vector) */ if (size != 3) { @@ -9429,49 +10011,49 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) switch (opcode) { case 0x1: /* SQADD, UQADD */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, - { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, - { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8}, + {gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16}, + {gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32}, }; genenvfn = fns[size][u]; break; } case 0x5: /* SQSUB, UQSUB */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, - { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, - { gen_helper_neon_qsub_s32, 
gen_helper_neon_qsub_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8}, + {gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16}, + {gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32}, }; genenvfn = fns[size][u]; break; } case 0x9: /* SQSHL, UQSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8}, + {gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16}, + {gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32}, }; genenvfn = fns[size][u]; break; } case 0xb: /* SQRSHL, UQRSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8}, + {gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16}, + {gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32}, }; genenvfn = fns[size][u]; break; } case 0x16: /* SQDMULH, SQRDMULH */ { - static NeonGenTwoOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + static NeonGenTwoOpEnvFn *const fns[2][2] = { + {gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16}, + {gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32}, }; assert(size == 1 || size == 2); genenvfn = fns[size - 1][u]; @@ -9511,7 +10093,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, int rm = extract32(insn, 16, 5); bool u = extract32(insn, 29, 1); bool a = extract32(insn, 23, 1); - int fpopcode = opcode | (a << 3) | (u << 4); + int fpopcode = opcode | (a << 3) | (u << 4); TCGv_ptr fpst; TCGv_i32 tcg_op1; TCGv_i32 tcg_op2; @@ -9582,7 +10164,6 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, write_fp_sreg(s, rd, tcg_res); - tcg_temp_free_i32(tcg_ctx, tcg_res); tcg_temp_free_i32(tcg_ctx, tcg_op1); tcg_temp_free_i32(tcg_ctx, tcg_op2); @@ -9647,16 +10228,20 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, switch (opcode) { case 0x0: /* SQRDMLAH */ if (size == 1) { - gen_helper_neon_qrdmlah_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlah_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } else { - gen_helper_neon_qrdmlah_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlah_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } break; case 0x1: /* SQRDMLSH */ if (size == 1) { - gen_helper_neon_qrdmlsh_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlsh_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } else { - gen_helper_neon_qrdmlsh_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlsh_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } break; default: @@ -9785,9 +10370,9 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, } } -static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, - bool is_scalar, bool is_u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, bool is_scalar, + bool is_u, bool is_q, int 
size, int rn, + int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; bool is_double = (size == MO_64); @@ -9803,7 +10388,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx); - NeonGenTwoDoubleOPFn *genfn = NULL; + NeonGenTwoDoubleOpFn *genfn = NULL; bool swap = false; int pass; @@ -9845,7 +10430,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx); TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0); TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx); - NeonGenTwoSingleOPFn *genfn = NULL; + NeonGenTwoSingleOpFn *genfn = NULL; bool swap = false; int pass, maxpasses; @@ -9922,9 +10507,9 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, tcg_temp_free_ptr(tcg_ctx, fpst); } -static void handle_2misc_reciprocal(DisasContext *s, int opcode, - bool is_scalar, bool is_u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_reciprocal(DisasContext *s, int opcode, bool is_scalar, + bool is_u, bool is_q, int size, int rn, + int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; bool is_double = (size == 3); @@ -9971,7 +10556,7 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, switch (opcode) { case 0x3c: /* URECPE */ - gen_helper_recpe_u32(tcg_ctx, tcg_res, tcg_op, fpst); + gen_helper_recpe_u32(tcg_ctx, tcg_res, tcg_op); break; case 0x3d: /* FRECPE */ gen_helper_recpe_f32(tcg_ctx, tcg_res, tcg_op, fpst); @@ -10001,9 +10586,8 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, tcg_temp_free_ptr(tcg_ctx, fpst); } -static void handle_2misc_narrow(DisasContext *s, bool scalar, - int opcode, bool u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_narrow(DisasContext *s, bool scalar, int opcode, + bool u, bool is_q, int size, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; /* Handle 2-reg-misc ops which are narrowing (so each 2*size element @@ -10033,12 +10617,12 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, switch (opcode) { case 0x12: /* XTN, SQXTUN */ { - static NeonGenNarrowFn * const xtnfns[3] = { + static NeonGenNarrowFn *const xtnfns[3] = { gen_helper_neon_narrow_u8, gen_helper_neon_narrow_u16, tcg_gen_extrl_i64_i32, }; - static NeonGenNarrowEnvFn * const sqxtunfns[3] = { + static NeonGenNarrowEnvFn *const sqxtunfns[3] = { gen_helper_neon_unarrow_sat8, gen_helper_neon_unarrow_sat16, gen_helper_neon_unarrow_sat32, @@ -10052,13 +10636,12 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, } case 0x14: /* SQXTN, UQXTN */ { - static NeonGenNarrowEnvFn * const fns[3][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_narrow_sat_u8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_narrow_sat_u16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_narrow_sat_u32 }, + static NeonGenNarrowEnvFn *const fns[3][2] = { + {gen_helper_neon_narrow_sat_s8, gen_helper_neon_narrow_sat_u8}, + {gen_helper_neon_narrow_sat_s16, + gen_helper_neon_narrow_sat_u16}, + {gen_helper_neon_narrow_sat_s32, + gen_helper_neon_narrow_sat_u32}, }; genenvfn = fns[size][u]; break; @@ -10066,7 +10649,8 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, case 0x16: /* FCVTN, FCVTN2 */ /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ if (size == 2) { - gen_helper_vfp_fcvtsd(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env); + gen_helper_vfp_fcvtsd(tcg_ctx, tcg_res[pass], tcg_op, + 
tcg_ctx->cpu_env); } else { TCGv_i32 tcg_lo = tcg_temp_new_i32(tcg_ctx); TCGv_i32 tcg_hi = tcg_temp_new_i32(tcg_ctx); @@ -10074,21 +10658,25 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, TCGv_i32 ahp = get_ahp_flag(tcg_ctx); tcg_gen_extr_i64_i32(tcg_ctx, tcg_lo, tcg_hi, tcg_op); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_lo, tcg_lo, fpst, ahp); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_hi, tcg_hi, fpst, ahp); - tcg_gen_deposit_i32(tcg_ctx, tcg_res[pass], tcg_lo, tcg_hi, 16, 16); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_lo, tcg_lo, fpst, + ahp); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_hi, tcg_hi, fpst, + ahp); + tcg_gen_deposit_i32(tcg_ctx, tcg_res[pass], tcg_lo, tcg_hi, 16, + 16); tcg_temp_free_i32(tcg_ctx, tcg_lo); tcg_temp_free_i32(tcg_ctx, tcg_hi); tcg_temp_free_ptr(tcg_ctx, fpst); tcg_temp_free_i32(tcg_ctx, ahp); } break; - case 0x56: /* FCVTXN, FCVTXN2 */ + case 0x56: /* FCVTXN, FCVTXN2 */ /* 64 bit to 32 bit float conversion * with von Neumann rounding (round to odd) */ assert(size == 2); - gen_helper_fcvtx_f64_to_f32(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env); + gen_helper_fcvtx_f64_to_f32(tcg_ctx, tcg_res[pass], tcg_op, + tcg_ctx->cpu_env); break; default: g_assert_not_reached(); @@ -10127,9 +10715,11 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, read_vec_element(s, tcg_rd, rd, pass, MO_64); if (is_u) { /* USQADD */ - gen_helper_neon_uqadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); } else { /* SUQADD */ - gen_helper_neon_sqadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); } write_vec_element(s, tcg_rd, rd, pass, MO_64); } @@ -10159,13 +10749,16 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, if (is_u) { /* USQADD */ switch (size) { case 0: - gen_helper_neon_uqadd_s8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 1: - gen_helper_neon_uqadd_s16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 2: - gen_helper_neon_uqadd_s32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; default: g_assert_not_reached(); @@ -10173,13 +10766,16 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, } else { /* SUQADD */ switch (size) { case 0: - gen_helper_neon_sqadd_u8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 1: - gen_helper_neon_sqadd_u16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 2: - gen_helper_neon_sqadd_u32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; default: g_assert_not_reached(); @@ -10369,10 +10965,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7: /* SQABS, SQNEG */ { NeonGenOneOpEnvFn *genfn; - static NeonGenOneOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, - { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, - { 
gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, + static NeonGenOneOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8}, + {gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16}, + {gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32}, }; genfn = fns[size][u]; genfn(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn); @@ -10385,7 +10981,8 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x3b: /* FCVTZS */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_tosls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + gen_helper_vfp_tosls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -10396,7 +10993,8 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7b: /* FCVTZU */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_touls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + gen_helper_vfp_touls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -10424,16 +11022,7 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = 2 * (8 << size) - immhb; - bool accumulate = false; - int dsize = is_q ? 128 : 64; - int esize = 8 << size; - int elements = dsize/esize; - MemOp memop = size | (is_u ? 0 : MO_SIGN); - TCGv_i64 tcg_rn = new_tmp_a64(s); - TCGv_i64 tcg_rd = new_tmp_a64(s); - TCGv_i64 tcg_round; - uint64_t round_const; - int i; + GVecGen2iFn *gvec_fn; if (extract32(immh, 3, 1) && !is_q) { unallocated_encoding(s); @@ -10447,73 +11036,44 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, switch (opcode) { case 0x02: /* SSRA / USRA (accumulate) */ - if (is_u) { - /* Shift count same as element size produces zero to add. */ - if (shift == 8 << size) { - goto done; - } - gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]); - } else { - /* Shift count same as element size produces all sign to add. */ - if (shift == 8 << size) { - shift -= 1; - } - gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]); - } - return; + gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; + break; + case 0x08: /* SRI */ - /* Shift count same as element size is valid but does nothing. */ - if (shift == 8 << size) { - goto done; - } - gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]); - return; + gvec_fn = gen_gvec_sri; + break; case 0x00: /* SSHR / USHR */ if (is_u) { if (shift == 8 << size) { /* Shift count the same size as element size produces zero. */ - tcg_gen_gvec_dup8i(tcg_ctx, vec_full_reg_offset(s, rd), - is_q ? 16 : 8, vec_full_reg_size(s), 0); - } else { - gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size); + tcg_gen_gvec_dup_imm(tcg_ctx, size, vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), 0); + return; } + gvec_fn = tcg_gen_gvec_shri; } else { /* Shift count the same size as element size produces all sign. */ if (shift == 8 << size) { shift -= 1; } - gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size); + gvec_fn = tcg_gen_gvec_sari; } - return; + break; case 0x04: /* SRSHR / URSHR (rounding) */ + gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; break; + case 0x06: /* SRSRA / URSRA (accum + rounding) */ - accumulate = true; + gvec_fn = is_u ? 
gen_gvec_ursra : gen_gvec_srsra; + break; default: g_assert_not_reached(); } - round_const = 1ULL << (shift - 1); - tcg_round = tcg_const_i64(tcg_ctx, round_const); - - for (i = 0; i < elements; i++) { - read_vec_element(s, tcg_rn, rn, i, memop); - if (accumulate) { - read_vec_element(s, tcg_rd, rd, i, memop); - } - - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); - - write_vec_element(s, tcg_rd, rd, i, size); - } - tcg_temp_free_i64(tcg_ctx, tcg_round); - - done: - clear_vec_high(s, is_q, rd); + gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); } /* SHL/SLI - Vector shift left */ @@ -10537,7 +11097,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, } if (insert) { - gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]); + gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); } else { gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); } @@ -10545,7 +11105,8 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, /* USHLL/SHLL - Vector shift left with widening */ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, - int immh, int immb, int opcode, int rn, int rd) + int immh, int immb, int opcode, int rn, + int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = 32 - clz32(immh) - 1; @@ -10553,7 +11114,7 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, int shift = immhb - (8 << size); int dsize = 64; int esize = 8 << size; - int elements = dsize/esize; + int elements = dsize / esize; TCGv_i64 tcg_rn = new_tmp_a64(s); TCGv_i64 tcg_rd = new_tmp_a64(s); int i; @@ -10582,15 +11143,15 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, } /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ -static void handle_vec_simd_shrn(DisasContext *s, bool is_q, - int immh, int immb, int opcode, int rn, int rd) +static void handle_vec_simd_shrn(DisasContext *s, bool is_q, int immh, int immb, + int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; int size = 32 - clz32(immh) - 1; int dsize = 64; int esize = 8 << size; - int elements = dsize/esize; + int elements = dsize / esize; int shift = (2 * esize) - immhb; bool round = extract32(opcode, 0, 1); TCGv_i64 tcg_rn, tcg_rd, tcg_final; @@ -10619,11 +11180,12 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q, } for (i = 0; i < elements; i++) { - read_vec_element(s, tcg_rn, rn, i, size+1); - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - false, true, size+1, shift); + read_vec_element(s, tcg_rn, rn, i, size + 1); + handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, false, true, + size + 1, shift); - tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, + esize); } if (!is_q) { @@ -10641,7 +11203,6 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q, clear_vec_high(s, is_q, rd); } - /* AdvSIMD shift by immediate * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 * +---+---+---+-------------+------+------+--------+---+------+------+ @@ -10688,15 +11249,15 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) break; case 0x12: /* SQSHRN / UQSHRN */ case 0x13: /* SQRSHRN / UQRSHRN */ - handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, - opcode, rn, rd); + handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, opcode, + rn, rd); break; case 0x14: 
/* SSHLL / USHLL */ handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); break; case 0x1c: /* SCVTF / UCVTF */ - handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, - opcode, rn, rd); + handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, opcode, + rn, rd); break; case 0xc: /* SQSHLU */ if (!is_u) { @@ -10720,13 +11281,13 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) /* Generate code to do a "long" addition or subtraction, ie one done in * TCGv_i64 on vector lanes twice the width specified by size. */ -static void gen_neon_addl(TCGContext *tcg_ctx, int size, bool is_sub, TCGv_i64 tcg_res, - TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) +static void gen_neon_addl(TCGContext *tcg_ctx, int size, bool is_sub, + TCGv_i64 tcg_res, TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) { - static NeonGenTwo64OpFn * const fns[3][2] = { - { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, - { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, - { tcg_gen_add_i64, tcg_gen_sub_i64 }, + static NeonGenTwo64OpFn *const fns[3][2] = { + {gen_helper_neon_addl_u16, gen_helper_neon_subl_u16}, + {gen_helper_neon_addl_u32, gen_helper_neon_subl_u32}, + {tcg_gen_add_i64, tcg_gen_sub_i64}, }; NeonGenTwo64OpFn *genfn; assert(size < 3); @@ -10806,23 +11367,24 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, tcg_gen_sub_i64(tcg_ctx, tcg_tmp1, tcg_op1, tcg_op2); tcg_gen_sub_i64(tcg_ctx, tcg_tmp2, tcg_op2, tcg_op1); tcg_gen_movcond_i64(tcg_ctx, is_u ? TCG_COND_GEU : TCG_COND_GE, - tcg_passres, - tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); + tcg_passres, tcg_op1, tcg_op2, tcg_tmp1, + tcg_tmp2); tcg_temp_free_i64(tcg_ctx, tcg_tmp1); tcg_temp_free_i64(tcg_ctx, tcg_tmp2); break; } - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); break; - case 9: /* SQDMLAL, SQDMLAL2 */ + case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, - tcg_passres, tcg_passres); + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, + tcg_passres); break; default: g_assert_not_reached(); @@ -10833,12 +11395,15 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, if (accop < 0) { tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres); } - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], + tcg_ctx->cpu_env, tcg_res[pass], tcg_passres); } else if (accop > 0) { - tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); } else if (accop < 0) { - tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); } if (accop != 0) { @@ -10870,9 +11435,9 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ { TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(tcg_ctx); - static NeonGenWidenFn * const widenfns[2][2] = { - { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, - { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, + static NeonGenWidenFn *const 
widenfns[2][2] = { + {gen_helper_neon_widen_s8, gen_helper_neon_widen_u8}, + {gen_helper_neon_widen_s16, gen_helper_neon_widen_u16}, }; NeonGenWidenFn *widenfn = widenfns[size][is_u]; @@ -10887,42 +11452,52 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ if (size == 0) { if (is_u) { - gen_helper_neon_abdl_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_u16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_abdl_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_s16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } else { if (is_u) { - gen_helper_neon_abdl_u32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_u32(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_abdl_s32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_s32(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } break; - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ if (size == 0) { if (is_u) { - gen_helper_neon_mull_u8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_u8(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_mull_s8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_s8(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } else { if (is_u) { - gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } break; - case 9: /* SQDMLAL, SQDMLAL2 */ + case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ assert(size == 1); - gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, - tcg_passres, tcg_passres); + gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); + gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, + tcg_passres); break; default: g_assert_not_reached(); @@ -10934,11 +11509,12 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, if (opcode == 9 || opcode == 11) { /* saturating accumulate ops */ if (accop < 0) { - gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres); + gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, + tcg_passres); } - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, - tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_saturate_s32( + tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_res[pass], + tcg_passres); } else { gen_neon_addl(tcg_ctx, size, (accop < 0), tcg_res[pass], tcg_res[pass], tcg_passres); @@ -10966,10 +11542,10 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx); TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx); TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(tcg_ctx); - static NeonGenWidenFn * const widenfns[3][2] = { - { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, - { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, - { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, + static NeonGenWidenFn *const widenfns[3][2] = { + {gen_helper_neon_widen_s8, gen_helper_neon_widen_u8}, + {gen_helper_neon_widen_s16, 
gen_helper_neon_widen_u16}, + {tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64}, }; NeonGenWidenFn *widenfn = widenfns[size][is_u]; @@ -10978,8 +11554,8 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, widenfn(tcg_ctx, tcg_op2_wide, tcg_op2); tcg_temp_free_i32(tcg_ctx, tcg_op2); tcg_res[pass] = tcg_temp_new_i64(tcg_ctx); - gen_neon_addl(tcg_ctx, size, (opcode == 3), - tcg_res[pass], tcg_op1, tcg_op2_wide); + gen_neon_addl(tcg_ctx, size, (opcode == 3), tcg_res[pass], tcg_op1, + tcg_op2_wide); tcg_temp_free_i64(tcg_ctx, tcg_op1); tcg_temp_free_i64(tcg_ctx, tcg_op2_wide); } @@ -10990,7 +11566,8 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, } } -static void do_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i64 in) +static void do_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, + TCGv_i64 in) { tcg_gen_addi_i64(tcg_ctx, in, in, 1U << 31); tcg_gen_extrh_i64_i32(tcg_ctx, res, in); @@ -11008,19 +11585,20 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_wideres = tcg_temp_new_i64(tcg_ctx); - static NeonGenNarrowFn * const narrowfns[3][2] = { - { gen_helper_neon_narrow_high_u8, - gen_helper_neon_narrow_round_high_u8 }, - { gen_helper_neon_narrow_high_u16, - gen_helper_neon_narrow_round_high_u16 }, - { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, + static NeonGenNarrowFn *const narrowfns[3][2] = { + {gen_helper_neon_narrow_high_u8, + gen_helper_neon_narrow_round_high_u8}, + {gen_helper_neon_narrow_high_u16, + gen_helper_neon_narrow_round_high_u16}, + {tcg_gen_extrh_i64_i32, do_narrow_round_high_u32}, }; NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; read_vec_element(s, tcg_op1, rn, pass, MO_64); read_vec_element(s, tcg_op2, rm, pass, MO_64); - gen_neon_addl(tcg_ctx, size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); + gen_neon_addl(tcg_ctx, size, (opcode == 6), tcg_wideres, tcg_op1, + tcg_op2); tcg_temp_free_i64(tcg_ctx, tcg_op1); tcg_temp_free_i64(tcg_ctx, tcg_op2); @@ -11121,7 +11699,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) break; } return; - case 9: /* SQDMLAL, SQDMLAL2 */ + case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ if (is_u || size == 0) { @@ -11129,11 +11707,11 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) return; } /* fall through */ - case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ - case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ - case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ - case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ + case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ /* 64 x 64 -> 128 */ @@ -11243,19 +11821,24 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2); break; case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5a: /* FADDP */ - gen_helper_vfp_addd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_addd(tcg_ctx, 
tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5e: /* FMAXP */ - gen_helper_vfp_maxd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x78: /* FMINNMP */ - gen_helper_vfp_minnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x7e: /* FMINP */ - gen_helper_vfp_mind(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_mind(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -11287,7 +11870,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, switch (opcode) { case 0x17: /* ADDP */ { - static NeonGenTwoOpFn * const fns[3] = { + static NeonGenTwoOpFn *const fns[3] = { gen_helper_neon_padd_u8, gen_helper_neon_padd_u16, tcg_gen_add_i32, @@ -11297,39 +11880,44 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, } case 0x14: /* SMAXP, UMAXP */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, - { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, - { tcg_gen_smax_i32, tcg_gen_umax_i32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8}, + {gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16}, + {tcg_gen_smax_i32, tcg_gen_umax_i32}, }; genfn = fns[size][u]; break; } case 0x15: /* SMINP, UMINP */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, - { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, - { tcg_gen_smin_i32, tcg_gen_umin_i32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8}, + {gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16}, + {tcg_gen_smin_i32, tcg_gen_umin_i32}, }; genfn = fns[size][u]; break; } /* The FP operations are all on single floats (32 bit) */ case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5a: /* FADDP */ - gen_helper_vfp_adds(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_adds(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5e: /* FMAXP */ - gen_helper_vfp_maxs(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxs(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x78: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x7e: /* FMINP */ - gen_helper_vfp_mins(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_mins(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -11364,9 +11952,8 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) * together indicate the operation. size[0] indicates single * or double. 
*/ - int fpopcode = extract32(insn, 11, 5) - | (extract32(insn, 23, 1) << 5) - | (extract32(insn, 29, 1) << 6); + int fpopcode = extract32(insn, 11, 5) | (extract32(insn, 23, 1) << 5) | + (extract32(insn, 29, 1) << 6); int is_q = extract32(insn, 30, 1); int size = extract32(insn, 22, 1); int rm = extract32(insn, 16, 5); @@ -11392,8 +11979,8 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, - rn, rm, rd); + handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, rn, + rm, rd); return; case 0x1b: /* FMULX */ case 0x1f: /* FRECPS */ @@ -11432,11 +12019,10 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) int is_s = extract32(insn, 23, 1); int is_2 = extract32(insn, 29, 1); int data = (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, - is_q ? 16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_a64); + tcg_gen_gvec_3_ptr( + tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, is_q ? 16 : 8, + vec_full_reg_size(s), data, gen_helper_gvec_fmlal_a64); } return; @@ -11467,13 +12053,13 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) return; } /* fall through */ - case 0x0: /* SHADD, UHADD */ - case 0x2: /* SRHADD, URHADD */ - case 0x4: /* SHSUB, UHSUB */ - case 0xc: /* SMAX, UMAX */ - case 0xd: /* SMIN, UMIN */ - case 0xe: /* SABD, UABD */ - case 0xf: /* SABA, UABA */ + case 0x0: /* SHADD, UHADD */ + case 0x2: /* SRHADD, URHADD */ + case 0x4: /* SHSUB, UHSUB */ + case 0xc: /* SMAX, UMAX */ + case 0xd: /* SMIN, UMIN */ + case 0xe: /* SABD, UABD */ + case 0xf: /* SABA, UABA */ case 0x12: /* MLA, MLS */ if (size == 3) { unallocated_encoding(s); @@ -11500,24 +12086,25 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) switch (opcode) { case 0x01: /* SQADD, UQADD */ - tcg_gen_gvec_4(tcg_ctx, vec_full_reg_offset(s, rd), - offsetof(CPUARMState, vfp.qc), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s), - (u ? uqadd_op : sqadd_op) + size); + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); + } return; case 0x05: /* SQSUB, UQSUB */ - tcg_gen_gvec_4(tcg_ctx, vec_full_reg_offset(s, rd), - offsetof(CPUARMState, vfp.qc), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s), - (u ? uqsub_op : sqsub_op) + size); + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); + } return; case 0x08: /* SSHL, USHL */ - gen_gvec_op3(s, is_q, rd, rn, rm, - u ? 
&ushl_op[size] : &sshl_op[size]); + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); + } return; case 0x0c: /* SMAX, UMAX */ if (u) { @@ -11533,6 +12120,20 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); } return; + case 0xe: /* SABD, UABD */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); + } + return; + case 0xf: /* SABA, UABA */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); + } + return; case 0x10: /* ADD, SUB */ if (u) { gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); @@ -11540,23 +12141,23 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); } return; - case 0x13: /* MUL, PMUL */ + case 0x13: /* MUL, PMUL */ if (!u) { /* MUL */ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); - } else { /* PMUL */ + } else { /* PMUL */ gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); } return; case 0x12: /* MLA, MLS */ if (u) { - gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]); + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); } else { - gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]); + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); } return; case 0x11: if (!u) { /* CMTST */ - gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]); + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); return; } /* else CMEQ */ @@ -11569,8 +12170,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) cond = u ? TCG_COND_GEU : TCG_COND_GE; do_gvec_cmp: tcg_gen_gvec_cmp(tcg_ctx, cond, size, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), is_q ? 
16 : 8, vec_full_reg_size(s)); return; } @@ -11607,80 +12207,69 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) switch (opcode) { case 0x0: /* SHADD, UHADD */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, - { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, - { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8}, + {gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16}, + {gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32}, }; genfn = fns[size][u]; break; } case 0x2: /* SRHADD, URHADD */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, - { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, - { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8}, + {gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16}, + {gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32}, }; genfn = fns[size][u]; break; } case 0x4: /* SHSUB, UHSUB */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, - { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, - { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8}, + {gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16}, + {gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32}, }; genfn = fns[size][u]; break; } case 0x9: /* SQSHL, UQSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8}, + {gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16}, + {gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32}, }; genenvfn = fns[size][u]; break; } case 0xa: /* SRSHL, URSHL */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, - { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, - { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8}, + {gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16}, + {gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32}, }; genfn = fns[size][u]; break; } case 0xb: /* SQRSHL, UQRSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8}, + {gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16}, + {gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32}, }; genenvfn = fns[size][u]; break; } - case 0xe: /* SABD, UABD */ - case 0xf: /* SABA, UABA */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 }, - { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 }, - { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 }, - }; - genfn = fns[size][u]; - break; - } case 0x16: /* SQDMULH, SQRDMULH */ { - static NeonGenTwoOpEnvFn * const 
fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + static NeonGenTwoOpEnvFn *const fns[2][2] = { + {gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16}, + {gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32}, }; assert(size == 1 || size == 2); genenvfn = fns[size - 1][u]; @@ -11696,18 +12285,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn(tcg_ctx, tcg_res, tcg_op1, tcg_op2); } - if (opcode == 0xf) { - /* SABA, UABA: accumulating ops */ - static NeonGenTwoOpFn * const fns[3] = { - gen_helper_neon_add_u8, - gen_helper_neon_add_u16, - tcg_gen_add_i32, - }; - - read_vec_element_i32(s, tcg_op1, rd, pass, MO_32); - fns[size](tcg_ctx, tcg_res, tcg_op1, tcg_res); - } - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); tcg_temp_free_i32(tcg_ctx, tcg_res); @@ -11834,7 +12411,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) rn = extract32(insn, 5, 5); rd = extract32(insn, 0, 5); - fpopcode = opcode | (a << 3) | (u << 4); + fpopcode = opcode | (a << 3) | (u << 4); datasize = is_q ? 128 : 64; elements = datasize / 16; @@ -11866,21 +12443,24 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) switch (fpopcode) { case 0x10: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, - fpst); + gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x12: /* FADDP */ - gen_helper_advsimd_addh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_addh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x16: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x18: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, - fpst); + gen_helper_advsimd_minnumh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x1e: /* FMINP */ - gen_helper_advsimd_minh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; default: g_assert_not_reached(); @@ -11906,68 +12486,84 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) switch (fpopcode) { case 0x0: /* FMAXNM */ - gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1: /* FMLA */ read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); break; case 0x2: /* FADD */ - gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x3: /* FMULX */ - gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x4: /* FCMEQ */ - gen_helper_advsimd_ceq_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_ceq_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x6: /* FMAX */ - gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x7: /* FRECPS */ gen_helper_recpsf_f16(tcg_ctx, tcg_res, 
tcg_op1, tcg_op2, fpst); break; case 0x8: /* FMINNM */ - gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x9: /* FMLS */ /* As usual for ARM, separate negation for fused multiply-add */ tcg_gen_xori_i32(tcg_ctx, tcg_op1, tcg_op1, 0x8000); read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); break; case 0xa: /* FSUB */ - gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xe: /* FMIN */ - gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xf: /* FRSQRTS */ - gen_helper_rsqrtsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_rsqrtsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x13: /* FMUL */ - gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x14: /* FCMGE */ - gen_helper_advsimd_cge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_cge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x15: /* FACGE */ - gen_helper_advsimd_acge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_acge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x17: /* FDIV */ - gen_helper_advsimd_divh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_divh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1a: /* FABD */ - gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); tcg_gen_andi_i32(tcg_ctx, tcg_res, tcg_res, 0x7fff); break; case 0x1c: /* FCMGT */ - gen_helper_advsimd_cgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_cgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1d: /* FACGT */ - gen_helper_advsimd_acgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_acgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n", @@ -12028,9 +12624,8 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) case 0x1b: /* FCMLA, #270 */ case 0x1c: /* FCADD, #90 */ case 0x1e: /* FCADD, #270 */ - if (size == 0 - || (size == 1 && !dc_isar_feature(aa64_fp16, s)) - || (size == 3 && !is_q)) { + if (size == 0 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) || + (size == 3 && !is_q)) { unallocated_encoding(s); return; } @@ -12050,29 +12645,11 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) switch (opcode) { case 0x0: /* SQRDMLAH (vector) */ - switch (size) { - case 1: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16); - break; - case 2: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32); - break; - default: - g_assert_not_reached(); - } + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); return; case 0x1: /* SQRDMLSH (vector) */ - switch (size) { - case 1: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16); - break; - case 2: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32); - break; - default: - 
g_assert_not_reached(); - } + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size); return; case 0x2: /* SDOT / UDOT */ @@ -12149,7 +12726,8 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, tcg_res[pass] = tcg_temp_new_i64(tcg_ctx); read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); - gen_helper_vfp_fcvtds(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env); + gen_helper_vfp_fcvtds(tcg_ctx, tcg_res[pass], tcg_op, + tcg_ctx->cpu_env); tcg_temp_free_i32(tcg_ctx, tcg_op); } for (pass = 0; pass < 2; pass++) { @@ -12167,8 +12745,8 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, tcg_res[pass] = tcg_temp_new_i32(tcg_ctx); read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_res[pass], tcg_res[pass], - fpst, ahp); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_res[pass], + tcg_res[pass], fpst, ahp); } for (pass = 0; pass < 4; pass++) { write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); @@ -12180,8 +12758,8 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, } } -static void handle_rev(DisasContext *s, int opcode, bool u, - bool is_q, int size, int rn, int rd) +static void handle_rev(DisasContext *s, int opcode, bool u, bool is_q, int size, + int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int op = (opcode << 1) | u; @@ -12237,10 +12815,11 @@ static void handle_rev(DisasContext *s, int opcode, bool u, int off = e_rev * esize; read_vec_element(s, tcg_rn, rn, i, size); if (off >= 64) { - tcg_gen_deposit_i64(tcg_ctx, tcg_rd_hi, tcg_rd_hi, - tcg_rn, off - 64, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_rd_hi, tcg_rd_hi, tcg_rn, + off - 64, esize); } else { - tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, off, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, off, + esize); } } write_vec_element(s, tcg_rd, rd, 0, MO_64); @@ -12290,10 +12869,10 @@ static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, } else { for (pass = 0; pass < maxpass; pass++) { TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx); - NeonGenOneOpFn *genfn; - static NeonGenOneOpFn * const fns[2][2] = { - { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, - { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, + NeonGenOne64OpFn *genfn; + static NeonGenOne64OpFn *const fns[2][2] = { + {gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8}, + {gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16}, }; genfn = fns[size][u]; @@ -12334,7 +12913,7 @@ static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) TCGv_i64 tcg_res[2]; for (pass = 0; pass < 2; pass++) { - static NeonGenWidenFn * const widenfns[3] = { + static NeonGenWidenFn *const widenfns[3] = { gen_helper_neon_widen_u8, gen_helper_neon_widen_u16, tcg_gen_extu_i32_i64, @@ -12477,8 +13056,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x1c: case 0x1d: case 0x1e: - case 0x1f: - { + case 0x1f: { /* Floating point: U, size[1] and opcode indicate operation; * size[0] indicates single or double precision. 
*/ @@ -12618,7 +13196,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - need_fpstatus = true; break; case 0x1e: /* FRINT32Z */ case 0x1f: /* FRINT64Z */ @@ -12667,6 +13244,23 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; + case 0x8: /* CMGT, CMGE */ + if (u) { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); + } else { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); + } + return; + case 0x9: /* CMEQ, CMLE */ + if (u) { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); + } else { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); + } + return; + case 0xa: /* CMLT */ + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); + return; case 0xb: if (u) { /* ABS, NEG */ gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); @@ -12690,8 +13284,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_op, rn, pass, MO_64); - handle_2misc_64(s, opcode, u, tcg_res, tcg_op, - tcg_rmode, tcg_fpstatus); + handle_2misc_64(s, opcode, u, tcg_res, tcg_op, tcg_rmode, + tcg_fpstatus); write_vec_element(s, tcg_res, rd, pass, MO_64); @@ -12704,29 +13298,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) for (pass = 0; pass < (is_q ? 4 : 2); pass++) { TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx); TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx); - TCGCond cond; read_vec_element_i32(s, tcg_op, rn, pass, MO_32); if (size == 2) { /* Special cases for 32 bit elements */ switch (opcode) { - case 0xa: /* CMLT */ - /* 32 bit integer comparison against zero, result is - * test ? (2^32 - 1) : 0. We implement via setcond(test) - * and inverting. - */ - cond = TCG_COND_LT; - do_cmop: - tcg_gen_setcondi_i32(tcg_ctx, cond, tcg_res, tcg_op, 0); - tcg_gen_neg_i32(tcg_ctx, tcg_res, tcg_res); - break; - case 0x8: /* CMGT, CMGE */ - cond = u ? TCG_COND_GE : TCG_COND_GT; - goto do_cmop; - case 0x9: /* CMEQ, CMLE */ - cond = u ? 
TCG_COND_LE : TCG_COND_EQ; - goto do_cmop; case 0x4: /* CLS */ if (u) { tcg_gen_clzi_i32(tcg_ctx, tcg_res, tcg_op, 32); @@ -12736,9 +13313,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) break; case 0x7: /* SQABS, SQNEG */ if (u) { - gen_helper_neon_qneg_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op); + gen_helper_neon_qneg_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op); } else { - gen_helper_neon_qabs_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op); + gen_helper_neon_qabs_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op); } break; case 0x2f: /* FABS */ @@ -12748,7 +13327,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_vfp_negs(tcg_ctx, tcg_res, tcg_op); break; case 0x7f: /* FSQRT */ - gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, tcg_ctx->cpu_env); + gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, + tcg_ctx->cpu_env); break; case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ @@ -12757,8 +13337,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x3b: /* FCVTZS */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_tosls(tcg_ctx, tcg_res, tcg_op, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_tosls(tcg_ctx, tcg_res, tcg_op, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -12769,8 +13349,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7b: /* FCVTZU */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_touls(tcg_ctx, tcg_res, tcg_op, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_touls(tcg_ctx, tcg_res, tcg_op, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -12783,18 +13363,21 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_rints(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); break; case 0x59: /* FRINTX */ - gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x7c: /* URSQRTE */ - gen_helper_rsqrte_u32(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_rsqrte_u32(tcg_ctx, tcg_res, tcg_op); break; case 0x1e: /* FRINT32Z */ case 0x5e: /* FRINT32X */ - gen_helper_frint32_s(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_frint32_s(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x1f: /* FRINT64Z */ case 0x5f: /* FRINT64X */ - gen_helper_frint64_s(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_frint64_s(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; default: g_assert_not_reached(); @@ -12815,44 +13398,14 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7: /* SQABS, SQNEG */ { NeonGenOneOpEnvFn *genfn; - static NeonGenOneOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, - { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + static NeonGenOneOpEnvFn *const fns[2][2] = { + {gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8}, + {gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16}, }; genfn = fns[size][u]; genfn(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op); break; } - case 0x8: /* CMGT, CMGE */ - case 0x9: /* CMEQ, CMLE */ - case 0xa: /* CMLT */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 }, - { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 }, - { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 }, - }; - NeonGenTwoOpFn *genfn; - int comp; - bool reverse; - TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0); - - /* comp = index 
into [CMGT, CMGE, CMEQ, CMLE, CMLT] */ - comp = (opcode - 0x8) * 2 + u; - /* ...but LE, LT are implemented as reverse GE, GT */ - reverse = (comp > 2); - if (reverse) { - comp = 4 - comp; - } - genfn = fns[comp][size]; - if (reverse) { - genfn(tcg_ctx, tcg_res, tcg_zero, tcg_op); - } else { - genfn(tcg_ctx, tcg_res, tcg_op, tcg_zero); - } - tcg_temp_free_i32(tcg_ctx, tcg_zero); - break; - } case 0x4: /* CLS, CLZ */ if (u) { if (size == 0) { @@ -12957,8 +13510,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) } handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); return; - } - break; + } break; case 0x2c: /* FCMGT (zero) */ case 0x2d: /* FCMEQ (zero) */ case 0x2e: /* FCMLT (zero) */ @@ -13051,7 +13603,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) g_assert_not_reached(); } - /* Check additional constraints for the scalar encoding */ if (is_scalar) { if (!is_q) { @@ -13088,7 +13639,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x1c: /* FCVTAS */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x3d: /* FRECPE */ gen_helper_recpe_f16(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); @@ -13101,7 +13653,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x5c: /* FCVTAU */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x6f: /* FNEG */ tcg_gen_xori_i32(tcg_ctx, tcg_res, tcg_op, 0x8000); @@ -13132,7 +13685,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x1c: /* FCVTAS */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x3d: /* FRECPE */ gen_helper_recpe_f16(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); @@ -13142,7 +13696,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x5c: /* FCVTAU */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x18: /* FRINTN */ case 0x19: /* FRINTM */ @@ -13150,10 +13705,12 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x39: /* FRINTZ */ case 0x58: /* FRINTA */ case 0x79: /* FRINTI */ - gen_helper_advsimd_rinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_rinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x59: /* FRINTX */ - gen_helper_advsimd_rinth_exact(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_rinth_exact(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x2f: /* FABS */ tcg_gen_andi_i32(tcg_ctx, tcg_res, tcg_op, 0x7fff); @@ -13390,40 +13947,38 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x1e: /* UDOT */ gen_gvec_op3_ool(s, is_q, rd, rn, rm, index, u ? 
gen_helper_gvec_udot_idx_b - : gen_helper_gvec_sdot_idx_b); + : gen_helper_gvec_sdot_idx_b); return; case 0x11: /* FCMLA #0 */ case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ case 0x17: /* FCMLA #270 */ - { - int rot = extract32(insn, 13, 2); - int data = (index << 2) | rot; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), fpst, - is_q ? 16 : 8, vec_full_reg_size(s), data, - size == MO_64 - ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); - tcg_temp_free_ptr(tcg_ctx, fpst); - } + { + int rot = extract32(insn, 13, 2); + int data = (index << 2) | rot; + tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), fpst, is_q ? 16 : 8, + vec_full_reg_size(s), data, + size == MO_64 ? gen_helper_gvec_fcmlas_idx + : gen_helper_gvec_fcmlah_idx); + tcg_temp_free_ptr(tcg_ctx, fpst); + } return; case 0x00: /* FMLAL */ case 0x04: /* FMLSL */ case 0x18: /* FMLAL2 */ case 0x1c: /* FMLSL2 */ - { - int is_s = extract32(opcode, 2, 1); - int is_2 = u; - int data = (index << 2) | (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, - is_q ? 16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_idx_a64); - } + { + int is_s = extract32(opcode, 2, 1); + int is_2 = u; + int data = (index << 2) | (is_2 << 1) | is_s; + tcg_gen_gvec_3_ptr( + tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, is_q ? 16 : 8, + vec_full_reg_size(s), data, gen_helper_gvec_fmlal_idx_a64); + } return; } @@ -13448,7 +14003,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) /* fall through */ case 0x01: /* FMLA */ read_vec_element(s, tcg_res, rd, pass, MO_64); - gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op, tcg_idx, tcg_res, fpst); + gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op, tcg_idx, + tcg_res, fpst); break; case 0x09: /* FMUL */ gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst); @@ -13502,9 +14058,9 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x10: /* MLA */ case 0x14: /* MLS */ { - static NeonGenTwoOpFn * const fns[2][2] = { - { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, - { tcg_gen_add_i32, tcg_gen_sub_i32 }, + static NeonGenTwoOpFn *const fns[2][2] = { + {gen_helper_neon_add_u16, gen_helper_neon_sub_u16}, + {tcg_gen_add_i32, tcg_gen_sub_i32}, }; NeonGenTwoOpFn *genfn; bool is_sub = opcode == 0x4; @@ -13534,11 +14090,11 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) tcg_gen_xori_i32(tcg_ctx, tcg_op, tcg_op, 0x80008000); } if (is_scalar) { - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op, + tcg_idx, tcg_res, fpst); } else { - gen_helper_advsimd_muladd2h(tcg_ctx, tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); + gen_helper_advsimd_muladd2h(tcg_ctx, tcg_res, tcg_op, + tcg_idx, tcg_res, fpst); } break; case 2: @@ -13566,7 +14122,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; case 2: - gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst); + gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op, tcg_idx, + fpst); break; default: g_assert_not_reached(); @@ -13584,7 +14141,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; case 2: - gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst); + 
gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op, tcg_idx, + fpst); break; default: g_assert_not_reached(); @@ -13592,42 +14150,46 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) break; case 0x0c: /* SQDMULH */ if (size == 1) { - gen_helper_neon_qdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qdmulh_s16( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } else { - gen_helper_neon_qdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qdmulh_s32( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } break; case 0x0d: /* SQRDMULH */ if (size == 1) { - gen_helper_neon_qrdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qrdmulh_s16( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } else { - gen_helper_neon_qrdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qrdmulh_s32( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } break; case 0x1d: /* SQRDMLAH */ read_vec_element_i32(s, tcg_res, rd, pass, is_scalar ? size : MO_32); if (size == 1) { - gen_helper_neon_qrdmlah_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlah_s16(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } else { - gen_helper_neon_qrdmlah_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlah_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } break; case 0x1f: /* SQRDMLSH */ read_vec_element_i32(s, tcg_res, rd, pass, is_scalar ? size : MO_32); if (size == 1) { - gen_helper_neon_qrdmlsh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlsh_s16(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } else { - gen_helper_neon_qrdmlsh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlsh_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } break; default: @@ -13689,7 +14251,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) if (satop) { /* saturating, doubling */ - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, tcg_passres); } @@ -13702,18 +14265,20 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) switch (opcode) { case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); break; case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); break; case 0x7: /* SQDMLSL, SQDMLSL2 */ tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres); /* fall through */ case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, - tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_saturate_s64( + tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_res[pass], + tcg_passres); break; default: g_assert_not_reached(); @@ -13744,8 +14309,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) if (is_scalar) { read_vec_element_i32(s, tcg_op, rn, pass, size); } else { - read_vec_element_i32(s, tcg_op, rn, - pass + (is_q * 2), MO_32); + read_vec_element_i32(s, tcg_op, rn, pass + (is_q * 2), + 
MO_32); } tcg_res[pass] = tcg_temp_new_i64(tcg_ctx); @@ -13758,12 +14323,15 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } if (memop & MO_SIGN) { - gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op, tcg_idx); + gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op, + tcg_idx); } else { - gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op, tcg_idx); + gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op, + tcg_idx); } if (satop) { - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, + gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, tcg_passres); } tcg_temp_free_i32(tcg_ctx, tcg_op); @@ -13777,20 +14345,20 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) switch (opcode) { case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass], + tcg_res[pass], tcg_passres); break; case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - gen_helper_neon_subl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass], - tcg_passres); + gen_helper_neon_subl_u32(tcg_ctx, tcg_res[pass], + tcg_res[pass], tcg_passres); break; case 0x7: /* SQDMLSL, SQDMLSL2 */ gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres); /* fall through */ case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, - tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_saturate_s32( + tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_res[pass], + tcg_passres); break; default: g_assert_not_reached(); @@ -13827,15 +14395,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) */ static void disas_crypto_aes(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); int decrypt; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - TCGv_i32 tcg_decrypt; - CryptoThreeOpIntFn *genfn; + gen_helper_gvec_2 *genfn2 = NULL; + gen_helper_gvec_3 *genfn3 = NULL; if (!dc_isar_feature(aa64_aes, s) || size != 0) { unallocated_encoding(s); @@ -13845,19 +14411,19 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) switch (opcode) { case 0x4: /* AESE */ decrypt = 0; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x6: /* AESMC */ decrypt = 0; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; case 0x5: /* AESD */ decrypt = 1; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x7: /* AESIMC */ decrypt = 1; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; default: unallocated_encoding(s); @@ -13868,15 +14434,11 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_decrypt = tcg_const_i32(tcg_ctx, decrypt); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_i32(tcg_ctx, tcg_decrypt); + if (genfn2) { + gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2); + } else { + gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3); + } } /* Crypto three-reg SHA @@ -13887,14 +14449,12 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) { - TCGContext 
*tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 3); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoThreeOpFn *genfn; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; + gen_helper_gvec_3 *genfn; bool feature; if (size != 0) { @@ -13904,10 +14464,19 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA1C */ + genfn = gen_helper_crypto_sha1c; + feature = dc_isar_feature(aa64_sha1, s); + break; case 1: /* SHA1P */ + genfn = gen_helper_crypto_sha1p; + feature = dc_isar_feature(aa64_sha1, s); + break; case 2: /* SHA1M */ + genfn = gen_helper_crypto_sha1m; + feature = dc_isar_feature(aa64_sha1, s); + break; case 3: /* SHA1SU0 */ - genfn = NULL; + genfn = gen_helper_crypto_sha1su0; feature = dc_isar_feature(aa64_sha1, s); break; case 4: /* SHA256H */ @@ -13936,23 +14505,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - if (genfn) { - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - } else { - TCGv_i32 tcg_opcode = tcg_const_i32(tcg_ctx, opcode); - - gen_helper_crypto_sha1_3reg(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, - tcg_rm_ptr, tcg_opcode); - tcg_temp_free_i32(tcg_ctx, tcg_opcode); - } - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); } /* Crypto two-reg SHA @@ -13963,14 +14516,12 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) */ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoTwoOpFn *genfn; + gen_helper_gvec_2 *genfn; bool feature; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; if (size != 0) { unallocated_encoding(s); @@ -14003,14 +14554,36 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } + gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); +} - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); +static void gen_rax1_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 n, + TCGv_i64 m) +{ + tcg_gen_rotli_i64(tcg_ctx, d, m, 1); + tcg_gen_xor_i64(tcg_ctx, d, d, n); +} - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr); +static void gen_rax1_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, + TCGv_vec n, TCGv_vec m) +{ + tcg_gen_rotli_vec(tcg_ctx, vece, d, m, 1); + tcg_gen_xor_vec(tcg_ctx, vece, d, d, n); +} - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); +void gen_gvec_rax1(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, + uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = {INDEX_op_rotli_vec, 0}; + static const GVecGen3 op = { + .fni8 = gen_rax1_i64, + .fniv = gen_rax1_vec, + .opt_opc = vecop_list, + .fno = gen_helper_crypto_rax1, + .vece = MO_64, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); } /* Crypto three-reg SHA512 @@ -14021,32 +14594,32 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int opcode = 
extract32(insn, 10, 2); - int o = extract32(insn, 14, 1); + int o = extract32(insn, 14, 1); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); bool feature; - CryptoThreeOpFn *genfn; + gen_helper_gvec_3 *oolfn = NULL; + GVecGen3Fn *gvecfn = NULL; if (o == 0) { switch (opcode) { case 0: /* SHA512H */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h; + oolfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h2; + oolfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su1; + oolfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ feature = dc_isar_feature(aa64_sha3, s); - genfn = NULL; + gvecfn = gen_gvec_rax1; break; default: g_assert_not_reached(); @@ -14055,15 +14628,15 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SM3PARTW1 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw1; + oolfn = gen_helper_crypto_sm3partw1; break; case 1: /* SM3PARTW2 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw2; + oolfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4ekey; + oolfn = gen_helper_crypto_sm4ekey; break; default: unallocated_encoding(s); @@ -14080,41 +14653,10 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (genfn) { - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); + if (oolfn) { + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); } else { - TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; - int pass; - - tcg_op1 = tcg_temp_new_i64(tcg_ctx); - tcg_op2 = tcg_temp_new_i64(tcg_ctx); - tcg_res[0] = tcg_temp_new_i64(tcg_ctx); - tcg_res[1] = tcg_temp_new_i64(tcg_ctx); - - for (pass = 0; pass < 2; pass++) { - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - tcg_gen_rotli_i64(tcg_ctx, tcg_res[pass], tcg_op2, 1); - tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_op1); - } - write_vec_element(s, tcg_res[0], rd, 0, MO_64); - write_vec_element(s, tcg_res[1], rd, 1, MO_64); - - tcg_temp_free_i64(tcg_ctx, tcg_op1); - tcg_temp_free_i64(tcg_ctx, tcg_op2); - tcg_temp_free_i64(tcg_ctx, tcg_res[0]); - tcg_temp_free_i64(tcg_ctx, tcg_res[1]); + gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); } } @@ -14126,22 +14668,17 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) */ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int opcode = extract32(insn, 10, 2); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; bool feature; - CryptoTwoOpFn *genfn; switch (opcode) { case 0: /* SHA512SU0 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4e; break; default: unallocated_encoding(s); @@ -14157,13 +14694,16 @@ static 
void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); + switch (opcode) { + case 0: /* SHA512SU0 */ + gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); + break; + case 1: /* SM4E */ + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); + break; + default: + g_assert_not_reached(); + } } /* Crypto four-register @@ -14321,14 +14861,17 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; + static gen_helper_gvec_3 *const fns[4] = { + gen_helper_crypto_sm3tt1a, + gen_helper_crypto_sm3tt1b, + gen_helper_crypto_sm3tt2a, + gen_helper_crypto_sm3tt2b, + }; int opcode = extract32(insn, 10, 2); int imm2 = extract32(insn, 12, 2); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - TCGv_i32 tcg_imm2, tcg_opcode; if (!dc_isar_feature(aa64_sm3, s)) { unallocated_encoding(s); @@ -14339,20 +14882,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - tcg_imm2 = tcg_const_i32(tcg_ctx, imm2); - tcg_opcode = tcg_const_i32(tcg_ctx, opcode); - - gen_helper_crypto_sm3tt(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2, - tcg_opcode); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); - tcg_temp_free_i32(tcg_ctx, tcg_imm2); - tcg_temp_free_i32(tcg_ctx, tcg_opcode); + gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); } /* C3.6 Data processing - SIMD, inc Crypto @@ -14362,40 +14892,39 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) */ static const AArch64DecodeTable data_proc_simd[] = { /* pattern , mask , fn */ - { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, - { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, - { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, - { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, - { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, - { 0x0e000400, 0x9fe08400, disas_simd_copy }, - { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ + {0x0e200400, 0x9f200400, disas_simd_three_reg_same}, + {0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra}, + {0x0e200000, 0x9f200c00, disas_simd_three_reg_diff}, + {0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc}, + {0x0e300800, 0x9f3e0c00, disas_simd_across_lanes}, + {0x0e000400, 0x9fe08400, disas_simd_copy}, + {0x0f000000, 0x9f000400, disas_simd_indexed}, /* vector indexed */ /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ - { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, - { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, - { 0x0e000000, 0xbf208c00, disas_simd_tb }, - { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, - { 0x2e000000, 0xbf208400, disas_simd_ext }, - { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, - { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, - { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, - { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, - { 
0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, - { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, - { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ - { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, - { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, - { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, - { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, - { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, - { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, - { 0xce000000, 0xff808000, disas_crypto_four_reg }, - { 0xce800000, 0xffe00000, disas_crypto_xar }, - { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, - { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 }, - { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, - { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 }, - { 0x00000000, 0x00000000, NULL } -}; + {0x0f000400, 0x9ff80400, disas_simd_mod_imm}, + {0x0f000400, 0x9f800400, disas_simd_shift_imm}, + {0x0e000000, 0xbf208c00, disas_simd_tb}, + {0x0e000800, 0xbf208c00, disas_simd_zip_trn}, + {0x2e000000, 0xbf208400, disas_simd_ext}, + {0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same}, + {0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra}, + {0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff}, + {0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc}, + {0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise}, + {0x5e000400, 0xdfe08400, disas_simd_scalar_copy}, + {0x5f000000, 0xdf000400, disas_simd_indexed}, /* scalar indexed */ + {0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm}, + {0x4e280800, 0xff3e0c00, disas_crypto_aes}, + {0x5e000000, 0xff208c00, disas_crypto_three_reg_sha}, + {0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha}, + {0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512}, + {0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512}, + {0xce000000, 0xff808000, disas_crypto_four_reg}, + {0xce800000, 0xffe00000, disas_crypto_xar}, + {0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2}, + {0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16}, + {0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16}, + {0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16}, + {0x00000000, 0x00000000, NULL}}; static void disas_data_proc_simd(DisasContext *s, uint32_t insn) { @@ -14447,7 +14976,7 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s) * table entry even for that case. */ return (tlb_hit(s->uc, entry->addr_code, addr) && - env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0); + arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs)); } /** @@ -14551,9 +15080,8 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) * everything else. This allows us to handle this now * instead of waiting until the insn is otherwise decoded. 
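* A non-zero BTYPE records that the previous instruction was an indirect
* branch; on a guarded page the branch target must be an instruction that
* btype_destination_ok() accepts, otherwise the Branch Target exception
* is raised.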
*/ - if (s->btype != 0 - && s->guarded_page - && !btype_destination_ok(insn, s->bt, s->btype)) { + if (s->btype != 0 && s->guarded_page && + !btype_destination_ok(insn, s->bt, s->btype)) { gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_btitrap(s->btype), default_exception_el(s)); @@ -14566,7 +15094,9 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) } switch (extract32(insn, 25, 4)) { - case 0x0: case 0x1: case 0x3: /* UNALLOCATED */ + case 0x0: + case 0x1: + case 0x3: /* UNALLOCATED */ unallocated_encoding(s); break; case 0x2: @@ -14574,24 +15104,26 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) unallocated_encoding(s); } break; - case 0x8: case 0x9: /* Data processing - immediate */ + case 0x8: + case 0x9: /* Data processing - immediate */ disas_data_proc_imm(s, insn); break; - case 0xa: case 0xb: /* Branch, exception generation and system insns */ + case 0xa: + case 0xb: /* Branch, exception generation and system insns */ disas_b_exc_sys(s, insn); break; case 0x4: case 0x6: case 0xc: - case 0xe: /* Loads and stores */ + case 0xe: /* Loads and stores */ disas_ldst(s, insn); break; case 0x5: - case 0xd: /* Data processing - register */ + case 0xd: /* Data processing - register */ disas_data_proc_reg(s, insn); break; case 0x7: - case 0xf: /* Data processing - SIMD and floating point */ + case 0xf: /* Data processing - SIMD and floating point */ disas_data_proc_simd_fp(s, insn); break; default: @@ -14630,8 +15162,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, /* If we are coming from secure EL0 in a system with a 32-bit EL3, then * there is no secure EL1, so we route exceptions to EL3. */ - dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) && - !arm_el_is_aa64(env, 3); + dc->secure_routed_to_el3 = + arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3); dc->thumb = 0; dc->sctlr_b = 0; dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE; @@ -14640,7 +15172,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX); dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII); - dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID); + dc->tcma = FIELD_EX32(tb_flags, TBFLAG_A64, TCMA); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); dc->user = (dc->current_el == 0); dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL); @@ -14650,10 +15182,14 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT); dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE); dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV); + dc->ata = FIELD_EX32(tb_flags, TBFLAG_A64, ATA); + dc->mte_active[0] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE); + dc->mte_active[1] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE0_ACTIVE); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; dc->features = env->features; + dc->dcz_blocksize = arm_cpu->dcz_blocksize; /* Single step state. 
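* (This models the AArch64 debug architecture software-step state machine,
* tracked through SS_ACTIVE and PSTATE.SS.)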
The code-generation logic here is: * SS_ACTIVE == 0: @@ -14691,9 +15227,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, init_tmp_a64_array(dc); } -static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) -{ -} +static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) {} static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) { @@ -14794,12 +15328,15 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_goto_tb(dc, 1, dc->base.pc_next); break; default: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_a64_set_pc_im(tcg_ctx, dc->base.pc_next); /* fall through */ case DISAS_EXIT: tcg_gen_exit_tb(tcg_ctx, NULL, 0); break; + case DISAS_UPDATE_NOCHAIN: + gen_a64_set_pc_im(tcg_ctx, dc->base.pc_next); + /* fall through */ case DISAS_JUMP: tcg_gen_lookup_and_goto_ptr(tcg_ctx); break; @@ -14814,8 +15351,7 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_a64_set_pc_im(tcg_ctx, dc->base.pc_next); gen_helper_yield(tcg_ctx, tcg_ctx->cpu_env); break; - case DISAS_WFI: - { + case DISAS_WFI: { /* This is a special case because we don't want to just halt the CPU * if trying to debug across a WFI. */ @@ -14842,10 +15378,9 @@ static void aarch64_sync_pc(DisasContextBase *db, CPUState *cpu) const TranslatorOps aarch64_translator_ops = { .init_disas_context = aarch64_tr_init_disas_context, - .tb_start = aarch64_tr_tb_start, - .insn_start = aarch64_tr_insn_start, - .breakpoint_check = aarch64_tr_breakpoint_check, - .translate_insn = aarch64_tr_translate_insn, - .tb_stop = aarch64_tr_tb_stop, - .pc_sync = aarch64_sync_pc -}; + .tb_start = aarch64_tr_tb_start, + .insn_start = aarch64_tr_insn_start, + .breakpoint_check = aarch64_tr_breakpoint_check, + .translate_insn = aarch64_tr_translate_insn, + .tb_stop = aarch64_tr_tb_stop, + .pc_sync = aarch64_sync_pc}; diff --git a/qemu/target/arm/translate-a64.h b/qemu/target/arm/translate-a64.h index 6092d1b02c..23bb6d490d 100644 --- a/qemu/target/arm/translate-a64.h +++ b/qemu/target/arm/translate-a64.h @@ -31,6 +31,7 @@ typedef struct TCGContext TCGContext; } while (0) TCGv_i64 new_tmp_a64(DisasContext *s); +TCGv_i64 new_tmp_a64_local(DisasContext *s); TCGv_i64 new_tmp_a64_zero(DisasContext *s); TCGv_i64 cpu_reg(DisasContext *s, int reg); TCGv_i64 cpu_reg_sp(DisasContext *s, int reg); @@ -41,6 +42,11 @@ TCGv_ptr get_fpstatus_ptr(TCGContext *tcg_ctx, bool); bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, unsigned int imms, unsigned int immr); bool sve_access_check(DisasContext *s); +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size); +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int count, int log2_esize); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that @@ -117,13 +123,7 @@ static inline int vec_full_reg_size(DisasContext *s) bool disas_sve(DisasContext *, uint32_t); -/* Note that the gvec expanders operate on offsets + sizes. 
*/ -typedef void GVecGen2Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, uint32_t); -typedef void GVecGen2iFn(TCGContext *, unsigned, uint32_t, uint32_t, int64_t, - uint32_t, uint32_t); -typedef void GVecGen3Fn(TCGContext *, unsigned, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t); -typedef void GVecGen4Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t); +void gen_gvec_rax1(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c new file mode 100644 index 0000000000..d3e353a2a4 --- /dev/null +++ b/qemu/target/arm/translate-neon.inc.c @@ -0,0 +1,4276 @@ +/* + * ARM translation: AArch32 Neon instructions + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2005-2007 CodeSourcery + * Copyright (c) 2007 OpenedHand, Ltd. + * Copyright (c) 2020 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +/* + * This file is intended to be included from translate.c; it uses + * some macros and definitions provided by that file. + * It might be possible to convert it to a standalone .c file eventually. + */ + +static inline int plus1(DisasContext *s, int x) +{ + return x + 1; +} + +static inline int rsub_64(DisasContext *s, int x) +{ + return 64 - x; +} + +static inline int rsub_32(DisasContext *s, int x) +{ + return 32 - x; +} +static inline int rsub_16(DisasContext *s, int x) +{ + return 16 - x; +} +static inline int rsub_8(DisasContext *s, int x) +{ + return 8 - x; +} + +/* Include the generated Neon decoder */ +#include "decode-neon-dp.inc.c" +#include "decode-neon-ls.inc.c" +#include "decode-neon-shared.inc.c" + +/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, + * where 0 is the least significant end of the register. + */ +static inline long +neon_element_offset(int reg, int element, MemOp size) +{ + int element_size = 1 << size; + int ofs = element * element_size; +#ifdef HOST_WORDS_BIGENDIAN + /* Calculate the offset assuming fully little-endian, + * then XOR to account for the order of the 8-byte units. 
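+ * For example, with 16-bit elements on a big-endian host, element 0
+ * sits at byte offset 6 within its 8-byte unit, so ofs 0 becomes 6 here.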
+ */ + if (element_size < 8) { + ofs ^= 8 - element_size; + } +#endif + return neon_reg_offset(reg, 0) + ofs; +} + +static void neon_load_element(TCGContext *tcg_ctx, TCGv_i32 var, int reg, int ele, MemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_load_element64(TCGContext *tcg_ctx, TCGv_i64 var, int reg, int ele, MemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld32u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_Q: + tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i32 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_32: + tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element64(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i64 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_32: + tcg_gen_st32_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_64: + tcg_gen_st_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + TCGv_ptr fpst; + gen_helper_gvec_3_ptr *fn_gvec_ptr; + + if (!dc_isar_feature(aa32_vcma, s) + || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, a->rot, + fn_gvec_ptr); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VCADD(DisasContext *s, arg_VCADD *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + TCGv_ptr fpst; + gen_helper_gvec_3_ptr *fn_gvec_ptr; + + if (!dc_isar_feature(aa32_vcma, s) + || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
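+ * (aa32_simd_r32 is true only when MVFR0.SIMDReg reports 32 doubleword
+ * registers.)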
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, a->rot, + fn_gvec_ptr); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VDOT(DisasContext *s, arg_VDOT *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + gen_helper_gvec_3 *fn_gvec; + + if (!dc_isar_feature(aa32_dp, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; + tcg_gen_gvec_3_ool(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + opr_sz, opr_sz, 0, fn_gvec); + return true; +} + +static bool trans_VFML(DisasContext *s, arg_VFML *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + + if (!dc_isar_feature(aa32_fhm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + (a->vd & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(a->q, a->vn), + vfp_reg_offset(a->q, a->vm), + tcg_ctx->cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ + gen_helper_gvec_fmlal_a32); + return true; +} + +static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + gen_helper_gvec_3_ptr *fn_gvec_ptr; + int opr_sz; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_vcma, s)) { + return false; + } + if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vn) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx + : gen_helper_gvec_fcmlah_idx); + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, + (a->index << 2) | a->rot, fn_gvec_ptr); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + gen_helper_gvec_3 *fn_gvec; + int opr_sz; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_dp, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn) & 0x10)) { + return false; + } + + if ((a->vd | a->vn) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn_gvec = a->u ? 
gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_gen_gvec_3_ool(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->rm), + opr_sz, opr_sz, a->index, fn_gvec); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + + if (!dc_isar_feature(aa32_fhm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(a->q, a->vn), + vfp_reg_offset(a->q, a->rm), + tcg_ctx->cpu_env, opr_sz, opr_sz, + (a->index << 2) | a->s, /* is_2 == 0 */ + gen_helper_gvec_fmlal_idx_a32); + return true; +} + +static struct { + int nregs; + int interleave; + int spacing; +} const neon_ls_element_type[11] = { + {1, 4, 1}, + {1, 4, 2}, + {4, 1, 1}, + {2, 2, 2}, + {1, 3, 1}, + {1, 3, 2}, + {3, 1, 1}, + {1, 1, 1}, + {1, 2, 1}, + {1, 2, 2}, + {2, 1, 1} +}; + +static void gen_neon_ldst_base_update(TCGContext *tcg_ctx, DisasContext *s, int rm, int rn, + int stride) +{ + if (rm != 15) { + TCGv_i32 base; + + base = load_reg(s, rn); + if (rm == 13) { + tcg_gen_addi_i32(tcg_ctx, base, base, stride); + } else { + TCGv_i32 index; + index = load_reg(s, rm); + tcg_gen_add_i32(tcg_ctx, base, base, index); + tcg_temp_free_i32(tcg_ctx, index); + } + store_reg(s, rn, base); + } +} + +static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Neon load/store multiple structures */ + int nregs, interleave, spacing, reg, n; + MemOp endian = s->be_data; + int mmu_idx = get_mem_index(s); + int size = a->size; + TCGv_i64 tmp64; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + if (a->itype > 10) { + return false; + } + /* Catch UNDEF cases for bad values of align field */ + switch (a->itype & 0xc) { + case 4: + if (a->align >= 2) { + return false; + } + break; + case 8: + if (a->align == 3) { + return false; + } + break; + default: + break; + } + nregs = neon_ls_element_type[a->itype].nregs; + interleave = neon_ls_element_type[a->itype].interleave; + spacing = neon_ls_element_type[a->itype].spacing; + if (size == 3 && (interleave | spacing) != 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + /* + * Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. 
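+ * For example, eight consecutive 8-bit elements can be loaded with one
+ * 64-bit access, which is why size is forced to 3 (MO_64) below.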
+ */ + if (interleave == 1 && endian == MO_LE) { + size = 3; + } + tmp64 = tcg_temp_new_i64(tcg_ctx); + addr = tcg_temp_new_i32(tcg_ctx); + tmp = tcg_const_i32(tcg_ctx, 1 << size); + load_reg_var(s, addr, a->rn); + for (reg = 0; reg < nregs; reg++) { + for (n = 0; n < 8 >> size; n++) { + int xs; + for (xs = 0; xs < interleave; xs++) { + int tt = a->vd + reg + spacing * xs; + + if (a->l) { + gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + neon_store_element64(tcg_ctx, tt, n, size, tmp64); + } else { + neon_load_element64(tcg_ctx, tmp64, tt, n, size); + gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); + } + tcg_gen_add_i32(tcg_ctx, addr, addr, tmp); + } + } + } + tcg_temp_free_i32(tcg_ctx, addr); + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i64(tcg_ctx, tmp64); + + gen_neon_ldst_base_update( + tcg_ctx, s, a->rm, a->rn, nregs * interleave * 8); + return true; +} + +static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Neon load single structure to all lanes */ + int reg, stride, vec_size; + int vd = a->vd; + int size = a->size; + int nregs = a->n + 1; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + if (size == 3) { + if (nregs != 4 || a->a == 0) { + return false; + } + /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ + size = 2; + } + if (nregs == 1 && a->a == 1 && size == 0) { + return false; + } + if (nregs == 3 && a->a == 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * VLD1 to all lanes: T bit indicates how many Dregs to write. + * VLD2/3/4 to all lanes: T bit indicates register stride. + */ + stride = a->t ? 2 : 1; + vec_size = nregs == 1 ? stride * 8 : 8; + + tmp = tcg_temp_new_i32(tcg_ctx); + addr = tcg_temp_new_i32(tcg_ctx); + load_reg_var(s, addr, a->rn); + for (reg = 0; reg < nregs; reg++) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + if ((vd & 1) && vec_size == 16) { + /* + * We cannot write 16 bytes at once because the + * destination is unaligned. + */ + tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0), + 8, 8, tmp); + tcg_gen_gvec_mov(tcg_ctx, 0, neon_reg_offset(vd + 1, 0), + neon_reg_offset(vd, 0), 8, 8); + } else { + tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0), + vec_size, vec_size, tmp); + } + tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size); + vd += stride; + } + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, addr); + + gen_neon_ldst_base_update(tcg_ctx, s, a->rm, a->rn, (1 << size) * nregs); + + return true; +} + +static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Neon load/store single structure to one lane */ + int reg; + int nregs = a->n + 1; + int vd = a->vd; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + /* Catch the UNDEF cases. This is unavoidably a bit messy. 
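+ * The architected alignment and size restrictions differ for each of
+ * VLD1/VST1 through VLD4/VST4 to one lane, hence the per-nregs checks below.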
*/ + switch (nregs) { + case 1: + if (((a->align & (1 << a->size)) != 0) || + (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) { + return false; + } + break; + case 3: + if ((a->align & 1) != 0) { + return false; + } + /* fall through */ + case 2: + if (a->size == 2 && (a->align & 2) != 0) { + return false; + } + break; + case 4: + if ((a->size == 2) && ((a->align & 3) == 3)) { + return false; + } + break; + default: + abort(); + } + if ((vd + a->stride * (nregs - 1)) > 31) { + /* + * Attempts to write off the end of the register file are + * UNPREDICTABLE; we choose to UNDEF because otherwise we would + * access off the end of the array that holds the register data. + */ + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(tcg_ctx); + addr = tcg_temp_new_i32(tcg_ctx); + load_reg_var(s, addr, a->rn); + /* + * TODO: if we implemented alignment exceptions, we should check + * addr against the alignment encoded in a->align here. + */ + for (reg = 0; reg < nregs; reg++) { + if (a->l) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | a->size); + neon_store_element(tcg_ctx, vd, a->reg_idx, a->size, tmp); + } else { /* Store */ + neon_load_element(tcg_ctx, tmp, vd, a->reg_idx, a->size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), + s->be_data | a->size); + } + vd += a->stride; + tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << a->size); + } + tcg_temp_free_i32(tcg_ctx, addr); + tcg_temp_free_i32(tcg_ctx, tmp); + + gen_neon_ldst_base_update(tcg_ctx, s, a->rm, a->rn, (1 << a->size) * nregs); + + return true; +} + +static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rn_ofs = neon_reg_offset(a->vn, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(tcg_ctx, a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return true; +} + +#define DO_3SAME(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + return do_3same(s, a, FUNC); \ + } + +DO_3SAME(VADD, tcg_gen_gvec_add) +DO_3SAME(VSUB, tcg_gen_gvec_sub) +DO_3SAME(VAND, tcg_gen_gvec_and) +DO_3SAME(VBIC, tcg_gen_gvec_andc) +DO_3SAME(VORR, tcg_gen_gvec_or) +DO_3SAME(VORN, tcg_gen_gvec_orc) +DO_3SAME(VEOR, tcg_gen_gvec_xor) +DO_3SAME(VSHL_S, gen_gvec_sshl) +DO_3SAME(VSHL_U, gen_gvec_ushl) +DO_3SAME(VQADD_S, gen_gvec_sqadd_qc) +DO_3SAME(VQADD_U, gen_gvec_uqadd_qc) +DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc) +DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc) + +/* These insns are all gvec_bitsel but with the inputs in various orders. 
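+ * VBSL selects with Vd as the mask, VBIT inserts Vn bits where Vm is set,
+ * and VBIF inserts Vn bits where Vm is clear; O1/O2/O3 simply permute the
+ * operands passed to tcg_gen_gvec_bitsel().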
*/ +#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, unsigned vece, \ + uint32_t rd_ofs, \ + uint32_t rn_ofs, \ + uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_bitsel(tcg_ctx, vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \ + } \ + DO_3SAME(INSN, gen_##INSN##_3s) + +DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs) +DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs) +DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs) + +#define DO_3SAME_NO_SZ_3(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size == 3) { \ + return false; \ + } \ + return do_3same(s, a, FUNC); \ + } + +DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax) +DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax) +DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin) +DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin) +DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul) +DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla) +DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls) +DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst) +DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd) +DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba) +DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd) +DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba) + +#define DO_3SAME_CMP(INSN, COND) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_cmp(tcg_ctx, COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \ + } \ + DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s) + +DO_3SAME_CMP(VCGT_S, TCG_COND_GT) +DO_3SAME_CMP(VCGT_U, TCG_COND_GTU) +DO_3SAME_CMP(VCGE_S, TCG_COND_GE) +DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) +DO_3SAME_CMP(VCEQ, TCG_COND_EQ) + +#define WRAP_OOL_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \ + } + +WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b) + +static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + return false; + } + return do_3same(s, a, gen_VMUL_p_3s); +} + +#define DO_VQRDMLAH(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_rdm, s)) { \ + return false; \ + } \ + if (a->size != 1 && a->size != 2) { \ + return false; \ + } \ + return do_3same(s, a, FUNC); \ + } + +DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc) +DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc) + +#define DO_SHA1(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha1, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ + } + +DO_SHA1(SHA1C, gen_helper_crypto_sha1c) +DO_SHA1(SHA1P, gen_helper_crypto_sha1p) +DO_SHA1(SHA1M, gen_helper_crypto_sha1m) +DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0) + +#define DO_SHA2(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha2, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ + } + +DO_SHA2(SHA256H, gen_helper_crypto_sha256h) +DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) +DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) + +#define DO_3SAME_64(INSN, FUNC) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, 
uint32_t maxsz) \ + { \ + static const GVecGen3 op = { .fni8 = FUNC }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \ + } \ + DO_3SAME(INSN, gen_##INSN##_3s) + +#define DO_3SAME_64_ENV(INSN, FUNC) \ + static void gen_##INSN##_elt(TCGContext *tcg_ctx, \ + TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ + { \ + FUNC(tcg_ctx, d, tcg_ctx->cpu_env, n, m); \ + } \ + DO_3SAME_64(INSN, gen_##INSN##_elt) + +DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) +DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) +DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) +DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) +DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) +DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) + +#define DO_3SAME_32(INSN, FUNC) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[4] = { \ + { .fni4 = gen_helper_neon_##FUNC##8 }, \ + { .fni4 = gen_helper_neon_##FUNC##16 }, \ + { .fni4 = gen_helper_neon_##FUNC##32 }, \ + { 0 }, \ + }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + +/* + * Some helper functions need to be passed the tcg_ctx->cpu_env. In order + * to use those with the gvec APIs like tcg_gen_gvec_3(tcg_ctx, ) we need + * to create wrapper functions whose prototype is a NeonGenTwoopfn(tcg_ctx, ) + * and which call a NeonGenTwoOpEnvFn(). + */ +#define WRAP_ENV_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ + { \ + FUNC(tcg_ctx, d, tcg_ctx->cpu_env, n, m); \ + } + +#define DO_3SAME_32_ENV(INSN, FUNC) \ + WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ + WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ + WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[4] = { \ + { .fni4 = gen_##INSN##_tramp8 }, \ + { .fni4 = gen_##INSN##_tramp16 }, \ + { .fni4 = gen_##INSN##_tramp32 }, \ + { 0 }, \ + }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + +DO_3SAME_32(VHADD_S, hadd_s) +DO_3SAME_32(VHADD_U, hadd_u) +DO_3SAME_32(VHSUB_S, hsub_s) +DO_3SAME_32(VHSUB_U, hsub_u) +DO_3SAME_32(VRHADD_S, rhadd_s) +DO_3SAME_32(VRHADD_U, rhadd_u) +DO_3SAME_32(VRSHL_S, rshl_s) +DO_3SAME_32(VRSHL_U, rshl_u) + +DO_3SAME_32_ENV(VQSHL_S, qshl_s) +DO_3SAME_32_ENV(VQSHL_U, qshl_u) +DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) +DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) + +static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + /* Operations handled pairwise 32 bits at a time */ + TCGv_i32 tmp, tmp2, tmp3; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + assert(a->q == 0); /* enforced by decode patterns */ + + /* + * Note that we have to be careful not to clobber the source operands + * in the "vm == vd" case by storing the result of the first pass too + * early. Since Q is 0 there are always just two passes, so instead + * of a complicated loop over each pass we just unroll. + */ + tmp = neon_load_reg(tcg_ctx, a->vn, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vn, 1); + fn(tcg_ctx, tmp, tmp, tmp2); + tcg_temp_free_i32(tcg_ctx, tmp2); + + tmp3 = neon_load_reg(tcg_ctx, a->vm, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + fn(tcg_ctx, tmp3, tmp3, tmp2); + tcg_temp_free_i32(tcg_ctx, tmp2); + + neon_store_reg(tcg_ctx, a->vd, 0, tmp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + return true; +} + +#define DO_3SAME_PAIR(INSN, func) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + static NeonGenTwoOpFn * const fns[] = { \ + gen_helper_neon_##func##8, \ + gen_helper_neon_##func##16, \ + gen_helper_neon_##func##32, \ + }; \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same_pair(s, a, fns[a->size]); \ + } + +/* 32-bit pairwise ops end up the same as the elementwise versions. */ +#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 +#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 +#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 +#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 +#define gen_helper_neon_padd_u32 tcg_gen_add_i32 + +DO_3SAME_PAIR(VPMAX_S, pmax_s) +DO_3SAME_PAIR(VPMIN_S, pmin_s) +DO_3SAME_PAIR(VPMAX_U, pmax_u) +DO_3SAME_PAIR(VPMIN_U, pmin_u) +DO_3SAME_PAIR(VPADD, padd_u) + +#define DO_3SAME_VQDMULH(INSN, FUNC) \ + WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ + WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[2] = { \ + { .fni4 = gen_##INSN##_tramp16 }, \ + { .fni4 = gen_##INSN##_tramp32 }, \ + }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 1 && a->size != 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + +DO_3SAME_VQDMULH(VQDMULH, qdmulh) +DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) + +static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn, + bool reads_vd) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * FP operations handled elementwise 32 bits at a time. + * If reads_vd is true then the old value of Vd will be + * loaded before calling the callback function. This is + * used for multiply-accumulate type operations. + */ + TCGv_i32 tmp, tmp2; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); + for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { + tmp = neon_load_reg(tcg_ctx, a->vn, pass); + tmp2 = neon_load_reg(tcg_ctx, a->vm, pass); + if (reads_vd) { + TCGv_i32 tmp_rd = neon_load_reg(tcg_ctx, a->vd, pass); + fn(tcg_ctx, tmp_rd, tmp, tmp2, fpstatus); + neon_store_reg(tcg_ctx, a->vd, pass, tmp_rd); + tcg_temp_free_i32(tcg_ctx, tmp); + } else { + fn(tcg_ctx, tmp, tmp, tmp2, fpstatus); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_i32(tcg_ctx, tmp2); + } + tcg_temp_free_ptr(tcg_ctx, fpstatus); + return true; +} + +/* + * For all the functions using this macro, size == 1 means fp16, + * which is an architecture extension we don't implement yet. + */ +#define DO_3S_FP_GVEC(INSN,FUNC) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, 1); \ + tcg_gen_gvec_3_ptr(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, fpst, \ + oprsz, maxsz, 0, FUNC); \ + tcg_temp_free_ptr(tcg_ctx, fpst); \ + } \ + static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 0) { \ + /* TODO fp16 support */ \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + + +DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s) +DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s) +DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s) +DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s) + +/* + * For all the functions using this macro, size == 1 means fp16, + * which is an architecture extension we don't implement yet. + */ +#define DO_3S_FP(INSN,FUNC,READS_VD) \ + static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 0) { \ + /* TODO fp16 support */ \ + return false; \ + } \ + return do_3same_fp(s, a, FUNC, READS_VD); \ + } + +DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false) +DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false) +DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false) +DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false) +DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false) +DO_3S_FP(VMAX, gen_helper_vfp_maxs, false) +DO_3S_FP(VMIN, gen_helper_vfp_mins, false) + +static void gen_VMLA_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, + TCGv_i32 vm, TCGv_ptr fpstatus) +{ + gen_helper_vfp_muls(tcg_ctx, vn, vn, vm, fpstatus); + gen_helper_vfp_adds(tcg_ctx, vd, vd, vn, fpstatus); +} + +static void gen_VMLS_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, + TCGv_i32 vm, TCGv_ptr fpstatus) +{ + gen_helper_vfp_muls(tcg_ctx, vn, vn, vm, fpstatus); + gen_helper_vfp_subs(tcg_ctx, vd, vd, vn, fpstatus); +} + +DO_3S_FP(VMLA, gen_VMLA_fp_3s, true) +DO_3S_FP(VMLS, gen_VMLS_fp_3s, true) + +static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_helper_vfp_maxnums, false); +} + +static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_helper_vfp_minnums, false); +} + +WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32) + +static void gen_VRECPS_fp_3s(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops); +} + +static 
bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same(s, a, gen_VRECPS_fp_3s); +} + +WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32) + +static void gen_VRSQRTS_fp_3s(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops); +} + +static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same(s, a, gen_VRSQRTS_fp_3s); +} + +static void gen_VFMA_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, + TCGv_ptr fpstatus) +{ + gen_helper_vfp_muladds(tcg_ctx, vd, vn, vm, vd, fpstatus); +} + +static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!dc_isar_feature(aa32_simdfmac, s)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_VFMA_fp_3s, true); +} + +static void gen_VFMS_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, + TCGv_ptr fpstatus) +{ + gen_helper_vfp_negs(tcg_ctx, vn, vn); + gen_helper_vfp_muladds(tcg_ctx, vd, vn, vm, vd, fpstatus); +} + +static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!dc_isar_feature(aa32_simdfmac, s)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_VFMS_fp_3s, true); +} + +static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* FP operations handled pairwise 32 bits at a time */ + TCGv_i32 tmp, tmp2, tmp3; + TCGv_ptr fpstatus; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + assert(a->q == 0); /* enforced by decode patterns */ + + /* + * Note that we have to be careful not to clobber the source operands + * in the "vm == vd" case by storing the result of the first pass too + * early. Since Q is 0 there are always just two passes, so instead + * of a complicated loop over each pass we just unroll. + */ + fpstatus = get_fpstatus_ptr(tcg_ctx, 1); + tmp = neon_load_reg(tcg_ctx, a->vn, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vn, 1); + fn(tcg_ctx, tmp, tmp, tmp2, fpstatus); + tcg_temp_free_i32(tcg_ctx, tmp2); + + tmp3 = neon_load_reg(tcg_ctx, a->vm, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + fn(tcg_ctx, tmp3, tmp3, tmp2, fpstatus); + tcg_temp_free_i32(tcg_ctx, tmp2); + tcg_temp_free_ptr(tcg_ctx, fpstatus); + + neon_store_reg(tcg_ctx, a->vd, 0, tmp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + return true; +} + +/* + * For all the functions using this macro, size == 1 means fp16, + * which is an architecture extension we don't implement yet. 
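+ * (The size == 1 encodings are added by the Armv8.2 half-precision
+ * extension, FEAT_FP16.)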
+ */ +#define DO_3S_FP_PAIR(INSN,FUNC) \ + static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 0) { \ + /* TODO fp16 support */ \ + return false; \ + } \ + return do_3same_fp_pair(s, a, FUNC); \ + } + +DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds) +DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs) +DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins) + +static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Handle a 2-reg-shift insn which can be vectorized. */ + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(tcg_ctx, a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size); + return true; +} + +#define DO_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_vector_2sh(s, a, FUNC); \ + } \ + +DO_2SH(VSHL, tcg_gen_gvec_shli) +DO_2SH(VSLI, gen_gvec_sli) +DO_2SH(VSRI, gen_gvec_sri) +DO_2SH(VSRA_S, gen_gvec_ssra) +DO_2SH(VSRA_U, gen_gvec_usra) +DO_2SH(VRSHR_S, gen_gvec_srshr) +DO_2SH(VRSHR_U, gen_gvec_urshr) +DO_2SH(VRSRA_S, gen_gvec_srsra) +DO_2SH(VRSRA_U, gen_gvec_ursra) + +static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Signed shift out of range results in all-sign-bits */ + a->shift = MIN(a->shift, (8 << a->size) - 1); + return do_vector_2sh(s, a, tcg_gen_gvec_sari); +} + +static void gen_zero_rd_2sh(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(tcg_ctx, vece, rd_ofs, oprsz, maxsz, 0); +} + +static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Shift out of range is architecturally valid and results in zero. */ + if (a->shift >= (8 << a->size)) { + return do_vector_2sh(s, a, gen_zero_rd_2sh); + } else { + return do_vector_2sh(s, a, tcg_gen_gvec_shri); + } +} + +static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpEnvFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * 2-reg-and-shift operations, size == 3 case, where the + * function needs to be passed tcg_ctx->cpu_env. + */ + TCGv_i64 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. 
+ */ + constimm = tcg_const_i64(tcg_ctx, dup_const(a->size, a->shift)); + + for (pass = 0; pass < a->q + 1; pass++) { + TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); + + neon_load_reg64(tcg_ctx, tmp, a->vm + pass); + fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, constimm); + neon_store_reg64(tcg_ctx, tmp, a->vd + pass); + tcg_temp_free_i64(tcg_ctx, tmp); + } + tcg_temp_free_i64(tcg_ctx, constimm); + return true; +} + +static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpEnvFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * 2-reg-and-shift operations, size < 3 case, where the + * helper needs to be passed tcg_ctx->cpu_env. + */ + TCGv_i32 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. + */ + constimm = tcg_const_i32(tcg_ctx, dup_const(a->size, a->shift)); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, constimm); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_i32(tcg_ctx, constimm); + return true; +} + +#define DO_2SHIFT_ENV(INSN, FUNC) \ + static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \ + } \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + static NeonGenTwoOpEnvFn * const fns[] = { \ + gen_helper_neon_##FUNC##8, \ + gen_helper_neon_##FUNC##16, \ + gen_helper_neon_##FUNC##32, \ + }; \ + assert(a->size < ARRAY_SIZE(fns)); \ + return do_2shift_env_32(s, a, fns[a->size]); \ + } + +DO_2SHIFT_ENV(VQSHLU, qshlu_s) +DO_2SHIFT_ENV(VQSHL_U, qshl_u) +DO_2SHIFT_ENV(VQSHL_S, qshl_s) + +static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 2-reg-and-shift narrowing-shift operations, size == 3 case */ + TCGv_i64 constimm, rm1, rm2; + TCGv_i32 rd; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count. 
+ */ + constimm = tcg_const_i64(tcg_ctx, -a->shift); + rm1 = tcg_temp_new_i64(tcg_ctx); + rm2 = tcg_temp_new_i64(tcg_ctx); + + /* Load both inputs first to avoid potential overwrite if rm == rd */ + neon_load_reg64(tcg_ctx, rm1, a->vm); + neon_load_reg64(tcg_ctx, rm2, a->vm + 1); + + shiftfn(tcg_ctx, rm1, rm1, constimm); + rd = tcg_temp_new_i32(tcg_ctx); + narrowfn(tcg_ctx, rd, tcg_ctx->cpu_env, rm1); + neon_store_reg(tcg_ctx, a->vd, 0, rd); + + shiftfn(tcg_ctx, rm2, rm2, constimm); + rd = tcg_temp_new_i32(tcg_ctx); + narrowfn(tcg_ctx, rd, tcg_ctx->cpu_env, rm2); + neon_store_reg(tcg_ctx, a->vd, 1, rd); + + tcg_temp_free_i64(tcg_ctx, rm1); + tcg_temp_free_i64(tcg_ctx, rm2); + tcg_temp_free_i64(tcg_ctx, constimm); + + return true; +} + +static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 2-reg-and-shift narrowing-shift operations, size < 3 case */ + TCGv_i32 constimm, rm1, rm2, rm3, rm4; + TCGv_i64 rtmp; + uint32_t imm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count + * duplicated into each lane of the immediate value. + */ + if (a->size == 1) { + imm = (uint16_t)(-a->shift); + imm |= imm << 16; + } else { + /* size == 2 */ + imm = -a->shift; + } + constimm = tcg_const_i32(tcg_ctx, imm); + + /* Load all inputs first to avoid potential overwrite */ + rm1 = neon_load_reg(tcg_ctx, a->vm, 0); + rm2 = neon_load_reg(tcg_ctx, a->vm, 1); + rm3 = neon_load_reg(tcg_ctx, a->vm + 1, 0); + rm4 = neon_load_reg(tcg_ctx, a->vm + 1, 1); + rtmp = tcg_temp_new_i64(tcg_ctx); + + shiftfn(tcg_ctx, rm1, rm1, constimm); + shiftfn(tcg_ctx, rm2, rm2, constimm); + + tcg_gen_concat_i32_i64(tcg_ctx, rtmp, rm1, rm2); + tcg_temp_free_i32(tcg_ctx, rm2); + + narrowfn(tcg_ctx, rm1, tcg_ctx->cpu_env, rtmp); + neon_store_reg(tcg_ctx, a->vd, 0, rm1); + + shiftfn(tcg_ctx, rm3, rm3, constimm); + shiftfn(tcg_ctx, rm4, rm4, constimm); + tcg_temp_free_i32(tcg_ctx, constimm); + + tcg_gen_concat_i32_i64(tcg_ctx, rtmp, rm3, rm4); + tcg_temp_free_i32(tcg_ctx, rm4); + + narrowfn(tcg_ctx, rm3, tcg_ctx->cpu_env, rtmp); + tcg_temp_free_i64(tcg_ctx, rtmp); + neon_store_reg(tcg_ctx, a->vd, 1, rm3); + return true; +} + +#define DO_2SN_64(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \ + } +#define DO_2SN_32(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \ + } + +static void gen_neon_narrow_u32(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + tcg_gen_extrl_i64_i32(tcg_ctx, dest, src); +} + +static void gen_neon_narrow_u16(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u16(tcg_ctx, dest, src); +} + +static void gen_neon_narrow_u8(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u8(tcg_ctx, dest, src); +} + +DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32) +DO_2SN_32(VSHRN_32, gen_ushl_i32, 
gen_neon_narrow_u16) +DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8) + +DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32) +DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16) +DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8) + +DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8) + +DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8) +DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8) + +DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8) + +static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenWidenFn *widenfn, bool u) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i64 tmp; + TCGv_i32 rm0, rm1; + uint64_t widen_mask = 0; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is a widen-and-shift operation. The shift is always less + * than the width of the source type, so after widening the input + * vector we can simply shift the whole 64-bit widened register, + * and then clear the potential overflow bits resulting from left + * bits of the narrow input appearing as right bits of the left + * neighbour narrow input. Calculate a mask of bits to clear. 
+ */ + if ((a->shift != 0) && (a->size < 2 || u)) { + int esize = 8 << a->size; + widen_mask = MAKE_64BIT_MASK(0, esize); + widen_mask >>= esize - a->shift; + widen_mask = dup_const(a->size + 1, widen_mask); + } + + rm0 = neon_load_reg(tcg_ctx, a->vm, 0); + rm1 = neon_load_reg(tcg_ctx, a->vm, 1); + tmp = tcg_temp_new_i64(tcg_ctx); + + widenfn(tcg_ctx, tmp, rm0); + tcg_temp_free_i32(tcg_ctx, rm0); + if (a->shift != 0) { + tcg_gen_shli_i64(tcg_ctx, tmp, tmp, a->shift); + tcg_gen_andi_i64(tcg_ctx, tmp, tmp, ~widen_mask); + } + neon_store_reg64(tcg_ctx, tmp, a->vd); + + widenfn(tcg_ctx, tmp, rm1); + tcg_temp_free_i32(tcg_ctx, rm1); + if (a->shift != 0) { + tcg_gen_shli_i64(tcg_ctx, tmp, tmp, a->shift); + tcg_gen_andi_i64(tcg_ctx, tmp, tmp, ~widen_mask); + } + neon_store_reg64(tcg_ctx, tmp, a->vd + 1); + tcg_temp_free_i64(tcg_ctx, tmp); + return true; +} + +static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], false); +} + +static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], true); +} + +static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoSingleOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* FP operations in 2-reg-and-shift group */ + TCGv_i32 tmp, shiftv; + TCGv_ptr fpstatus; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpstatus = get_fpstatus_ptr(tcg_ctx, 1); + shiftv = tcg_const_i32(tcg_ctx, a->shift); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tmp, shiftv, fpstatus); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_ptr(tcg_ctx, fpstatus); + tcg_temp_free_i32(tcg_ctx, shiftv); + return true; +} + +#define DO_FP_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_fp_2sh(s, a, FUNC); \ + } + +DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos) +DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos) +DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero) +DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero) + +static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op) +{ + /* + * Expand the encoded constant. + * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. + * We choose to not special-case this and will behave as if a + * valid constant encoding of 0 had been given. + * cmode = 15 op = 1 must UNDEF; we assume decode has handled that. + */ + switch (cmode) { + case 0: case 1: + /* no-op */ + break; + case 2: case 3: + imm <<= 8; + break; + case 4: case 5: + imm <<= 16; + break; + case 6: case 7: + imm <<= 24; + break; + case 8: case 9: + imm |= imm << 16; + break; + case 10: case 11: + imm = (imm << 8) | (imm << 24); + break; + case 12: + imm = (imm << 8) | 0xff; + break; + case 13: + imm = (imm << 16) | 0xffff; + break; + case 14: + if (op) { + /* + * This is the only case where the top and bottom 32 bits + * of the encoded constant differ. 
+ */ + uint64_t imm64 = 0; + int n; + + for (n = 0; n < 8; n++) { + if (imm & (1 << n)) { + imm64 |= (0xffULL << (n * 8)); + } + } + return imm64; + } + imm |= (imm << 8) | (imm << 16) | (imm << 24); + break; + case 15: + imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) + | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); + break; + } + if (op) { + imm = ~imm; + } + return dup_const(MO_32, imm); +} + +static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, + GVecGen2iFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + uint64_t imm; + int reg_ofs, vec_size; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + reg_ofs = neon_reg_offset(a->vd, 0); + vec_size = a->q ? 16 : 8; + imm = asimd_imm_const(a->imm, a->cmode, a->op); + + fn(tcg_ctx, MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size); + return true; +} + +static void gen_VMOV_1r(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, c); +} + +static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) +{ + /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ + GVecGen2iFn *fn; + + if ((a->cmode & 1) && a->cmode < 12) { + /* for op=1, the imm will be inverted, so BIC becomes AND. */ + fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; + } else { + /* There is one unallocated cmode/op combination in this space */ + if (a->cmode == 15 && a->op == 1) { + return false; + } + fn = gen_VMOV_1r; + } + return do_1reg_imm(s, a, fn); +} + +static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, + NeonGenWidenFn *widenfn, + NeonGenTwo64OpFn *opfn, + bool src1_wide) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */ + TCGv_i64 rn0_64, rn1_64, rm_64; + TCGv_i32 rm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!widenfn || !opfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if ((a->vd & 1) || (src1_wide && (a->vn & 1))) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rn0_64 = tcg_temp_new_i64(tcg_ctx); + rn1_64 = tcg_temp_new_i64(tcg_ctx); + rm_64 = tcg_temp_new_i64(tcg_ctx); + + if (src1_wide) { + neon_load_reg64(tcg_ctx, rn0_64, a->vn); + } else { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vn, 0); + widenfn(tcg_ctx, rn0_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + } + rm = neon_load_reg(tcg_ctx, a->vm, 0); + + widenfn(tcg_ctx, rm_64, rm); + tcg_temp_free_i32(tcg_ctx, rm); + opfn(tcg_ctx, rn0_64, rn0_64, rm_64); + + /* + * Load second pass inputs before storing the first pass result, to + * avoid incorrect results if a narrow input overlaps with the result. 
+ */ + if (src1_wide) { + neon_load_reg64(tcg_ctx, rn1_64, a->vn + 1); + } else { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vn, 1); + widenfn(tcg_ctx, rn1_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + } + rm = neon_load_reg(tcg_ctx, a->vm, 1); + + neon_store_reg64(tcg_ctx, rn0_64, a->vd); + + widenfn(tcg_ctx, rm_64, rm); + tcg_temp_free_i32(tcg_ctx, rm); + opfn(tcg_ctx, rn1_64, rn1_64, rm_64); + neon_store_reg64(tcg_ctx, rn1_64, a->vd + 1); + + tcg_temp_free_i64(tcg_ctx, rn0_64); + tcg_temp_free_i64(tcg_ctx, rn1_64); + tcg_temp_free_i64(tcg_ctx, rm_64); + + return true; +} + +#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenWidenFn * const widenfn[] = { \ + gen_helper_neon_widen_##S##8, \ + gen_helper_neon_widen_##S##16, \ + tcg_gen_##EXT##_i32_i64, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const addfn[] = { \ + gen_helper_neon_##OP##l_u16, \ + gen_helper_neon_##OP##l_u32, \ + tcg_gen_##OP##_i64, \ + NULL, \ + }; \ + return do_prewiden_3d(s, a, widenfn[a->size], \ + addfn[a->size], SRC1WIDE); \ + } + +DO_PREWIDEN(VADDL_S, s, ext, add, false) +DO_PREWIDEN(VADDL_U, u, extu, add, false) +DO_PREWIDEN(VSUBL_S, s, ext, sub, false) +DO_PREWIDEN(VSUBL_U, u, extu, sub, false) +DO_PREWIDEN(VADDW_S, s, ext, add, true) +DO_PREWIDEN(VADDW_U, u, extu, add, true) +DO_PREWIDEN(VSUBW_S, s, ext, sub, true) +DO_PREWIDEN(VSUBW_U, u, extu, sub, true) + +static bool do_narrow_3d(DisasContext *s, arg_3diff *a, + NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */ + TCGv_i64 rn_64, rm_64; + TCGv_i32 rd0, rd1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn || !narrowfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if ((a->vn | a->vm) & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rn_64 = tcg_temp_new_i64(tcg_ctx); + rm_64 = tcg_temp_new_i64(tcg_ctx); + rd0 = tcg_temp_new_i32(tcg_ctx); + rd1 = tcg_temp_new_i32(tcg_ctx); + + neon_load_reg64(tcg_ctx, rn_64, a->vn); + neon_load_reg64(tcg_ctx, rm_64, a->vm); + + opfn(tcg_ctx, rn_64, rn_64, rm_64); + + narrowfn(tcg_ctx, rd0, rn_64); + + neon_load_reg64(tcg_ctx, rn_64, a->vn + 1); + neon_load_reg64(tcg_ctx, rm_64, a->vm + 1); + + opfn(tcg_ctx, rn_64, rn_64, rm_64); + + narrowfn(tcg_ctx, rd1, rn_64); + + neon_store_reg(tcg_ctx, a->vd, 0, rd0); + neon_store_reg(tcg_ctx, a->vd, 1, rd1); + + tcg_temp_free_i64(tcg_ctx, rn_64); + tcg_temp_free_i64(tcg_ctx, rm_64); + + return true; +} + +#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenTwo64OpFn * const addfn[] = { \ + gen_helper_neon_##OP##l_u16, \ + gen_helper_neon_##OP##l_u32, \ + tcg_gen_##OP##_i64, \ + NULL, \ + }; \ + static NeonGenNarrowFn * const narrowfn[] = { \ + gen_helper_neon_##NARROWTYPE##_high_u8, \ + gen_helper_neon_##NARROWTYPE##_high_u16, \ + EXTOP, \ + NULL, \ + }; \ + return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \ + } + +static void gen_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 rd, TCGv_i64 rn) +{ + tcg_gen_addi_i64(tcg_ctx, rn, rn, 1u << 31); + tcg_gen_extrh_i64_i32(tcg_ctx, rd, rn); +} + +DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32) +DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32) +DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32) +DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32) + +static bool do_long_3d(DisasContext *s, arg_3diff *a, + NeonGenTwoOpWidenFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * 3-regs different lengths, long operations. + * These perform an operation on two inputs that returns a double-width + * result, and then possibly perform an accumulation operation of + * that result into the double-width destination. + */ + TCGv_i64 rd0, rd1, tmp; + TCGv_i32 rn, rm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rd0 = tcg_temp_new_i64(tcg_ctx); + rd1 = tcg_temp_new_i64(tcg_ctx); + + rn = neon_load_reg(tcg_ctx, a->vn, 0); + rm = neon_load_reg(tcg_ctx, a->vm, 0); + opfn(tcg_ctx, rd0, rn, rm); + tcg_temp_free_i32(tcg_ctx, rn); + tcg_temp_free_i32(tcg_ctx, rm); + + rn = neon_load_reg(tcg_ctx, a->vn, 1); + rm = neon_load_reg(tcg_ctx, a->vm, 1); + opfn(tcg_ctx, rd1, rn, rm); + tcg_temp_free_i32(tcg_ctx, rn); + tcg_temp_free_i32(tcg_ctx, rm); + + /* Don't store results until after all loads: they might overlap */ + if (accfn) { + tmp = tcg_temp_new_i64(tcg_ctx); + neon_load_reg64(tcg_ctx, tmp, a->vd); + accfn(tcg_ctx, tmp, tmp, rd0); + neon_store_reg64(tcg_ctx, tmp, a->vd); + neon_load_reg64(tcg_ctx, tmp, a->vd + 1); + accfn(tcg_ctx, tmp, tmp, rd1); + neon_store_reg64(tcg_ctx, tmp, a->vd + 1); + tcg_temp_free_i64(tcg_ctx, tmp); + } else { + neon_store_reg64(tcg_ctx, rd0, a->vd); + neon_store_reg64(tcg_ctx, rd1, a->vd + 1); + } + + tcg_temp_free_i64(tcg_ctx, rd0); + tcg_temp_free_i64(tcg_ctx, rd1); + + return true; +} + +static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_s16, + gen_helper_neon_abdl_s32, + gen_helper_neon_abdl_s64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_u16, + gen_helper_neon_abdl_u32, + gen_helper_neon_abdl_u64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_s16, + gen_helper_neon_abdl_s32, + gen_helper_neon_abdl_s64, + NULL, + }; + static NeonGenTwo64OpFn * const addfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); +} + +static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_u16, + gen_helper_neon_abdl_u32, + gen_helper_neon_abdl_u64, + NULL, + }; + static NeonGenTwo64OpFn * const addfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); +} + +static void gen_mull_s32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_muls2_i32(tcg_ctx, lo, hi, rn, rm); + tcg_gen_concat_i32_i64(tcg_ctx, rd, lo, hi); + + tcg_temp_free_i32(tcg_ctx, lo); + tcg_temp_free_i32(tcg_ctx, hi); +} + +static void gen_mull_u32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_mulu2_i32(tcg_ctx, lo, hi, rn, rm); + tcg_gen_concat_i32_i64(tcg_ctx, rd, lo, hi); + + tcg_temp_free_i32(tcg_ctx, lo); + tcg_temp_free_i32(tcg_ctx, hi); +} + +static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_mull_s8, + gen_helper_neon_mull_s16, + gen_mull_s32, + NULL, + }; 
+ + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_mull_u8, + gen_helper_neon_mull_u16, + gen_mull_u32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +#define DO_VMLAL(INSN,MULL,ACC) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenTwoOpWidenFn * const opfn[] = { \ + gen_helper_neon_##MULL##8, \ + gen_helper_neon_##MULL##16, \ + gen_##MULL##32, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const accfn[] = { \ + gen_helper_neon_##ACC##l_u16, \ + gen_helper_neon_##ACC##l_u32, \ + tcg_gen_##ACC##_i64, \ + NULL, \ + }; \ + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \ + } + +DO_VMLAL(VMLAL_S,mull_s,add) +DO_VMLAL(VMLAL_U,mull_u,add) +DO_VMLAL(VMLSL_S,mull_s,sub) +DO_VMLAL(VMLSL_U,mull_u,sub) + +static void gen_VQDMULL_16(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + gen_helper_neon_mull_s16(tcg_ctx, rd, rn, rm); + gen_helper_neon_addl_saturate_s32(tcg_ctx, rd, tcg_ctx->cpu_env, rd, rd); +} + +static void gen_VQDMULL_32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + gen_mull_s32(tcg_ctx, rd, rn, rm); + gen_helper_neon_addl_saturate_s64(tcg_ctx, rd, tcg_ctx->cpu_env, rd, rd); +} + +static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static void gen_VQDMLAL_acc_16(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_addl_saturate_s32(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static void gen_VQDMLAL_acc_32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_addl_saturate_s64(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLAL_acc_16, + gen_VQDMLAL_acc_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); +} + +static void gen_VQDMLSL_acc_16(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_negl_u32(tcg_ctx, rm, rm); + gen_helper_neon_addl_saturate_s32(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static void gen_VQDMLSL_acc_32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_neg_i64(tcg_ctx, rm, rm); + gen_helper_neon_addl_saturate_s64(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLSL_acc_16, + gen_VQDMLSL_acc_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + gen_helper_gvec_3 *fn_gvec; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + switch (a->size) { + case 0: + fn_gvec = gen_helper_neon_pmull_h; + break; + case 2: + if (!dc_isar_feature(aa32_pmull, s)) { + return false; + } + fn_gvec = gen_helper_gvec_pmull_q; + break; + default: + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tcg_gen_gvec_3_ool(tcg_ctx, neon_reg_offset(a->vd, 0), + neon_reg_offset(a->vn, 0), + neon_reg_offset(a->vm, 0), + 16, 16, 0, fn_gvec); + return true; +} + +static void gen_neon_dup_low16(TCGContext *tcg_ctx, TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_ext16u_i32(tcg_ctx, var, var); + tcg_gen_shli_i32(tcg_ctx, tmp, var, 16); + tcg_gen_or_i32(tcg_ctx, var, var, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); +} + +static void gen_neon_dup_high16(TCGContext *tcg_ctx, TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_andi_i32(tcg_ctx, var, var, 0xffff0000); + tcg_gen_shri_i32(tcg_ctx, tmp, var, 16); + tcg_gen_or_i32(tcg_ctx, var, var, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); +} + +static inline TCGv_i32 neon_get_scalar(TCGContext *tcg_ctx, int size, int reg) +{ + TCGv_i32 tmp; + if (size == 1) { + tmp = neon_load_reg(tcg_ctx, reg & 7, reg >> 4); + if (reg & 8) { + gen_neon_dup_high16(tcg_ctx, tmp); + } else { + gen_neon_dup_low16(tcg_ctx, tmp); + } + } else { + tmp = neon_load_reg(tcg_ctx, reg & 15, reg >> 4); + } + return tmp; +} + +static bool do_2scalar(DisasContext *s, arg_2scalar *a, + NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Two registers and a scalar: perform an operation between + * the input elements and the scalar, and then possibly + * perform an accumulation operation of that result into the + * destination. + */ + TCGv_i32 scalar; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->q && ((a->vd | a->vn) & 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(tcg_ctx, a->size, a->vm); + + for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vn, pass); + opfn(tcg_ctx, tmp, tmp, scalar); + if (accfn) { + TCGv_i32 rd = neon_load_reg(tcg_ctx, a->vd, pass); + accfn(tcg_ctx, tmp, rd, tmp); + tcg_temp_free_i32(tcg_ctx, rd); + } + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_i32(tcg_ctx, scalar); + return true; +} + +static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + gen_helper_neon_add_u16, + tcg_gen_add_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + gen_helper_neon_sub_u16, + tcg_gen_sub_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +/* + * Rather than have a float-specific version of do_2scalar just for + * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into + * a NeonGenTwoOpFn. + */ +#define WRAP_FP_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 rd, \ + TCGv_i32 rn, TCGv_i32 rm) \ + { \ + TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); \ + FUNC(tcg_ctx, rd, rn, rm, fpstatus); \ + tcg_temp_free_ptr(tcg_ctx, fpstatus); \ + } + +WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls) +WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds) +WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs) + +static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_add, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_sub, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) +WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) +WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16) +WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32) + +static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_VQDMULH_16, + gen_VQDMULH_32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_VQRDMULH_16, + gen_VQRDMULH_32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool do_vqrdmlah_2sc(DisasContext 
*s, arg_2scalar *a, + NeonGenThreeOpEnvFn *opfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn + * performs a kind of fused op-then-accumulate using a helper + * function that takes all of rd, rn and the scalar at once. + */ + TCGv_i32 scalar; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + if (!dc_isar_feature(aa32_rdm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->q && ((a->vd | a->vn) & 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(tcg_ctx, a->size, a->vm); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 rn = neon_load_reg(tcg_ctx, a->vn, pass); + TCGv_i32 rd = neon_load_reg(tcg_ctx, a->vd, pass); + opfn(tcg_ctx, rd, tcg_ctx->cpu_env, rn, scalar, rd); + tcg_temp_free_i32(tcg_ctx, rn); + neon_store_reg(tcg_ctx, a->vd, pass, rd); + } + tcg_temp_free_i32(tcg_ctx, scalar); + + return true; +} + +static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenThreeOpEnvFn *opfn[] = { + NULL, + gen_helper_neon_qrdmlah_s16, + gen_helper_neon_qrdmlah_s32, + NULL, + }; + return do_vqrdmlah_2sc(s, a, opfn[a->size]); +} + +static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenThreeOpEnvFn *opfn[] = { + NULL, + gen_helper_neon_qrdmlsh_s16, + gen_helper_neon_qrdmlsh_s32, + NULL, + }; + return do_vqrdmlah_2sc(s, a, opfn[a->size]); +} + +static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, + NeonGenTwoOpWidenFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Two registers and a scalar, long operations: perform an + * operation on the input elements and the scalar which produces + * a double-width result, and then possibly perform an accumulation + * operation of that result into the destination. + */ + TCGv_i32 scalar, rn; + TCGv_i64 rn0_64, rn1_64; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(tcg_ctx, a->size, a->vm); + + /* Load all inputs before writing any outputs, in case of overlap */ + rn = neon_load_reg(tcg_ctx, a->vn, 0); + rn0_64 = tcg_temp_new_i64(tcg_ctx); + opfn(tcg_ctx, rn0_64, rn, scalar); + tcg_temp_free_i32(tcg_ctx, rn); + + rn = neon_load_reg(tcg_ctx, a->vn, 1); + rn1_64 = tcg_temp_new_i64(tcg_ctx); + opfn(tcg_ctx, rn1_64, rn, scalar); + tcg_temp_free_i32(tcg_ctx, rn); + tcg_temp_free_i32(tcg_ctx, scalar); + + if (accfn) { + TCGv_i64 t64 = tcg_temp_new_i64(tcg_ctx); + neon_load_reg64(tcg_ctx, t64, a->vd); + accfn(tcg_ctx, t64, t64, rn0_64); + neon_store_reg64(tcg_ctx, t64, a->vd); + neon_load_reg64(tcg_ctx, t64, a->vd + 1); + accfn(tcg_ctx, t64, t64, rn1_64); + neon_store_reg64(tcg_ctx, t64, a->vd + 1); + tcg_temp_free_i64(tcg_ctx, t64); + } else { + neon_store_reg64(tcg_ctx, rn0_64, a->vd); + neon_store_reg64(tcg_ctx, rn1_64, a->vd + 1); + } + tcg_temp_free_i64(tcg_ctx, rn0_64); + tcg_temp_free_i64(tcg_ctx, rn1_64); + return true; +} + +static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_helper_neon_mull_s16, + gen_mull_s32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_helper_neon_mull_u16, + gen_mull_u32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +#define DO_VMLAL_2SC(INSN, MULL, ACC) \ + static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \ + { \ + static NeonGenTwoOpWidenFn * const opfn[] = { \ + NULL, \ + gen_helper_neon_##MULL##16, \ + gen_##MULL##32, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const accfn[] = { \ + NULL, \ + gen_helper_neon_##ACC##l_u32, \ + tcg_gen_##ACC##_i64, \ + NULL, \ + }; \ + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \ + } + +DO_VMLAL_2SC(VMLAL_S, mull_s, add) +DO_VMLAL_2SC(VMLAL_U, mull_u, add) +DO_VMLAL_2SC(VMLSL_S, mull_s, sub) +DO_VMLAL_2SC(VMLSL_U, mull_u, sub) + +static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLAL_acc_16, + gen_VQDMLAL_acc_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLSL_acc_16, + gen_VQDMLSL_acc_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VEXT(DisasContext *s, arg_VEXT *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (a->imm > 7 && !a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + if (!a->q) { + /* Extract 64 bits from */ + TCGv_i64 left, right, dest; + + left = tcg_temp_new_i64(tcg_ctx); + right = tcg_temp_new_i64(tcg_ctx); + dest = tcg_temp_new_i64(tcg_ctx); + + neon_load_reg64(tcg_ctx, right, a->vn); + neon_load_reg64(tcg_ctx, left, a->vm); + tcg_gen_extract2_i64(tcg_ctx, dest, right, left, a->imm * 8); + neon_store_reg64(tcg_ctx, dest, a->vd); + + tcg_temp_free_i64(tcg_ctx, left); + tcg_temp_free_i64(tcg_ctx, right); + tcg_temp_free_i64(tcg_ctx, dest); + } else { + /* Extract 128 bits from */ + TCGv_i64 left, middle, right, destleft, destright; + + left = tcg_temp_new_i64(tcg_ctx); + middle = tcg_temp_new_i64(tcg_ctx); + right = tcg_temp_new_i64(tcg_ctx); + destleft = tcg_temp_new_i64(tcg_ctx); + destright = tcg_temp_new_i64(tcg_ctx); + + if (a->imm < 8) { + neon_load_reg64(tcg_ctx, right, a->vn); + neon_load_reg64(tcg_ctx, middle, a->vn + 1); + tcg_gen_extract2_i64(tcg_ctx, destright, right, middle, a->imm * 8); + neon_load_reg64(tcg_ctx, left, a->vm); + tcg_gen_extract2_i64(tcg_ctx, destleft, middle, left, a->imm * 8); + } else { + neon_load_reg64(tcg_ctx, right, a->vn + 1); + neon_load_reg64(tcg_ctx, middle, a->vm); + tcg_gen_extract2_i64(tcg_ctx, destright, right, middle, (a->imm - 8) * 8); + neon_load_reg64(tcg_ctx, left, a->vm + 1); + tcg_gen_extract2_i64(tcg_ctx, destleft, middle, left, (a->imm - 8) * 8); + } + + neon_store_reg64(tcg_ctx, destright, a->vd); + neon_store_reg64(tcg_ctx, destleft, a->vd + 1); + + tcg_temp_free_i64(tcg_ctx, destright); + tcg_temp_free_i64(tcg_ctx, destleft); + tcg_temp_free_i64(tcg_ctx, right); + tcg_temp_free_i64(tcg_ctx, middle); + tcg_temp_free_i64(tcg_ctx, left); + } + return true; +} + +static bool trans_VTBL(DisasContext *s, arg_VTBL *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int n; + TCGv_i32 tmp, tmp2, tmp3, tmp4; + TCGv_ptr ptr1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + n = a->len + 1; + if ((a->vn + n) > 32) { + /* + * This is UNPREDICTABLE; we choose to UNDEF to avoid the + * helper function running off the end of the register file. 
+ */ + return false; + } + n <<= 3; + if (a->op) { + tmp = neon_load_reg(tcg_ctx, a->vd, 0); + } else { + tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + } + tmp2 = neon_load_reg(tcg_ctx, a->vm, 0); + ptr1 = vfp_reg_ptr(tcg_ctx, true, a->vn); + tmp4 = tcg_const_i32(tcg_ctx, n); + gen_helper_neon_tbl(tcg_ctx, tmp2, tmp2, tmp, ptr1, tmp4); + tcg_temp_free_i32(tcg_ctx, tmp); + if (a->op) { + tmp = neon_load_reg(tcg_ctx, a->vd, 1); + } else { + tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + } + tmp3 = neon_load_reg(tcg_ctx, a->vm, 1); + gen_helper_neon_tbl(tcg_ctx, tmp3, tmp3, tmp, ptr1, tmp4); + tcg_temp_free_i32(tcg_ctx, tmp4); + tcg_temp_free_ptr(tcg_ctx, ptr1); + neon_store_reg(tcg_ctx, a->vd, 0, tmp2); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + tcg_temp_free_i32(tcg_ctx, tmp); + return true; +} + +static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tcg_gen_gvec_dup_mem(tcg_ctx, a->size, neon_reg_offset(a->vd, 0), + neon_element_offset(a->vm, a->index, a->size), + a->q ? 16 : 8, a->q ? 16 : 8); + return true; +} + +static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int pass, half; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < (a->q ? 2 : 1); pass++) { + TCGv_i32 tmp[2]; + + for (half = 0; half < 2; half++) { + tmp[half] = neon_load_reg(tcg_ctx, a->vm, pass * 2 + half); + switch (a->size) { + case 0: + tcg_gen_bswap32_i32(tcg_ctx, tmp[half], tmp[half]); + break; + case 1: + gen_swap_half(tcg_ctx, tmp[half], tmp[half]); + break; + case 2: + break; + default: + g_assert_not_reached(); + } + } + neon_store_reg(tcg_ctx, a->vd, pass * 2, tmp[1]); + neon_store_reg(tcg_ctx, a->vd, pass * 2 + 1, tmp[0]); + } + return true; +} + +static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, + NeonGenWidenFn *widenfn, + NeonGenTwo64OpFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Pairwise long operations: widen both halves of the pair, + * combine the pairs with the opfn, and then possibly accumulate + * into the destination with the accfn. + */ + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!widenfn) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < a->q + 1; pass++) { + TCGv_i32 tmp; + TCGv_i64 rm0_64, rm1_64, rd_64; + + rm0_64 = tcg_temp_new_i64(tcg_ctx); + rm1_64 = tcg_temp_new_i64(tcg_ctx); + rd_64 = tcg_temp_new_i64(tcg_ctx); + tmp = neon_load_reg(tcg_ctx, a->vm, pass * 2); + widenfn(tcg_ctx, rm0_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + tmp = neon_load_reg(tcg_ctx, a->vm, pass * 2 + 1); + widenfn(tcg_ctx, rm1_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + opfn(tcg_ctx, rd_64, rm0_64, rm1_64); + tcg_temp_free_i64(tcg_ctx, rm0_64); + tcg_temp_free_i64(tcg_ctx, rm1_64); + + if (accfn) { + TCGv_i64 tmp64 = tcg_temp_new_i64(tcg_ctx); + neon_load_reg64(tcg_ctx, tmp64, a->vd + pass); + accfn(tcg_ctx, rd_64, tmp64, rd_64); + tcg_temp_free_i64(tcg_ctx, tmp64); + } + neon_store_reg64(tcg_ctx, rd_64, a->vd + pass); + tcg_temp_free_i64(tcg_ctx, rd_64); + } + return true; +} + +static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); +} + +static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); +} + +static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], + accfn[a->size]); +} + +static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], + accfn[a->size]); +} + +typedef void ZipFn(TCGContext *, TCGv_ptr, TCGv_ptr); + +static bool do_zip_uzp(DisasContext *s, arg_2misc *a, + ZipFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_ptr pd, pm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!fn) { + /* Bad size or size/q combination */ + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + pd = vfp_reg_ptr(tcg_ctx, true, a->vd); + pm = vfp_reg_ptr(tcg_ctx, true, a->vm); + fn(tcg_ctx, pd, pm); + tcg_temp_free_ptr(tcg_ctx, pd); + tcg_temp_free_ptr(tcg_ctx, pm); + return true; +} + +static bool trans_VUZP(DisasContext *s, arg_2misc *a) +{ + static ZipFn * const fn[2][4] = { + { + gen_helper_neon_unzip8, + gen_helper_neon_unzip16, + NULL, + NULL, + }, { + gen_helper_neon_qunzip8, + gen_helper_neon_qunzip16, + gen_helper_neon_qunzip32, + NULL, + } + }; + return do_zip_uzp(s, a, fn[a->q][a->size]); +} + +static bool trans_VZIP(DisasContext *s, arg_2misc *a) +{ + static ZipFn * const fn[2][4] = { + { + gen_helper_neon_zip8, + gen_helper_neon_zip16, + NULL, + NULL, + }, { + gen_helper_neon_qzip8, + gen_helper_neon_qzip16, + gen_helper_neon_qzip32, + NULL, + } + }; + return do_zip_uzp(s, a, fn[a->q][a->size]); +} + +static bool do_vmovn(DisasContext *s, arg_2misc *a, + NeonGenNarrowEnvFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i64 rm; + TCGv_i32 rd0, rd1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!narrowfn) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rm = tcg_temp_new_i64(tcg_ctx); + rd0 = tcg_temp_new_i32(tcg_ctx); + rd1 = tcg_temp_new_i32(tcg_ctx); + + neon_load_reg64(tcg_ctx, rm, a->vm); + narrowfn(tcg_ctx, rd0, tcg_ctx->cpu_env, rm); + neon_load_reg64(tcg_ctx, rm, a->vm + 1); + narrowfn(tcg_ctx, rd1, tcg_ctx->cpu_env, rm); + neon_store_reg(tcg_ctx, a->vd, 0, rd0); + neon_store_reg(tcg_ctx, a->vd, 1, rd1); + tcg_temp_free_i64(tcg_ctx, rm); + return true; +} + +#define DO_VMOVN(INSN, FUNC) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + static NeonGenNarrowEnvFn * const narrowfn[] = { \ + FUNC##8, \ + FUNC##16, \ + FUNC##32, \ + NULL, \ + }; \ + return do_vmovn(s, a, narrowfn[a->size]); \ + } + +DO_VMOVN(VMOVN, gen_neon_narrow_u) +DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat) +DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s) +DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u) + +static bool trans_VSHLL(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i32 rm0, rm1; + TCGv_i64 rd; + static NeonGenWidenFn * const widenfns[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + NULL, + }; + NeonGenWidenFn *widenfn = widenfns[a->size]; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!widenfn) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rd = tcg_temp_new_i64(tcg_ctx); + + rm0 = neon_load_reg(tcg_ctx, a->vm, 0); + rm1 = neon_load_reg(tcg_ctx, a->vm, 1); + + widenfn(tcg_ctx, rd, rm0); + tcg_gen_shli_i64(tcg_ctx, rd, rd, 8 << a->size); + neon_store_reg64(tcg_ctx, rd, a->vd); + widenfn(tcg_ctx, rd, rm1); + tcg_gen_shli_i64(tcg_ctx, rd, rd, 8 << a->size); + neon_store_reg64(tcg_ctx, rd, a->vd + 1); + + tcg_temp_free_i64(tcg_ctx, rd); + tcg_temp_free_i32(tcg_ctx, rm0); + tcg_temp_free_i32(tcg_ctx, rm1); + return true; +} + +static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_ptr fpst; + TCGv_i32 ahp, tmp, tmp2, tmp3; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !dc_isar_feature(aa32_fp16_spconv, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm & 1) || (a->size != 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, true); + ahp = get_ahp_flag(tcg_ctx); + tmp = neon_load_reg(tcg_ctx, a->vm, 0); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp); + tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16); + tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + tmp = neon_load_reg(tcg_ctx, a->vm, 2); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); + tmp3 = neon_load_reg(tcg_ctx, a->vm, 3); + neon_store_reg(tcg_ctx, a->vd, 0, tmp2); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp); + tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16); + tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, ahp); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_ptr fpst; + TCGv_i32 ahp, tmp, tmp2, tmp3; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !dc_isar_feature(aa32_fp16_spconv, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd & 1) || (a->size != 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, true); + ahp = get_ahp_flag(tcg_ctx); + tmp3 = tcg_temp_new_i32(tcg_ctx); + tmp = neon_load_reg(tcg_ctx, a->vm, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 0, tmp3); + tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp); + tmp3 = tcg_temp_new_i32(tcg_ctx); + tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 2, tmp3); + tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 3, tmp2); + tcg_temp_free_i32(tcg_ctx, ahp); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size == 3) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(tcg_ctx, a->size, rd_ofs, rm_ofs, vec_size, vec_size); + + return true; +} + +#define DO_2MISC_VEC(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_2misc_vec(s, a, FN); \ + } + +DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg) +DO_2MISC_VEC(VABS, tcg_gen_gvec_abs) +DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0) +DO_2MISC_VEC(VCGT0, gen_gvec_cgt0) +DO_2MISC_VEC(VCLE0, gen_gvec_cle0) +DO_2MISC_VEC(VCGE0, gen_gvec_cge0) +DO_2MISC_VEC(VCLT0, gen_gvec_clt0) + +static bool trans_VMVN(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc_vec(s, a, tcg_gen_gvec_not); +} + +#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, \ + uint32_t maxsz) \ + { \ + tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \ + DATA, FUNC); \ + } + +#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, \ + uint32_t maxsz) \ + { \ + tcg_gen_gvec_2_ool(tcg_ctx, rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \ + } + +WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0) +WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1) +WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0) +WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1) +WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0) +WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0) +WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0) + +#define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \ + return false; \ + } \ + return 
do_2misc_vec(s, a, gen_##INSN); \ + } + +DO_2M_CRYPTO(AESE, aa32_aes, 0) +DO_2M_CRYPTO(AESD, aa32_aes, 0) +DO_2M_CRYPTO(AESMC, aa32_aes, 0) +DO_2M_CRYPTO(AESIMC, aa32_aes, 0) +DO_2M_CRYPTO(SHA1H, aa32_sha1, 2) +DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2) +DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2) + +static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int pass; + + /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (!fn) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tmp); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + + return true; +} + +static bool trans_VREV32(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + tcg_gen_bswap32_i32, + gen_swap_half, + NULL, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool trans_VREV16(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc(s, a, gen_rev16); +} + +static bool trans_VCLS(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_helper_neon_cls_s8, + gen_helper_neon_cls_s16, + gen_helper_neon_cls_s32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static void do_VCLZ_32(TCGContext *tcg_ctx, TCGv_i32 rd, TCGv_i32 rm) +{ + tcg_gen_clzi_i32(tcg_ctx, rd, rm, 32); +} + +static bool trans_VCLZ(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_helper_neon_clz_u8, + gen_helper_neon_clz_u16, + do_VCLZ_32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool trans_VCNT(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc(s, a, gen_helper_neon_cnt_u8); +} + +static bool trans_VABS_F(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + /* TODO: FP16 : size == 1 */ + return do_2misc(s, a, gen_helper_vfp_abss); +} + +static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + /* TODO: FP16 : size == 1 */ + return do_2misc(s, a, gen_helper_vfp_negs); +} + +static bool trans_VRECPE(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + return do_2misc(s, a, gen_helper_recpe_u32); +} + +static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + return do_2misc(s, a, gen_helper_rsqrte_u32); +} + +#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 m) \ + { \ + FUNC(tcg_ctx, d, tcg_ctx->cpu_env, m); \ + } + +WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8) +WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16) +WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32) +WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8) +WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16) +WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32) + +static bool trans_VQABS(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_VQABS_s8, + gen_VQABS_s16, + gen_VQABS_s32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool 
trans_VQNEG(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_VQNEG_s8, + gen_VQNEG_s16, + gen_VQNEG_s32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool do_2misc_fp(DisasContext *s, arg_2misc *a, + NeonGenOneSingleOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int pass; + TCGv_ptr fpst; + + /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, 1); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tmp, fpst); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +#define DO_2MISC_FP(INSN, FUNC) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_2misc_fp(s, a, FUNC); \ + } + +DO_2MISC_FP(VRECPE_F, gen_helper_recpe_f32) +DO_2MISC_FP(VRSQRTE_F, gen_helper_rsqrte_f32) +DO_2MISC_FP(VCVT_FS, gen_helper_vfp_sitos) +DO_2MISC_FP(VCVT_FU, gen_helper_vfp_uitos) +DO_2MISC_FP(VCVT_SF, gen_helper_vfp_tosizs) +DO_2MISC_FP(VCVT_UF, gen_helper_vfp_touizs) + +static bool trans_VRINTX(DisasContext *s, arg_2misc *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + return do_2misc_fp(s, a, gen_helper_rints_exact); +} + +#define WRAP_FP_CMP0_FWD(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 d, \ + TCGv_i32 m, TCGv_ptr fpst) \ + { \ + TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); \ + FUNC(tcg_ctx, d, m, zero, fpst); \ + tcg_temp_free_i32(tcg_ctx, zero); \ + } +#define WRAP_FP_CMP0_REV(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 d, \ + TCGv_i32 m, TCGv_ptr fpst) \ + { \ + TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); \ + FUNC(tcg_ctx, d, zero, m, fpst); \ + tcg_temp_free_i32(tcg_ctx, zero); \ + } + +#define DO_FP_CMP0(INSN, FUNC, REV) \ + WRAP_FP_CMP0_##REV(gen_##INSN, FUNC) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_2misc_fp(s, a, gen_##INSN); \ + } + +DO_FP_CMP0(VCGT0_F, gen_helper_neon_cgt_f32, FWD) +DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD) +DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD) +DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV) +DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV) + +static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Handle a VRINT* operation by iterating 32 bits at a time, + * with a specified rounding mode in operation. + */ + int pass; + TCGv_ptr fpst; + TCGv_i32 tcg_rmode; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + gen_helper_rints(tcg_ctx, tmp, tmp, fpst); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + tcg_temp_free_i32(tcg_ctx, tcg_rmode); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +#define DO_VRINT(INSN, RMODE) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_vrint(s, a, RMODE); \ + } + +DO_VRINT(VRINTN, FPROUNDING_TIEEVEN) +DO_VRINT(VRINTA, FPROUNDING_TIEAWAY) +DO_VRINT(VRINTZ, FPROUNDING_ZERO) +DO_VRINT(VRINTM, FPROUNDING_NEGINF) +DO_VRINT(VRINTP, FPROUNDING_POSINF) + +static bool do_vcvt(DisasContext *s, arg_2misc *a, int rmode, bool is_signed) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Handle a VCVT* operation by iterating 32 bits at a time, + * with a specified rounding mode in operation. + */ + int pass; + TCGv_ptr fpst; + TCGv_i32 tcg_rmode, tcg_shift; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_shift = tcg_const_i32(tcg_ctx, 0); + tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + if (is_signed) { + gen_helper_vfp_tosls(tcg_ctx, tmp, tmp, tcg_shift, fpst); + } else { + gen_helper_vfp_touls(tcg_ctx, tmp, tmp, tcg_shift, fpst); + } + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + tcg_temp_free_i32(tcg_ctx, tcg_rmode); + tcg_temp_free_i32(tcg_ctx, tcg_shift); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +#define DO_VCVT(INSN, RMODE, SIGNED) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_vcvt(s, a, RMODE, SIGNED); \ + } + +DO_VCVT(VCVTAU, FPROUNDING_TIEAWAY, false) +DO_VCVT(VCVTAS, FPROUNDING_TIEAWAY, true) +DO_VCVT(VCVTNU, FPROUNDING_TIEEVEN, false) +DO_VCVT(VCVTNS, FPROUNDING_TIEEVEN, true) +DO_VCVT(VCVTPU, FPROUNDING_POSINF, false) +DO_VCVT(VCVTPS, FPROUNDING_POSINF, true) +DO_VCVT(VCVTMU, FPROUNDING_NEGINF, false) +DO_VCVT(VCVTMS, FPROUNDING_NEGINF, true) + +static bool trans_VSWP(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i64 rm, rd; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 0) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rm = tcg_temp_new_i64(tcg_ctx); + rd = tcg_temp_new_i64(tcg_ctx); + for (pass = 0; pass < (a->q ? 2 : 1); pass++) { + neon_load_reg64(tcg_ctx, rm, a->vm + pass); + neon_load_reg64(tcg_ctx, rd, a->vd + pass); + neon_store_reg64(tcg_ctx, rm, a->vd + pass); + neon_store_reg64(tcg_ctx, rd, a->vm + pass); + } + tcg_temp_free_i64(tcg_ctx, rm); + tcg_temp_free_i64(tcg_ctx, rd); + + return true; +} +static void gen_neon_trn_u8(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 rd, tmp; + + rd = tcg_temp_new_i32(tcg_ctx); + tmp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_shli_i32(tcg_ctx, rd, t0, 8); + tcg_gen_andi_i32(tcg_ctx, rd, rd, 0xff00ff00); + tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0x00ff00ff); + tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); + + tcg_gen_shri_i32(tcg_ctx, t1, t1, 8); + tcg_gen_andi_i32(tcg_ctx, t1, t1, 0x00ff00ff); + tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xff00ff00); + tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, t0, rd); + + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, rd); +} + +static void gen_neon_trn_u16(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 rd, tmp; + + rd = tcg_temp_new_i32(tcg_ctx); + tmp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_shli_i32(tcg_ctx, rd, t0, 16); + tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0xffff); + tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); + tcg_gen_shri_i32(tcg_ctx, t1, t1, 16); + tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xffff0000); + tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, t0, rd); + + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, rd); +} + +static bool trans_VTRN(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i32 tmp, tmp2; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + if (a->size == 2) { + for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) { + tmp = neon_load_reg(tcg_ctx, a->vm, pass); + tmp2 = neon_load_reg(tcg_ctx, a->vd, pass + 1); + neon_store_reg(tcg_ctx, a->vm, pass, tmp2); + neon_store_reg(tcg_ctx, a->vd, pass + 1, tmp); + } + } else { + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + tmp = neon_load_reg(tcg_ctx, a->vm, pass); + tmp2 = neon_load_reg(tcg_ctx, a->vd, pass); + if (a->size == 0) { + gen_neon_trn_u8(tcg_ctx, tmp, tmp2); + } else { + gen_neon_trn_u16(tcg_ctx, tmp, tmp2); + } + neon_store_reg(tcg_ctx, a->vm, pass, tmp2); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + } + return true; +} diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c index ff2f8ff323..fefbc3a202 100644 --- a/qemu/target/arm/translate-sve.c +++ b/qemu/target/arm/translate-sve.c @@ -178,7 +178,7 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_dup64i(tcg_ctx, vec_full_reg_offset(s, rd), vsz, vsz, word); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); } /* Invoke a vector expander on two Pregs. 
*/ @@ -1481,7 +1481,7 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) unsigned oprsz = size_for_gvec(setsz / 8); if (oprsz * 8 == setsz) { - tcg_gen_gvec_dup64i(tcg_ctx, ofs, oprsz, maxsz, word); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, ofs, oprsz, maxsz, word); goto done; } } @@ -2088,7 +2088,11 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) unsigned nofs = vec_reg_offset(s, a->rn, index, esz); tcg_gen_gvec_dup_mem(tcg_ctx, esz, dofs, nofs, vsz, vsz); } else { - tcg_gen_gvec_dup64i(tcg_ctx, dofs, vsz, vsz, 0); + /* + * While dup_mem handles 128-bit elements, dup_imm does not. + * Thankfully element size doesn't matter for splatting zero. + */ + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, vsz, vsz, 0); } } return true; @@ -3341,7 +3345,7 @@ static bool trans_FDUP(DisasContext *s, arg_FDUP *a) imm = vfp_expand_imm(a->esz, a->imm); imm = dup_const(a->esz, imm); - tcg_gen_gvec_dup64i(tcg_ctx, dofs, vsz, vsz, imm); + tcg_gen_gvec_dup_imm(tcg_ctx, a->esz, dofs, vsz, vsz, imm); } return true; } @@ -3356,7 +3360,7 @@ static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) unsigned vsz = vec_full_reg_size(s); int dofs = vec_full_reg_offset(s, a->rd); - tcg_gen_gvec_dup64i(tcg_ctx, dofs, vsz, vsz, dup_const(a->esz, a->imm)); + tcg_gen_gvec_dup_imm(tcg_ctx, a->esz, dofs, vsz, vsz, a->imm); } return true; } @@ -4050,41 +4054,33 @@ static bool trans_FCADD(DisasContext *s, arg_FCADD *a) typedef void gen_helper_sve_fmla(TCGContext *, TCGv_env, TCGv_ptr, TCGv_i32); -static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn) +static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, + gen_helper_gvec_5_ptr *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (fn == NULL) { + if (a->esz == 0) { return false; } - if (!sve_access_check(s)) { - return true; + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16); + tcg_gen_gvec_5_ptr(tcg_ctx, + vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, 0, fn); + tcg_temp_free_ptr(tcg_ctx, status); } - unsigned vsz = vec_full_reg_size(s); - unsigned desc; - TCGv_i32 t_desc; - TCGv_ptr pg = tcg_temp_new_ptr(tcg_ctx); - - /* We would need 7 operands to pass these arguments "properly". - * So we encode all the register numbers into the descriptor. 
- */ - desc = deposit32(a->rd, 5, 5, a->rn); - desc = deposit32(desc, 10, 5, a->rm); - desc = deposit32(desc, 15, 5, a->ra); - desc = simd_desc(vsz, vsz, desc); - - t_desc = tcg_const_i32(tcg_ctx, desc); - tcg_gen_addi_ptr(tcg_ctx, pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, a->pg)); - fn(tcg_ctx, tcg_ctx->cpu_env, pg, t_desc); - tcg_temp_free_i32(tcg_ctx, t_desc); - tcg_temp_free_ptr(tcg_ctx, pg); return true; } #define DO_FMLA(NAME, name) \ static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \ { \ - static gen_helper_sve_fmla * const fns[4] = { \ + static gen_helper_gvec_5_ptr * const fns[4] = { \ NULL, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ @@ -4101,7 +4097,8 @@ DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - static gen_helper_sve_fmla * const fns[3] = { + static gen_helper_gvec_5_ptr * const fns[4] = { + NULL, gen_helper_sve_fcmla_zpzzz_h, gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, @@ -4112,25 +4109,15 @@ static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - unsigned desc; - TCGv_i32 t_desc; - TCGv_ptr pg = tcg_temp_new_ptr(tcg_ctx); - - /* We would need 7 operands to pass these arguments "properly". - * So we encode all the register numbers into the descriptor. - */ - desc = deposit32(a->rd, 5, 5, a->rn); - desc = deposit32(desc, 10, 5, a->rm); - desc = deposit32(desc, 15, 5, a->ra); - desc = deposit32(desc, 20, 2, a->rot); - desc = sextract32(desc, 0, 22); - desc = simd_desc(vsz, vsz, desc); - - t_desc = tcg_const_i32(tcg_ctx, desc); - tcg_gen_addi_ptr(tcg_ctx, pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, a->pg)); - fns[a->esz - 1](tcg_ctx, tcg_ctx->cpu_env, pg, t_desc); - tcg_temp_free_i32(tcg_ctx, t_desc); - tcg_temp_free_ptr(tcg_ctx, pg); + TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16); + tcg_gen_gvec_5_ptr(tcg_ctx, + vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, a->rot, fns[a->esz]); + tcg_temp_free_ptr(tcg_ctx, status); } return true; } @@ -4468,15 +4455,17 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0, t1; + TCGv_i64 dirty_addr, clean_addr, t0, t1; - addr = tcg_temp_new_i64(tcg_ctx); - t0 = tcg_temp_new_i64(tcg_ctx); + dirty_addr = tcg_temp_new_i64(tcg_ctx); + tcg_gen_addi_i64(tcg_ctx, dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(tcg_ctx, dirty_addr); - /* Note that unpredicated load/store of vector/predicate registers + /* + * Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian - * operations on larger quantities. There is no nice way to force - * a little-endian load for aarch64_be-linux-user out of line. + * operations on larger quantities. * * Attempt to keep code expansion to a minimum by limiting the * amount of unrolling done. 
@@ -4484,56 +4473,58 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) if (nparts <= 4) { int i; + t0 = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < len_align; i += 8) { - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + i); - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); tcg_gen_st_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + i); + tcg_gen_addi_i64(tcg_ctx, clean_addr, cpu_reg_sp(s, rn), 8); } + tcg_temp_free_i64(tcg_ctx, t0); } else { TCGLabel *loop = gen_new_label(tcg_ctx); TCGv_ptr tp, i = tcg_const_local_ptr(tcg_ctx, 0); - gen_set_label(tcg_ctx, loop); + /* Copy the clean address into a local temp, live across the loop. */ + t0 = clean_addr; + clean_addr = new_tmp_a64_local(s); + tcg_gen_mov_i64(tcg_ctx, clean_addr, t0); - /* Minimize the number of local temps that must be re-read from - * the stack each iteration. Instead, re-compute values other - * than the loop counter. - */ - tp = tcg_temp_new_ptr(tcg_ctx); - tcg_gen_addi_ptr(tcg_ctx, tp, i, imm); - tcg_gen_extu_ptr_i64(tcg_ctx, addr, tp); - tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, rn)); + gen_set_label(tcg_ctx, loop); - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + t0 = tcg_temp_new_i64(tcg_ctx); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tp = tcg_temp_new_ptr(tcg_ctx); tcg_gen_add_ptr(tcg_ctx, tp, tcg_ctx->cpu_env, i); tcg_gen_addi_ptr(tcg_ctx, i, i, 8); tcg_gen_st_i64(tcg_ctx, t0, tp, vofs); tcg_temp_free_ptr(tcg_ctx, tp); + tcg_temp_free_i64(tcg_ctx, t0); tcg_gen_brcondi_ptr(tcg_ctx, TCG_COND_LTU, i, len_align, loop); tcg_temp_free_ptr(tcg_ctx, i); } - /* Predicate register loads can be any multiple of 2. + /* + * Predicate register loads can be any multiple of 2. * Note that we still store the entire 64-bit unit into cpu_env. */ if (len_remain) { - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + len_align); - + t0 = tcg_temp_new_i64(tcg_ctx); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: t1 = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(tcg_ctx, addr, addr, 4); - tcg_gen_qemu_ld_i64(tcg_ctx, t1, addr, midx, MO_LEUW); - tcg_gen_deposit_i64(tcg_ctx, t0, t0, t1, 32, 32); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 4); + tcg_gen_qemu_ld_i64(tcg_ctx, t1, clean_addr, midx, MO_LEUW); tcg_gen_deposit_i64(tcg_ctx, t0, t0, t1, 32, 32); tcg_temp_free_i64(tcg_ctx, t1); break; @@ -4541,9 +4532,8 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) g_assert_not_reached(); } tcg_gen_st_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + len_align); + tcg_temp_free_i64(tcg_ctx, t0); } - tcg_temp_free_i64(tcg_ctx, addr); - tcg_temp_free_i64(tcg_ctx, t0); } /* Similarly for stores. 
*/ @@ -4554,10 +4544,12 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0; + TCGv_i64 dirty_addr, clean_addr, t0; - addr = tcg_temp_new_i64(tcg_ctx); - t0 = tcg_temp_new_i64(tcg_ctx); + dirty_addr = tcg_temp_new_i64(tcg_ctx); + tcg_gen_addi_i64(tcg_ctx, dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(tcg_ctx, dirty_addr); /* Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian @@ -4570,33 +4562,34 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) if (nparts <= 4) { int i; + t0 = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < len_align; i += 8) { tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + i); - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + i); - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, cpu_reg_sp(s, rn), 8); } } else { TCGLabel *loop = gen_new_label(tcg_ctx); - TCGv_ptr t2, i = tcg_const_local_ptr(tcg_ctx, 0); - - gen_set_label(tcg_ctx, loop); + TCGv_ptr tp, i = tcg_const_local_ptr(tcg_ctx, 0); - t2 = tcg_temp_new_ptr(tcg_ctx); - tcg_gen_add_ptr(tcg_ctx, t2, tcg_ctx->cpu_env, i); - tcg_gen_ld_i64(tcg_ctx, t0, t2, vofs); - /* Minimize the number of local temps that must be re-read from - * the stack each iteration. Instead, re-compute values other - * than the loop counter. - */ - tcg_gen_addi_ptr(tcg_ctx, t2, i, imm); - tcg_gen_extu_ptr_i64(tcg_ctx, addr, t2); - tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, rn)); - tcg_temp_free_ptr(tcg_ctx, t2); + /* Copy the clean address into a local temp, live across the loop. */ + t0 = clean_addr; + clean_addr = new_tmp_a64_local(s); + tcg_gen_mov_i64(tcg_ctx, clean_addr, t0); - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + gen_set_label(tcg_ctx, loop); + t0 = tcg_temp_new_i64(tcg_ctx); + tp = tcg_temp_new_ptr(tcg_ctx); + tcg_gen_add_ptr(tcg_ctx, tp, tcg_ctx->cpu_env, i); + tcg_gen_ld_i64(tcg_ctx, t0, tp, vofs); tcg_gen_addi_ptr(tcg_ctx, i, i, 8); + tcg_temp_free_ptr(tcg_ctx, tp); + + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tcg_temp_free_i64(tcg_ctx, t0); tcg_gen_brcondi_ptr(tcg_ctx, TCG_COND_LTU, i, len_align, loop); tcg_temp_free_ptr(tcg_ctx, i); @@ -4604,29 +4597,29 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) /* Predicate register stores can be any multiple of 2. 
*/ if (len_remain) { + t0 = tcg_temp_new_i64(tcg_ctx); tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + len_align); - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + len_align); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(tcg_ctx, addr, addr, 4); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 4); tcg_gen_shri_i64(tcg_ctx, t0, t0, 32); - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEUW); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEUW); break; default: g_assert_not_reached(); } + tcg_temp_free_i64(tcg_ctx, t0); } - tcg_temp_free_i64(tcg_ctx, addr); - tcg_temp_free_i64(tcg_ctx, t0); } static bool trans_LDR_zri(DisasContext *s, arg_rri *a) @@ -4691,27 +4684,36 @@ static const uint8_t dtype_esz[16] = { 3, 2, 1, 3 }; -static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype) -{ - return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s)); -} - static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, - int dtype, gen_helper_gvec_mem *fn) + int dtype, uint32_t mte_n, bool is_write, + gen_helper_gvec_mem *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; TCGv_i32 t_desc; - int desc; + int desc = 0; - /* For e.g. LD4, there are not enough arguments to pass all 4 + /* + * For e.g. LD4, there are not enough arguments to pass all 4 * registers as pointers, so encode the regno into the data field. * For consistency, do this even for LD1. */ - desc = sve_memopidx(s, dtype); - desc |= zt << MEMOPIDX_SHIFT; - desc = simd_desc(vsz, vsz, desc); + if (s->mte_active[0]) { + int msz = dtype_msz(dtype); + + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz, desc); + FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz, desc); + desc <<= SVE_MTEDESC_SHIFT; + } else { + addr = clean_data_tbi(s, addr); + } + + desc = simd_desc(vsz, vsz, zt | desc); t_desc = tcg_const_i32(tcg_ctx, desc); t_pg = tcg_temp_new_ptr(tcg_ctx); @@ -4725,64 +4727,132 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, static void do_ld_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype, int nreg) { - static gen_helper_gvec_mem * const fns[2][16][4] = { - /* Little-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, - gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, - { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, - gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, - { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, 
NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, - gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, - - /* Big-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, - gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, - { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, - gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, - { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, - gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } + static gen_helper_gvec_mem * const fns[2][2][16][4] = { + { /* mte inactive, little-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, + gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, + { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, + gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, + { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, + gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, + + /* mte inactive, big-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, + gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, + { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, + gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, + { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, + gen_helper_sve_ld3dd_be_r, 
gen_helper_sve_ld4dd_be_r } } }, + + { /* mte active, little-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r_mte, + gen_helper_sve_ld2hh_le_r_mte, + gen_helper_sve_ld3hh_le_r_mte, + gen_helper_sve_ld4hh_le_r_mte }, + { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r_mte, + gen_helper_sve_ld2ss_le_r_mte, + gen_helper_sve_ld3ss_le_r_mte, + gen_helper_sve_ld4ss_le_r_mte }, + { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r_mte, + gen_helper_sve_ld2dd_le_r_mte, + gen_helper_sve_ld3dd_le_r_mte, + gen_helper_sve_ld4dd_le_r_mte } }, + + /* mte active, big-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r_mte, + gen_helper_sve_ld2hh_be_r_mte, + gen_helper_sve_ld3hh_be_r_mte, + gen_helper_sve_ld4hh_be_r_mte }, + { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r_mte, + gen_helper_sve_ld2ss_be_r_mte, + gen_helper_sve_ld3ss_be_r_mte, + gen_helper_sve_ld4ss_be_r_mte }, + { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r_mte, + gen_helper_sve_ld2dd_be_r_mte, + gen_helper_sve_ld3dd_be_r_mte, + gen_helper_sve_ld4dd_be_r_mte } } }, }; - gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg]; + gen_helper_gvec_mem *fn + = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; - /* While there are holes in the table, they are not + /* + * While there are holes in the table, they are not * accessible via the instruction encoding. 
*/ assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, dtype, fn); + do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); } static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) @@ -4819,56 +4889,98 @@ static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - static gen_helper_gvec_mem * const fns[2][16] = { - /* Little-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_le_r, - gen_helper_sve_ldff1hh_le_r, - gen_helper_sve_ldff1hsu_le_r, - gen_helper_sve_ldff1hdu_le_r, - - gen_helper_sve_ldff1hds_le_r, - gen_helper_sve_ldff1hss_le_r, - gen_helper_sve_ldff1ss_le_r, - gen_helper_sve_ldff1sdu_le_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_le_r }, - - /* Big-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_be_r, - gen_helper_sve_ldff1hh_be_r, - gen_helper_sve_ldff1hsu_be_r, - gen_helper_sve_ldff1hdu_be_r, - - gen_helper_sve_ldff1hds_be_r, - gen_helper_sve_ldff1hss_be_r, - gen_helper_sve_ldff1ss_be_r, - gen_helper_sve_ldff1sdu_be_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_be_r }, + static gen_helper_gvec_mem * const fns[2][2][16] = { + { /* mte inactive, little-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_le_r, + gen_helper_sve_ldff1hh_le_r, + gen_helper_sve_ldff1hsu_le_r, + gen_helper_sve_ldff1hdu_le_r, + + gen_helper_sve_ldff1hds_le_r, + gen_helper_sve_ldff1hss_le_r, + gen_helper_sve_ldff1ss_le_r, + gen_helper_sve_ldff1sdu_le_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_le_r }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_be_r, + gen_helper_sve_ldff1hh_be_r, + gen_helper_sve_ldff1hsu_be_r, + gen_helper_sve_ldff1hdu_be_r, + + gen_helper_sve_ldff1hds_be_r, + gen_helper_sve_ldff1hss_be_r, + gen_helper_sve_ldff1ss_be_r, + gen_helper_sve_ldff1sdu_be_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_be_r } }, + + { /* mte active, little-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_le_r_mte, + gen_helper_sve_ldff1hh_le_r_mte, + gen_helper_sve_ldff1hsu_le_r_mte, + gen_helper_sve_ldff1hdu_le_r_mte, + + gen_helper_sve_ldff1hds_le_r_mte, + gen_helper_sve_ldff1hss_le_r_mte, + gen_helper_sve_ldff1ss_le_r_mte, + gen_helper_sve_ldff1sdu_le_r_mte, + + gen_helper_sve_ldff1bds_r_mte, + gen_helper_sve_ldff1bss_r_mte, + gen_helper_sve_ldff1bhs_r_mte, + gen_helper_sve_ldff1dd_le_r_mte }, + + /* mte active, big-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_be_r_mte, + gen_helper_sve_ldff1hh_be_r_mte, + gen_helper_sve_ldff1hsu_be_r_mte, + gen_helper_sve_ldff1hdu_be_r_mte, + + gen_helper_sve_ldff1hds_be_r_mte, + 
gen_helper_sve_ldff1hss_be_r_mte, + gen_helper_sve_ldff1ss_be_r_mte, + gen_helper_sve_ldff1sdu_be_r_mte, + + gen_helper_sve_ldff1bds_r_mte, + gen_helper_sve_ldff1bss_r_mte, + gen_helper_sve_ldff1bhs_r_mte, + gen_helper_sve_ldff1dd_be_r_mte } }, }; if (sve_access_check(s)) { TCGv_i64 addr = new_tmp_a64(s); tcg_gen_shli_i64(tcg_ctx, addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, a->rn)); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, - fns[s->be_data == MO_BE][a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, + fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); } return true; } @@ -4876,48 +4988,90 @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - static gen_helper_gvec_mem * const fns[2][16] = { - /* Little-endian */ - { gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, - - gen_helper_sve_ldnf1sds_le_r, - gen_helper_sve_ldnf1hh_le_r, - gen_helper_sve_ldnf1hsu_le_r, - gen_helper_sve_ldnf1hdu_le_r, - - gen_helper_sve_ldnf1hds_le_r, - gen_helper_sve_ldnf1hss_le_r, - gen_helper_sve_ldnf1ss_le_r, - gen_helper_sve_ldnf1sdu_le_r, - - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_le_r }, - - /* Big-endian */ - { gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, - - gen_helper_sve_ldnf1sds_be_r, - gen_helper_sve_ldnf1hh_be_r, - gen_helper_sve_ldnf1hsu_be_r, - gen_helper_sve_ldnf1hdu_be_r, - - gen_helper_sve_ldnf1hds_be_r, - gen_helper_sve_ldnf1hss_be_r, - gen_helper_sve_ldnf1ss_be_r, - gen_helper_sve_ldnf1sdu_be_r, - - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_be_r }, + static gen_helper_gvec_mem * const fns[2][2][16] = { + { /* mte inactive, little-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_le_r, + gen_helper_sve_ldnf1hh_le_r, + gen_helper_sve_ldnf1hsu_le_r, + gen_helper_sve_ldnf1hdu_le_r, + + gen_helper_sve_ldnf1hds_le_r, + gen_helper_sve_ldnf1hss_le_r, + gen_helper_sve_ldnf1ss_le_r, + gen_helper_sve_ldnf1sdu_le_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_le_r }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_be_r, + gen_helper_sve_ldnf1hh_be_r, + gen_helper_sve_ldnf1hsu_be_r, + gen_helper_sve_ldnf1hdu_be_r, + + gen_helper_sve_ldnf1hds_be_r, + gen_helper_sve_ldnf1hss_be_r, + gen_helper_sve_ldnf1ss_be_r, + gen_helper_sve_ldnf1sdu_be_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_be_r } }, + + { /* mte inactive, little-endian */ + { gen_helper_sve_ldnf1bb_r_mte, + gen_helper_sve_ldnf1bhu_r_mte, + gen_helper_sve_ldnf1bsu_r_mte, + gen_helper_sve_ldnf1bdu_r_mte, + + gen_helper_sve_ldnf1sds_le_r_mte, + gen_helper_sve_ldnf1hh_le_r_mte, + gen_helper_sve_ldnf1hsu_le_r_mte, + gen_helper_sve_ldnf1hdu_le_r_mte, + + gen_helper_sve_ldnf1hds_le_r_mte, + gen_helper_sve_ldnf1hss_le_r_mte, + gen_helper_sve_ldnf1ss_le_r_mte, + gen_helper_sve_ldnf1sdu_le_r_mte, + + 
gen_helper_sve_ldnf1bds_r_mte, + gen_helper_sve_ldnf1bss_r_mte, + gen_helper_sve_ldnf1bhs_r_mte, + gen_helper_sve_ldnf1dd_le_r_mte }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldnf1bb_r_mte, + gen_helper_sve_ldnf1bhu_r_mte, + gen_helper_sve_ldnf1bsu_r_mte, + gen_helper_sve_ldnf1bdu_r_mte, + + gen_helper_sve_ldnf1sds_be_r_mte, + gen_helper_sve_ldnf1hh_be_r_mte, + gen_helper_sve_ldnf1hsu_be_r_mte, + gen_helper_sve_ldnf1hdu_be_r_mte, + + gen_helper_sve_ldnf1hds_be_r_mte, + gen_helper_sve_ldnf1hss_be_r_mte, + gen_helper_sve_ldnf1ss_be_r_mte, + gen_helper_sve_ldnf1sdu_be_r_mte, + + gen_helper_sve_ldnf1bds_r_mte, + gen_helper_sve_ldnf1bss_r_mte, + gen_helper_sve_ldnf1bhs_r_mte, + gen_helper_sve_ldnf1dd_be_r_mte } }, }; if (sve_access_check(s)) { @@ -4927,8 +5081,8 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) TCGv_i64 addr = new_tmp_a64(s); tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, a->rn), off); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, - fns[s->be_data == MO_BE][a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, + fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); } return true; } @@ -4948,9 +5102,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz) int desc, poff; /* Load the first quadword using the normal predicated load helpers. */ - desc = sve_memopidx(s, msz_dtype(s, msz)); - desc |= zt << MEMOPIDX_SHIFT; - desc = simd_desc(16, 16, desc); + desc = simd_desc(16, 16, zt); t_desc = tcg_const_i32(tcg_ctx, desc); poff = pred_full_reg_offset(s, pg); @@ -5026,8 +5178,14 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) unsigned psz = pred_full_reg_size(s); unsigned esz = dtype_esz[a->dtype]; unsigned msz = dtype_msz(a->dtype); - TCGLabel *over = gen_new_label(tcg_ctx); - TCGv_i64 temp; + TCGLabel *over; + TCGv_i64 temp, clean_addr; + + if (!sve_access_check(s)) { + return true; + } + + over = gen_new_label(tcg_ctx); /* If the guarding predicate has no bits set, no load occurs. */ if (psz <= 8) { @@ -5050,7 +5208,9 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) /* Load the data. */ temp = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, temp, cpu_reg_sp(s, a->rn), a->imm << msz); - tcg_gen_qemu_ld_i64(tcg_ctx, temp, temp, get_mem_index(s), + clean_addr = gen_mte_check1(s, temp, false, true, msz); + + tcg_gen_qemu_ld_i64(tcg_ctx, temp, clean_addr, get_mem_index(s), s->be_data | dtype_mop[a->dtype]); /* Broadcast to *all* elements. 
*/ @@ -5067,73 +5227,125 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) { - static gen_helper_gvec_mem * const fn_single[2][4][4] = { - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_le_r, - gen_helper_sve_st1hs_le_r, - gen_helper_sve_st1hd_le_r }, - { NULL, NULL, - gen_helper_sve_st1ss_le_r, - gen_helper_sve_st1sd_le_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_le_r } }, - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_be_r, - gen_helper_sve_st1hs_be_r, - gen_helper_sve_st1hd_be_r }, - { NULL, NULL, - gen_helper_sve_st1ss_be_r, - gen_helper_sve_st1sd_be_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_be_r } }, + static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { + { { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_le_r, + gen_helper_sve_st1hs_le_r, + gen_helper_sve_st1hd_le_r }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r, + gen_helper_sve_st1sd_le_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r } }, + { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_be_r, + gen_helper_sve_st1hs_be_r, + gen_helper_sve_st1hd_be_r }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r, + gen_helper_sve_st1sd_be_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r } } }, + + { { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_le_r_mte, + gen_helper_sve_st1hs_le_r_mte, + gen_helper_sve_st1hd_le_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r_mte, + gen_helper_sve_st1sd_le_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r_mte } }, + { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_be_r_mte, + gen_helper_sve_st1hs_be_r_mte, + gen_helper_sve_st1hd_be_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r_mte, + gen_helper_sve_st1sd_be_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r_mte } } }, }; - static gen_helper_gvec_mem * const fn_multiple[2][3][4] = { - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_le_r, - gen_helper_sve_st2ss_le_r, - gen_helper_sve_st2dd_le_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_le_r, - gen_helper_sve_st3ss_le_r, - gen_helper_sve_st3dd_le_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_le_r, - gen_helper_sve_st4ss_le_r, - gen_helper_sve_st4dd_le_r } }, - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_be_r, - gen_helper_sve_st2ss_be_r, - gen_helper_sve_st2dd_be_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_be_r, - gen_helper_sve_st3ss_be_r, - gen_helper_sve_st3dd_be_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_be_r, - gen_helper_sve_st4ss_be_r, - gen_helper_sve_st4dd_be_r } }, + static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { + { { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_le_r, + gen_helper_sve_st2ss_le_r, + gen_helper_sve_st2dd_le_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_le_r, + gen_helper_sve_st3ss_le_r, + gen_helper_sve_st3dd_le_r }, + { gen_helper_sve_st4bb_r, + 
gen_helper_sve_st4hh_le_r, + gen_helper_sve_st4ss_le_r, + gen_helper_sve_st4dd_le_r } }, + { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_be_r, + gen_helper_sve_st2ss_be_r, + gen_helper_sve_st2dd_be_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_be_r, + gen_helper_sve_st3ss_be_r, + gen_helper_sve_st3dd_be_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_be_r, + gen_helper_sve_st4ss_be_r, + gen_helper_sve_st4dd_be_r } } }, + { { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_le_r_mte, + gen_helper_sve_st2ss_le_r_mte, + gen_helper_sve_st2dd_le_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_le_r_mte, + gen_helper_sve_st3ss_le_r_mte, + gen_helper_sve_st3dd_le_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_le_r_mte, + gen_helper_sve_st4ss_le_r_mte, + gen_helper_sve_st4dd_le_r_mte } }, + { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_be_r_mte, + gen_helper_sve_st2ss_be_r_mte, + gen_helper_sve_st2dd_be_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_be_r_mte, + gen_helper_sve_st3ss_be_r_mte, + gen_helper_sve_st3dd_be_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_be_r_mte, + gen_helper_sve_st4ss_be_r_mte, + gen_helper_sve_st4dd_be_r_mte } } }, }; gen_helper_gvec_mem *fn; int be = s->be_data == MO_BE; if (nreg == 0) { /* ST1 */ - fn = fn_single[be][msz][esz]; + fn = fn_single[s->mte_active[0]][be][msz][esz]; + nreg = 1; } else { /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ assert(msz == esz); - fn = fn_multiple[be][nreg - 1][msz]; + fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; } assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) @@ -5174,7 +5386,7 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) */ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, - int scale, TCGv_i64 scalar, int msz, + int scale, TCGv_i64 scalar, int msz, bool is_write, gen_helper_gvec_mem_scatter *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; @@ -5183,11 +5395,17 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, TCGv_ptr t_pg = tcg_temp_new_ptr(tcg_ctx); TCGv_ptr t_zt = tcg_temp_new_ptr(tcg_ctx); TCGv_i32 t_desc; - int desc; + int desc = 0; - desc = sve_memopidx(s, msz_dtype(s, msz)); - desc |= scale << MEMOPIDX_SHIFT; - desc = simd_desc(vsz, vsz, desc); + if (s->mte_active[0]) { + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz, desc); + desc <<= SVE_MTEDESC_SHIFT; + } + desc = simd_desc(vsz, vsz, desc | scale); t_desc = tcg_const_i32(tcg_ctx, desc); tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, pg)); @@ -5201,176 +5419,339 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, tcg_temp_free_i32(tcg_ctx, t_desc); } -/* Indexed by [be][ff][xs][u][msz]. 
*/ -static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_le_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_le_zsu, - gen_helper_sve_ldss_le_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_le_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_le_zss, - gen_helper_sve_ldss_le_zss, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_le_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_le_zsu, - gen_helper_sve_ldffss_le_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_le_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_le_zss, - gen_helper_sve_ldffss_le_zss, } } } }, - - /* Big-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_be_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_be_zsu, - gen_helper_sve_ldss_be_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_be_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_be_zss, - gen_helper_sve_ldss_be_zss, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_be_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_be_zsu, - gen_helper_sve_ldffss_be_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_be_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_be_zss, - gen_helper_sve_ldffss_be_zss, } } } }, +/* Indexed by [mte][be][ff][xs][u][msz]. */ +static gen_helper_gvec_mem_scatter * const +gather_load_fn32[2][2][2][2][2][3] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_le_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_le_zsu, + gen_helper_sve_ldss_le_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_le_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_le_zss, + gen_helper_sve_ldss_le_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_le_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_le_zsu, + gen_helper_sve_ldffss_le_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_le_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_le_zss, + gen_helper_sve_ldffss_le_zss, } } } }, + + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_be_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_be_zsu, + gen_helper_sve_ldss_be_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_be_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_be_zss, + gen_helper_sve_ldss_be_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_be_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_be_zsu, + gen_helper_sve_ldffss_be_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_be_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_be_zss, + gen_helper_sve_ldffss_be_zss, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_le_zsu_mte, + gen_helper_sve_ldss_le_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + 
gen_helper_sve_ldhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_le_zss_mte, + gen_helper_sve_ldss_le_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_le_zsu_mte, + gen_helper_sve_ldffss_le_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_le_zss_mte, + gen_helper_sve_ldffss_le_zss_mte, } } } }, + + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_be_zsu_mte, + gen_helper_sve_ldss_be_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + gen_helper_sve_ldhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_be_zss_mte, + gen_helper_sve_ldss_be_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_be_zsu_mte, + gen_helper_sve_ldffss_be_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_be_zss_mte, + gen_helper_sve_ldffss_be_zss_mte, } } } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_le_zsu, - gen_helper_sve_ldsds_le_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_le_zsu, - gen_helper_sve_ldsdu_le_zsu, - gen_helper_sve_lddd_le_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_le_zss, - gen_helper_sve_ldsds_le_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_le_zss, - gen_helper_sve_ldsdu_le_zss, - gen_helper_sve_lddd_le_zss, } }, - { { gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_le_zd, - gen_helper_sve_ldsds_le_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_le_zd, - gen_helper_sve_ldsdu_le_zd, - gen_helper_sve_lddd_le_zd, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_le_zsu, - gen_helper_sve_ldffsds_le_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_le_zsu, - gen_helper_sve_ldffsdu_le_zsu, - gen_helper_sve_ldffdd_le_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_le_zss, - gen_helper_sve_ldffsds_le_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_le_zss, - gen_helper_sve_ldffsdu_le_zss, - gen_helper_sve_ldffdd_le_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_le_zd, - gen_helper_sve_ldffsds_le_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_le_zd, - gen_helper_sve_ldffsdu_le_zd, - gen_helper_sve_ldffdd_le_zd, } } } }, - - /* Big-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_be_zsu, - gen_helper_sve_ldsds_be_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_be_zsu, - gen_helper_sve_ldsdu_be_zsu, - gen_helper_sve_lddd_be_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_be_zss, - gen_helper_sve_ldsds_be_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_be_zss, - gen_helper_sve_ldsdu_be_zss, - gen_helper_sve_lddd_be_zss, } }, - { { 
gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_be_zd, - gen_helper_sve_ldsds_be_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_be_zd, - gen_helper_sve_ldsdu_be_zd, - gen_helper_sve_lddd_be_zd, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_be_zsu, - gen_helper_sve_ldffsds_be_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_be_zsu, - gen_helper_sve_ldffsdu_be_zsu, - gen_helper_sve_ldffdd_be_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_be_zss, - gen_helper_sve_ldffsds_be_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_be_zss, - gen_helper_sve_ldffsdu_be_zss, - gen_helper_sve_ldffdd_be_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_be_zd, - gen_helper_sve_ldffsds_be_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_be_zd, - gen_helper_sve_ldffsdu_be_zd, - gen_helper_sve_ldffdd_be_zd, } } } }, +static gen_helper_gvec_mem_scatter * const +gather_load_fn64[2][2][2][3][2][4] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_le_zsu, + gen_helper_sve_ldsds_le_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_le_zsu, + gen_helper_sve_ldsdu_le_zsu, + gen_helper_sve_lddd_le_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_le_zss, + gen_helper_sve_ldsds_le_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_le_zss, + gen_helper_sve_ldsdu_le_zss, + gen_helper_sve_lddd_le_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_le_zd, + gen_helper_sve_ldsds_le_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_le_zd, + gen_helper_sve_ldsdu_le_zd, + gen_helper_sve_lddd_le_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_le_zsu, + gen_helper_sve_ldffsds_le_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_le_zsu, + gen_helper_sve_ldffsdu_le_zsu, + gen_helper_sve_ldffdd_le_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_le_zss, + gen_helper_sve_ldffsds_le_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_le_zss, + gen_helper_sve_ldffsdu_le_zss, + gen_helper_sve_ldffdd_le_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_le_zd, + gen_helper_sve_ldffsds_le_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_le_zd, + gen_helper_sve_ldffsdu_le_zd, + gen_helper_sve_ldffdd_le_zd, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_be_zsu, + gen_helper_sve_ldsds_be_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_be_zsu, + gen_helper_sve_ldsdu_be_zsu, + gen_helper_sve_lddd_be_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_be_zss, + gen_helper_sve_ldsds_be_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_be_zss, + gen_helper_sve_ldsdu_be_zss, + gen_helper_sve_lddd_be_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_be_zd, + gen_helper_sve_ldsds_be_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_be_zd, + gen_helper_sve_ldsdu_be_zd, + gen_helper_sve_lddd_be_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_be_zsu, + gen_helper_sve_ldffsds_be_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_be_zsu, + gen_helper_sve_ldffsdu_be_zsu, + gen_helper_sve_ldffdd_be_zsu, } }, + { { 
gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_be_zss, + gen_helper_sve_ldffsds_be_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_be_zss, + gen_helper_sve_ldffsdu_be_zss, + gen_helper_sve_ldffdd_be_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_be_zd, + gen_helper_sve_ldffsds_be_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_be_zd, + gen_helper_sve_ldffsdu_be_zd, + gen_helper_sve_ldffdd_be_zd, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_le_zsu_mte, + gen_helper_sve_ldsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_le_zsu_mte, + gen_helper_sve_ldsdu_le_zsu_mte, + gen_helper_sve_lddd_le_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_le_zss_mte, + gen_helper_sve_ldsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_le_zss_mte, + gen_helper_sve_ldsdu_le_zss_mte, + gen_helper_sve_lddd_le_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_le_zd_mte, + gen_helper_sve_ldsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_le_zd_mte, + gen_helper_sve_ldsdu_le_zd_mte, + gen_helper_sve_lddd_le_zd_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_le_zsu_mte, + gen_helper_sve_ldffsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_le_zsu_mte, + gen_helper_sve_ldffsdu_le_zsu_mte, + gen_helper_sve_ldffdd_le_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_le_zss_mte, + gen_helper_sve_ldffsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_le_zss_mte, + gen_helper_sve_ldffsdu_le_zss_mte, + gen_helper_sve_ldffdd_le_zss_mte, } }, + { { gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_le_zd_mte, + gen_helper_sve_ldffsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_le_zd_mte, + gen_helper_sve_ldffsdu_le_zd_mte, + gen_helper_sve_ldffdd_le_zd_mte, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_be_zsu_mte, + gen_helper_sve_ldsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_be_zsu_mte, + gen_helper_sve_ldsdu_be_zsu_mte, + gen_helper_sve_lddd_be_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_be_zss_mte, + gen_helper_sve_ldsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_be_zss_mte, + gen_helper_sve_ldsdu_be_zss_mte, + gen_helper_sve_lddd_be_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_be_zd_mte, + gen_helper_sve_ldsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_be_zd_mte, + gen_helper_sve_ldsdu_be_zd_mte, + gen_helper_sve_lddd_be_zd_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_be_zsu_mte, + gen_helper_sve_ldffsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_be_zsu_mte, + gen_helper_sve_ldffsdu_be_zsu_mte, + gen_helper_sve_ldffdd_be_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_be_zss_mte, + gen_helper_sve_ldffsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_be_zss_mte, + gen_helper_sve_ldffsdu_be_zss_mte, + gen_helper_sve_ldffdd_be_zss_mte, } }, + { { 
gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_be_zd_mte, + gen_helper_sve_ldffsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_be_zd_mte, + gen_helper_sve_ldffsdu_be_zd_mte, + gen_helper_sve_ldffdd_be_zd_mte, } } } } }, }; static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (!sve_access_check(s)) { return true; @@ -5378,16 +5759,16 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; break; } assert(fn != NULL); do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, false, fn); return true; } @@ -5395,7 +5776,8 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { @@ -5407,10 +5789,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][0][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][2][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; break; } assert(fn != NULL); @@ -5419,63 +5801,108 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(tcg_ctx, a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn); tcg_temp_free_i64(tcg_ctx, imm); return true; } -/* Indexed by [be][xs][msz]. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = { - /* Little-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_le_zsu, - gen_helper_sve_stss_le_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_le_zss, - gen_helper_sve_stss_le_zss, } }, - /* Big-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_be_zsu, - gen_helper_sve_stss_be_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_be_zss, - gen_helper_sve_stss_be_zss, } }, +/* Indexed by [mte][be][xs][msz]. 
*/ +static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = { + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_le_zsu, + gen_helper_sve_stss_le_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_le_zss, + gen_helper_sve_stss_le_zss, } }, + { /* Big-endian */ + { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_be_zsu, + gen_helper_sve_stss_be_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_be_zss, + gen_helper_sve_stss_be_zss, } } }, + { /* MTE Active */ + { /* Little-endian */ + { gen_helper_sve_stbs_zsu_mte, + gen_helper_sve_sths_le_zsu_mte, + gen_helper_sve_stss_le_zsu_mte, }, + { gen_helper_sve_stbs_zss_mte, + gen_helper_sve_sths_le_zss_mte, + gen_helper_sve_stss_le_zss_mte, } }, + { /* Big-endian */ + { gen_helper_sve_stbs_zsu_mte, + gen_helper_sve_sths_be_zsu_mte, + gen_helper_sve_stss_be_zsu_mte, }, + { gen_helper_sve_stbs_zss_mte, + gen_helper_sve_sths_be_zss_mte, + gen_helper_sve_stss_be_zss_mte, } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = { - /* Little-endian */ - { { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_le_zsu, - gen_helper_sve_stsd_le_zsu, - gen_helper_sve_stdd_le_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_le_zss, - gen_helper_sve_stsd_le_zss, - gen_helper_sve_stdd_le_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_le_zd, - gen_helper_sve_stsd_le_zd, - gen_helper_sve_stdd_le_zd, } }, - /* Big-endian */ - { { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_be_zsu, - gen_helper_sve_stsd_be_zsu, - gen_helper_sve_stdd_be_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_be_zss, - gen_helper_sve_stsd_be_zss, - gen_helper_sve_stdd_be_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_be_zd, - gen_helper_sve_stsd_be_zd, - gen_helper_sve_stdd_be_zd, } }, +static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_le_zsu, + gen_helper_sve_stsd_le_zsu, + gen_helper_sve_stdd_le_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_le_zss, + gen_helper_sve_stsd_le_zss, + gen_helper_sve_stdd_le_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_le_zd, + gen_helper_sve_stsd_le_zd, + gen_helper_sve_stdd_le_zd, } }, + { /* Big-endian */ + { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_be_zsu, + gen_helper_sve_stsd_be_zsu, + gen_helper_sve_stdd_be_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_be_zss, + gen_helper_sve_stsd_be_zss, + gen_helper_sve_stdd_be_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_be_zd, + gen_helper_sve_stsd_be_zd, + gen_helper_sve_stdd_be_zd, } } }, + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbd_zsu_mte, + gen_helper_sve_sthd_le_zsu_mte, + gen_helper_sve_stsd_le_zsu_mte, + gen_helper_sve_stdd_le_zsu_mte, }, + { gen_helper_sve_stbd_zss_mte, + gen_helper_sve_sthd_le_zss_mte, + gen_helper_sve_stsd_le_zss_mte, + gen_helper_sve_stdd_le_zss_mte, }, + { gen_helper_sve_stbd_zd_mte, + gen_helper_sve_sthd_le_zd_mte, + gen_helper_sve_stsd_le_zd_mte, + gen_helper_sve_stdd_le_zd_mte, } }, + { /* Big-endian */ + { gen_helper_sve_stbd_zsu_mte, + gen_helper_sve_sthd_be_zsu_mte, + gen_helper_sve_stsd_be_zsu_mte, + gen_helper_sve_stdd_be_zsu_mte, }, + { gen_helper_sve_stbd_zss_mte, + gen_helper_sve_sthd_be_zss_mte, + gen_helper_sve_stsd_be_zss_mte, + gen_helper_sve_stdd_be_zss_mte, 
}, + { gen_helper_sve_stbd_zd_mte, + gen_helper_sve_sthd_be_zd_mte, + gen_helper_sve_stsd_be_zd_mte, + gen_helper_sve_stdd_be_zd_mte, } } }, }; static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (a->esz < a->msz || (a->msz == 0 && a->scale)) { return false; @@ -5485,16 +5912,16 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) } switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][a->xs][a->msz]; + fn = scatter_store_fn32[mte][be][a->xs][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][a->xs][a->msz]; + fn = scatter_store_fn64[mte][be][a->xs][a->msz]; break; default: g_assert_not_reached(); } do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, true, fn); return true; } @@ -5502,7 +5929,8 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz) { @@ -5514,10 +5942,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][0][a->msz]; + fn = scatter_store_fn32[mte][be][0][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][2][a->msz]; + fn = scatter_store_fn64[mte][be][2][a->msz]; break; } assert(fn != NULL); @@ -5526,7 +5954,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(tcg_ctx, a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn); tcg_temp_free_i64(tcg_ctx, imm); return true; } diff --git a/qemu/target/arm/translate-vfp.inc.c b/qemu/target/arm/translate-vfp.inc.c index 4773efb9b2..e0fd1dfda9 100644 --- a/qemu/target/arm/translate-vfp.inc.c +++ b/qemu/target/arm/translate-vfp.inc.c @@ -122,15 +122,14 @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled) if (s->v7m_lspact) { /* * Lazy state saving affects external memory and also the NVIC, - * so we must mark it as an IO operation for icount. + * so we must mark it as an IO operation for icount (and cause + * this to be the last insn in the TB). */ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { + s->base.is_jmp = DISAS_UPDATE_EXIT; gen_io_start(tcg_ctx); } gen_helper_v7m_preserve_fp_state(tcg_ctx, tcg_ctx->cpu_env); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); - } /* * If the preserve_fp_state helper doesn't throw an exception * then it will clear LSPACT; we don't need to repeat this for @@ -1911,12 +1910,6 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) return false; } - /* UNDEF accesses to D16-D31 if they don't exist. 
*/ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - if (!vfp_access_check(s)) { return true; } @@ -2930,6 +2923,6 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) tcg_temp_free_i32(tcg_ctx, fptr); /* End the TB, because we have updated FP control bits */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; return true; } diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 744d8ff709..489db79713 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -368,47 +368,10 @@ static void gen_revsh(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var) tcg_gen_ext16s_i32(tcg_ctx, dest, var); } -/* 32x32->64 multiply. Marks inputs as dead. */ -static TCGv_i64 gen_mulu_i64_i32(TCGContext *tcg_ctx, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); - TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); - TCGv_i64 ret; - - tcg_gen_mulu2_i32(tcg_ctx, lo, hi, a, b); - tcg_temp_free_i32(tcg_ctx, a); - tcg_temp_free_i32(tcg_ctx, b); - - ret = tcg_temp_new_i64(tcg_ctx); - tcg_gen_concat_i32_i64(tcg_ctx, ret, lo, hi); - tcg_temp_free_i32(tcg_ctx, lo); - tcg_temp_free_i32(tcg_ctx, hi); - - return ret; -} - -static TCGv_i64 gen_muls_i64_i32(TCGContext *tcg_ctx, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); - TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); - TCGv_i64 ret; - - tcg_gen_muls2_i32(tcg_ctx, lo, hi, a, b); - tcg_temp_free_i32(tcg_ctx, a); - tcg_temp_free_i32(tcg_ctx, b); - - ret = tcg_temp_new_i64(tcg_ctx); - tcg_gen_concat_i32_i64(tcg_ctx, ret, lo, hi); - tcg_temp_free_i32(tcg_ctx, lo); - tcg_temp_free_i32(tcg_ctx, hi); - - return ret; -} - /* Swap low and high halfwords. */ -static void gen_swap_half(TCGContext *tcg_ctx, TCGv_i32 var) +static void gen_swap_half(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var) { - tcg_gen_rotri_i32(tcg_ctx, var, var, 16); + tcg_gen_rotri_i32(tcg_ctx, dest, var, 16); } /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead. @@ -1197,25 +1160,6 @@ neon_reg_offset (int reg, int n) return vfp_reg_offset(0, sreg); } -/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, - * where 0 is the least significant end of the register. - */ -static inline long -neon_element_offset(int reg, int element, MemOp size) -{ - int element_size = 1 << size; - int ofs = element * element_size; -#ifdef HOST_WORDS_BIGENDIAN - /* Calculate the offset assuming fully little-endian, - * then XOR to account for the order of the 8-byte units. 
- */ - if (element_size < 8) { - ofs ^= 8 - element_size; - } -#endif - return neon_reg_offset(reg, 0) + ofs; -} - static TCGv_i32 neon_load_reg(TCGContext *tcg_ctx, int reg, int pass) { TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); @@ -1223,98 +1167,12 @@ static TCGv_i32 neon_load_reg(TCGContext *tcg_ctx, int reg, int pass) return tmp; } -static void neon_load_element(TCGContext *tcg_ctx, TCGv_i32 var, int reg, int ele, MemOp mop) -{ - long offset = neon_element_offset(reg, ele, mop & MO_SIZE); - - switch (mop) { - case MO_UB: - tcg_gen_ld8u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UW: - tcg_gen_ld16u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UL: - tcg_gen_ld_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - -static void neon_load_element64(TCGContext *tcg_ctx, TCGv_i64 var, int reg, int ele, MemOp mop) -{ - long offset = neon_element_offset(reg, ele, mop & MO_SIZE); - - switch (mop) { - case MO_UB: - tcg_gen_ld8u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UW: - tcg_gen_ld16u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UL: - tcg_gen_ld32u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_Q: - tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - static void neon_store_reg(TCGContext *tcg_ctx, int reg, int pass, TCGv_i32 var) { tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, neon_reg_offset(reg, pass)); tcg_temp_free_i32(tcg_ctx, var); } -static void neon_store_element(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i32 var) -{ - long offset = neon_element_offset(reg, ele, size); - - switch (size) { - case MO_8: - tcg_gen_st8_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_16: - tcg_gen_st16_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_32: - tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - -static void neon_store_element64(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i64 var) -{ - long offset = neon_element_offset(reg, ele, size); - - switch (size) { - case MO_8: - tcg_gen_st8_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_16: - tcg_gen_st16_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_32: - tcg_gen_st32_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_64: - tcg_gen_st_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - static inline void neon_load_reg64(TCGContext *tcg_ctx, TCGv_i64 var, int reg) { tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, vfp_reg_offset(1, reg)); @@ -1344,8 +1202,9 @@ static TCGv_ptr vfp_reg_ptr(TCGContext *tcg_ctx, bool dp, int reg) #define ARM_CP_RW_BIT (1 << 20) -/* Include the VFP decoder */ +/* Include the VFP and Neon decoder */ #include "translate-vfp.inc.c" +#include "translate-neon.inc.c" static inline void iwmmxt_load_reg(TCGContext *tcg_ctx, TCGv_i64 var, int reg) { @@ -2660,8 +2519,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) ((VFP_REG_SHR_POS(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1)) #else #define VFP_REG_SHR(x, n) (((n) > 0) ? 
(x) >> (n) : (x) << -(n)) -#define VFP_SREG(insn, bigbit, smallbit) \ - ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1)) #endif #define VFP_DREG(reg, insn, bigbit, smallbit) do { \ if (dc_isar_feature(aa32_simd_r32, s)) { \ @@ -2674,39 +2531,15 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) }} while (0) #ifdef _MSC_VER -#define VFP_SREG_D(insn) VFP_SREG_POS(insn, 12, 22) #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22) -#define VFP_SREG_N(insn) VFP_SREG_POS(insn, 16, 7) #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) -#define VFP_SREG_M(insn) VFP_SREG_NEG(insn, 0, 5) #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) #else -#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22) #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22) -#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7) #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) -#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5) #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) #endif -static void gen_neon_dup_low16(TCGContext *tcg_ctx, TCGv_i32 var) -{ - TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ext16u_i32(tcg_ctx, var, var); - tcg_gen_shli_i32(tcg_ctx, tmp, var, 16); - tcg_gen_or_i32(tcg_ctx, var, var, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); -} - -static void gen_neon_dup_high16(TCGContext *tcg_ctx, TCGv_i32 var) -{ - TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_andi_i32(tcg_ctx, var, var, 0xffff0000); - tcg_gen_shri_i32(tcg_ctx, tmp, var, 16); - tcg_gen_or_i32(tcg_ctx, var, var, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); -} - static inline bool use_goto_tb(DisasContext *s, target_ulong dest) { struct uc_struct *uc = s->uc; @@ -3015,7 +2848,7 @@ static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_ctx, tcg_tgtmode); tcg_temp_free_i32(tcg_ctx, tcg_regno); tcg_temp_free_i32(tcg_ctx, tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) @@ -3038,7 +2871,7 @@ static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_ctx, tcg_tgtmode); tcg_temp_free_i32(tcg_ctx, tcg_regno); store_reg(s, rn, tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Store value to PC as for an exception return (ie don't @@ -3077,1047 +2910,663 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc) gen_rfe(s, pc, load_cpu_field(tcg_ctx, spsr)); } -#define CPU_V001 tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1 +static void gen_gvec_fn3_qc(TCGContext *tcg_ctx, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, + uint32_t max_sz, gen_helper_gvec_3_ptr *fn) +{ + TCGv_ptr qc_ptr = tcg_temp_new_ptr(tcg_ctx); + + tcg_gen_addi_ptr(tcg_ctx, qc_ptr, tcg_ctx->cpu_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_gvec_3_ptr(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, qc_ptr, + opr_sz, max_sz, 0, fn); + tcg_temp_free_ptr(tcg_ctx, qc_ptr); +} -static inline void gen_neon_add(TCGContext *tcg_ctx, int size, TCGv_i32 t0, TCGv_i32 t1) +void gen_gvec_sqrdmlah_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { - switch (size) { - case 0: gen_helper_neon_add_u8(tcg_ctx, t0, t0, t1); break; - case 1: gen_helper_neon_add_u16(tcg_ctx, t0, t0, t1); break; - case 2: tcg_gen_add_i32(tcg_ctx, t0, t0, t1); break; - default: abort(); - } + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlah_s16, 
gen_helper_gvec_qrdmlah_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); } -static inline void gen_neon_rsb(TCGContext *tcg_ctx, int size, TCGv_i32 t0, TCGv_i32 t1) +void gen_gvec_sqrdmlsh_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { - switch (size) { - case 0: gen_helper_neon_sub_u8(tcg_ctx, t0, t1, t0); break; - case 1: gen_helper_neon_sub_u16(tcg_ctx, t0, t1, t0); break; - case 2: tcg_gen_sub_i32(tcg_ctx, t0, t1, t0); break; - default: return; - } + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +#define GEN_CMP0(NAME, COND) \ + static void gen_##NAME##0_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a) \ + { \ + tcg_gen_setcondi_i32(tcg_ctx, COND, d, a, 0); \ + tcg_gen_neg_i32(tcg_ctx, d, d); \ + } \ + static void gen_##NAME##0_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a) \ + { \ + tcg_gen_setcondi_i64(tcg_ctx, COND, d, a, 0); \ + tcg_gen_neg_i64(tcg_ctx, d, d); \ + } \ + static void gen_##NAME##0_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a) \ + { \ + TCGv_vec zero = tcg_const_zeros_vec_matching(tcg_ctx, d); \ + tcg_gen_cmp_vec(tcg_ctx, COND, vece, d, a, zero); \ + tcg_temp_free_vec(tcg_ctx, zero); \ + } \ + void gen_gvec_##NAME##0(TCGContext *tcg_ctx, unsigned vece, uint32_t d, uint32_t m, \ + uint32_t opr_sz, uint32_t max_sz) \ + { \ + const GVecGen2 op[4] = { \ + { .fno = gen_helper_gvec_##NAME##0_b, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .vece = MO_8 }, \ + { .fno = gen_helper_gvec_##NAME##0_h, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .vece = MO_16 }, \ + { .fni4 = gen_##NAME##0_i32, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .vece = MO_32 }, \ + { .fni8 = gen_##NAME##0_i64, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .prefer_i64 = TCG_TARGET_REG_BITS == 64, \ + .vece = MO_64 }, \ + }; \ + tcg_gen_gvec_2(tcg_ctx, d, m, opr_sz, max_sz, &op[vece]); \ + } + +static const TCGOpcode vecop_list_cmp[] = { + INDEX_op_cmp_vec, 0 +}; + +GEN_CMP0(ceq, TCG_COND_EQ) +GEN_CMP0(cle, TCG_COND_LE) +GEN_CMP0(cge, TCG_COND_GE) +GEN_CMP0(clt, TCG_COND_LT) +GEN_CMP0(cgt, TCG_COND_GT) + +#undef GEN_CMP0 + +static void gen_ssra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar8i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); } -/* 32-bit pairwise ops end up the same as the elementwise versions. 
*/ -#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 -#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 -#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 -#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 +static void gen_ssra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar16i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); +} -#define GEN_NEON_INTEGER_OP_ENV(name) do { \ - switch ((size << 1) | u) { \ - case 0: \ - gen_helper_neon_##name##_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 1: \ - gen_helper_neon_##name##_u8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 2: \ - gen_helper_neon_##name##_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 3: \ - gen_helper_neon_##name##_u16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 4: \ - gen_helper_neon_##name##_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 5: \ - gen_helper_neon_##name##_u32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - default: return 1; \ - }} while (0) +static void gen_ssra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_sari_i32(tcg_ctx, a, a, shift); + tcg_gen_add_i32(tcg_ctx, d, d, a); +} -#define GEN_NEON_INTEGER_OP(name) do { \ - switch ((size << 1) | u) { \ - case 0: \ - gen_helper_neon_##name##_s8(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 1: \ - gen_helper_neon_##name##_u8(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 2: \ - gen_helper_neon_##name##_s16(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 3: \ - gen_helper_neon_##name##_u16(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 4: \ - gen_helper_neon_##name##_s32(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 5: \ - gen_helper_neon_##name##_u32(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - default: return 1; \ - }} while (0) +static void gen_ssra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_sari_i64(tcg_ctx, a, a, shift); + tcg_gen_add_i64(tcg_ctx, d, d, a); +} -static TCGv_i32 neon_load_scratch(TCGContext *tcg_ctx, int scratch) +static void gen_ssra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ld_i32(tcg_ctx, tmp, tcg_ctx->cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); - return tmp; + tcg_gen_sari_vec(tcg_ctx, vece, a, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, a); } -static void neon_store_scratch(TCGContext *tcg_ctx, int scratch, TCGv_i32 var) +void gen_gvec_ssra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); - tcg_temp_free_i32(tcg_ctx, var); + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ssra8_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_ssra16_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ssra32_i32, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ssra64_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_b, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest 
= true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. + */ + shift = MIN(shift, (8 << vece) - 1); + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } -static inline TCGv_i32 neon_get_scalar(TCGContext *tcg_ctx, int size, int reg) +static void gen_usra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { - TCGv_i32 tmp; - if (size == 1) { - tmp = neon_load_reg(tcg_ctx, reg & 7, reg >> 4); - if (reg & 8) { - gen_neon_dup_high16(tcg_ctx, tmp); - } else { - gen_neon_dup_low16(tcg_ctx, tmp); - } - } else { - tmp = neon_load_reg(tcg_ctx, reg & 15, reg >> 4); - } - return tmp; + tcg_gen_vec_shr8i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); } -static int gen_neon_unzip(TCGContext *tcg_ctx, int rd, int rm, int size, int q) +static void gen_usra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { - TCGv_ptr pd, pm; - - if (!q && size == 2) { - return 1; - } - pd = vfp_reg_ptr(tcg_ctx, true, rd); - pm = vfp_reg_ptr(tcg_ctx, true, rm); - if (q) { - switch (size) { - case 0: - gen_helper_neon_qunzip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_qunzip16(tcg_ctx, pd, pm); - break; - case 2: - gen_helper_neon_qunzip32(tcg_ctx, pd, pm); - break; - default: - abort(); - } - } else { - switch (size) { - case 0: - gen_helper_neon_unzip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_unzip16(tcg_ctx, pd, pm); - break; - default: - abort(); - } - } - tcg_temp_free_ptr(tcg_ctx, pd); - tcg_temp_free_ptr(tcg_ctx, pm); - return 0; + tcg_gen_vec_shr16i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); } -static int gen_neon_zip(TCGContext *tcg_ctx, int rd, int rm, int size, int q) +static void gen_usra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) { - TCGv_ptr pd, pm; + tcg_gen_shri_i32(tcg_ctx, a, a, shift); + tcg_gen_add_i32(tcg_ctx, d, d, a); +} - if (!q && size == 2) { - return 1; - } - pd = vfp_reg_ptr(tcg_ctx, true, rd); - pm = vfp_reg_ptr(tcg_ctx, true, rm); - if (q) { - switch (size) { - case 0: - gen_helper_neon_qzip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_qzip16(tcg_ctx, pd, pm); - break; - case 2: - gen_helper_neon_qzip32(tcg_ctx, pd, pm); - break; - default: - abort(); - } +static void gen_usra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(tcg_ctx, a, a, shift); + tcg_gen_add_i64(tcg_ctx, d, d, a); +} + +static void gen_usra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_shri_vec(tcg_ctx, vece, a, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, a); +} + +void gen_gvec_usra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_usra8_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8, }, + { .fni8 = gen_usra16_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16, }, + { .fni4 = gen_usra32_i32, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_s, + .load_dest = true, + .opt_opc = vecop_list, 
+ .vece = MO_32, }, + { .fni8 = gen_usra64_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64, }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in all zeros as input to accumulate: nop. + */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } else { - switch (size) { - case 0: - gen_helper_neon_zip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_zip16(tcg_ctx, pd, pm); - break; - default: - abort(); - } + /* Nop, but we do need to clear the tail. */ + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rd_ofs, opr_sz, max_sz); } - tcg_temp_free_ptr(tcg_ctx, pd); - tcg_temp_free_ptr(tcg_ctx, pm); - return 0; } -static void gen_neon_trn_u8(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +/* + * Shift one less than the requested amount, and the low bit is + * the rounding bit. For the 8 and 16-bit operations, because we + * mask the low bit, we can perform a normal integer shift instead + * of a vector shift. + */ +static void gen_srshr8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - TCGv_i32 rd, tmp; + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - rd = tcg_temp_new_i32(tcg_ctx); - tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_8, 1)); + tcg_gen_vec_sar8i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - tcg_gen_shli_i32(tcg_ctx, rd, t0, 8); - tcg_gen_andi_i32(tcg_ctx, rd, rd, 0xff00ff00); - tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0x00ff00ff); - tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); +static void gen_srshr16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - tcg_gen_shri_i32(tcg_ctx, t1, t1, 8); - tcg_gen_andi_i32(tcg_ctx, t1, t1, 0x00ff00ff); - tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xff00ff00); - tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); - tcg_gen_mov_i32(tcg_ctx, t0, rd); + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_16, 1)); + tcg_gen_vec_sar16i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, rd); +static void gen_srshr32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_extract_i32(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_sari_i32(tcg_ctx, d, a, sh); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static void gen_neon_trn_u16(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +static void gen_srshr64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - TCGv_i32 rd, tmp; + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - rd = tcg_temp_new_i32(tcg_ctx); - tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_extract_i64(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_sari_i64(tcg_ctx, d, a, sh); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - tcg_gen_shli_i32(tcg_ctx, rd, t0, 16); - tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0xffff); - tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); - tcg_gen_shri_i32(tcg_ctx, t1, t1, 16); - tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xffff0000); - tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); 
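/*
 * Illustrative scalar model of one lane of the SSRA/USRA expansions above
 * (sketch only, assuming plain C shifts; the model_* names are hypothetical):
 * the shifted element is accumulated into the destination.  Following the
 * comments above, a signed shift by the full element size is clamped to
 * esize - 1 (all sign bits), while an unsigned shift by the full element
 * size contributes zero and leaves the accumulator unchanged.
 */
#include <stdint.h>

static inline int8_t model_ssra8(int8_t d, int8_t a, int shift)    /* shift in [1..8] */
{
    if (shift > 7) {
        shift = 7;            /* mirrors shift = MIN(shift, (8 << vece) - 1) */
    }
    return (int8_t)(d + (a >> shift));
}

static inline uint8_t model_usra8(uint8_t d, uint8_t a, int shift) /* shift in [1..8] */
{
    if (shift >= 8) {
        return d;             /* all bits shifted out: accumulating zero is a nop */
    }
    return (uint8_t)(d + (a >> shift));
}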
- tcg_gen_mov_i32(tcg_ctx, t0, rd); +static void gen_srshr_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec ones = tcg_temp_new_vec_matching(tcg_ctx, d); - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, rd); -} - - -static struct { - int nregs; - int interleave; - int spacing; -} const neon_ls_element_type[11] = { - {1, 4, 1}, - {1, 4, 2}, - {4, 1, 1}, - {2, 2, 2}, - {1, 3, 1}, - {1, 3, 2}, - {3, 1, 1}, - {1, 1, 1}, - {1, 2, 1}, - {1, 2, 2}, - {2, 1, 1} -}; + tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh - 1); + tcg_gen_dupi_vec(tcg_ctx, vece, ones, 1); + tcg_gen_and_vec(tcg_ctx, vece, t, t, ones); + tcg_gen_sari_vec(tcg_ctx, vece, d, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, ones); +} -/* Translate a NEON load/store element instruction. Return nonzero if the - instruction is invalid. */ -static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) +void gen_gvec_srshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - int rd, rn, rm; - int op; - int nregs; - int interleave; - int spacing; - int stride; - int size; - int reg; - int load; - int n; - int vec_size; - int mmu_idx; - MemOp endian; - TCGv_i32 addr; - TCGv_i32 tmp; - TCGv_i32 tmp2; - TCGv_i64 tmp64; + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srshr8_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_srshr16_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_srshr32_i32, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_srshr64_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; - /* FIXME: this access check should not take precedence over UNDEF - * for invalid encodings; we will generate incorrect syndrome information - * for attempts to execute invalid vfp/neon encodings with FP disabled. - */ - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); - if (!s->vfp_enabled) - return 1; - VFP_DREG_D(rd, insn); - rn = (insn >> 16) & 0xf; - rm = insn & 0xf; - load = (insn & (1 << 21)) != 0; - endian = s->be_data; - mmu_idx = get_mem_index(s); - if ((insn & (1 << 23)) == 0) { - /* Load store all elements. */ - op = (insn >> 8) & 0xf; - size = (insn >> 6) & 3; - if (op > 10) - return 1; - /* Catch UNDEF cases for bad values of align field */ - switch (op & 0xc) { - case 4: - if (((insn >> 5) & 1) == 1) { - return 1; - } - break; - case 8: - if (((insn >> 4) & 3) == 3) { - return 1; - } - break; - default: - break; - } - nregs = neon_ls_element_type[op].nregs; - interleave = neon_ls_element_type[op].interleave; - spacing = neon_ls_element_type[op].spacing; - if (size == 3 && (interleave | spacing) != 1) { - return 1; - } - /* For our purposes, bytes are always little-endian. 
*/ - if (size == 0) { - endian = MO_LE; - } - /* Consecutive little-endian elements from a single register - * can be promoted to a larger little-endian operation. + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. */ - if (interleave == 1 && endian == MO_LE) { - size = 3; - } - tmp64 = tcg_temp_new_i64(tcg_ctx); - addr = tcg_temp_new_i32(tcg_ctx); - tmp2 = tcg_const_i32(tcg_ctx, 1 << size); - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - for (n = 0; n < 8 >> size; n++) { - int xs; - for (xs = 0; xs < interleave; xs++) { - int tt = rd + reg + spacing * xs; - - if (load) { - gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); - neon_store_element64(tcg_ctx, tt, n, size, tmp64); - } else { - neon_load_element64(tcg_ctx, tmp64, tt, n, size); - gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); - } - tcg_gen_add_i32(tcg_ctx, addr, addr, tmp2); - } - } - } - tcg_temp_free_i32(tcg_ctx, addr); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_i64(tcg_ctx, tmp64); - stride = nregs * interleave * 8; + tcg_gen_gvec_dup_imm(tcg_ctx, vece, rd_ofs, opr_sz, max_sz, 0); } else { - size = (insn >> 10) & 3; - if (size == 3) { - /* Load single element to all lanes. */ - int a = (insn >> 4) & 1; - if (!load) { - return 1; - } - size = (insn >> 6) & 3; - nregs = ((insn >> 8) & 3) + 1; - - if (size == 3) { - if (nregs != 4 || a == 0) { - return 1; - } - /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */ - size = 2; - } - if (nregs == 1 && a == 1 && size == 0) { - return 1; - } - if (nregs == 3 && a == 1) { - return 1; - } - addr = tcg_temp_new_i32(tcg_ctx); - load_reg_var(s, addr, rn); - - /* VLD1 to all lanes: bit 5 indicates how many Dregs to write. - * VLD2/3/4 to all lanes: bit 5 indicates register stride. - */ - stride = (insn & (1 << 5)) ? 2 : 1; - vec_size = nregs == 1 ? stride * 8 : 8; - - tmp = tcg_temp_new_i32(tcg_ctx); - for (reg = 0; reg < nregs; reg++) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - if ((rd & 1) && vec_size == 16) { - /* We cannot write 16 bytes at once because the - * destination is unaligned. - */ - tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(rd, 0), - 8, 8, tmp); - tcg_gen_gvec_mov(tcg_ctx, 0, neon_reg_offset(rd + 1, 0), - neon_reg_offset(rd, 0), 8, 8); - } else { - tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(rd, 0), - vec_size, vec_size, tmp); - } - tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size); - rd += stride; - } - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, addr); - stride = (1 << size) * nregs; - } else { - /* Single element. */ - int idx = (insn >> 4) & 0xf; - int reg_idx; - switch (size) { - case 0: - reg_idx = (insn >> 5) & 7; - stride = 1; - break; - case 1: - reg_idx = (insn >> 6) & 3; - stride = (insn & (1 << 5)) ? 2 : 1; - break; - case 2: - reg_idx = (insn >> 7) & 1; - stride = (insn & (1 << 6)) ? 2 : 1; - break; - default: - abort(); - } - nregs = ((insn >> 8) & 3) + 1; - /* Catch the UNDEF cases. This is unavoidably a bit messy. 
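/*
 * Illustrative scalar model of one lane of the rounding shifts expanded above
 * (a sketch assuming plain C arithmetic; the model_* names are hypothetical):
 * bit (shift - 1) of the input is the rounding bit, and adding it to the
 * truncating shift gives the same result as (x + (1 << (shift - 1))) >> shift
 * without needing a wider intermediate.  A shift by the full element size
 * rounds every signed value to zero and every unsigned value to its most
 * significant bit; SRSRA/URSRA then accumulate the rounded value into the
 * destination.
 */
#include <stdint.h>

static inline int8_t model_srshr8(int8_t a, int shift)    /* shift in [1..8] */
{
    if (shift >= 8) {
        return 0;                         /* (-1 + 1) >> 1 == 0, (0 + 1) >> 1 == 0 */
    }
    int rounding_bit = (a >> (shift - 1)) & 1;
    return (int8_t)((a >> shift) + rounding_bit);
}

static inline uint8_t model_urshr8(uint8_t a, int shift)  /* shift in [1..8] */
{
    if (shift >= 8) {
        return (uint8_t)(a >> 7);         /* only the rounding bit survives */
    }
    unsigned rounding_bit = ((unsigned)a >> (shift - 1)) & 1u;
    return (uint8_t)(((unsigned)a >> shift) + rounding_bit);
}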
*/ - switch (nregs) { - case 1: - if (((idx & (1 << size)) != 0) || - (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) { - return 1; - } - break; - case 3: - if ((idx & 1) != 0) { - return 1; - } - /* fall through */ - case 2: - if (size == 2 && (idx & 2) != 0) { - return 1; - } - break; - case 4: - if ((size == 2) && ((idx & 3) == 3)) { - return 1; - } - break; - default: - abort(); - } - if ((rd + stride * (nregs - 1)) > 31) { - /* Attempts to write off the end of the register file - * are UNPREDICTABLE; we choose to UNDEF because otherwise - * the neon_load_reg() would write off the end of the array. - */ - return 1; - } - tmp = tcg_temp_new_i32(tcg_ctx); - addr = tcg_temp_new_i32(tcg_ctx); - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - if (load) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - neon_store_element(tcg_ctx, rd, reg_idx, size, tmp); - } else { /* Store */ - neon_load_element(tcg_ctx, tmp, rd, reg_idx, size); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - } - rd += stride; - tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size); - } - tcg_temp_free_i32(tcg_ctx, addr); - tcg_temp_free_i32(tcg_ctx, tmp); - stride = nregs * (1 << size); - } + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } - if (rm != 15) { - TCGv_i32 base; - - base = load_reg(s, rn); - if (rm == 13) { - tcg_gen_addi_i32(tcg_ctx, base, base, stride); - } else { - TCGv_i32 index; - index = load_reg(s, rm); - tcg_gen_add_i32(tcg_ctx, base, base, index); - tcg_temp_free_i32(tcg_ctx, index); - } - store_reg(s, rn, base); - } - return 0; } -static inline void gen_neon_narrow(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_narrow_u8(tcg_ctx, dest, src); break; - case 1: gen_helper_neon_narrow_u16(tcg_ctx, dest, src); break; - case 2: tcg_gen_extrl_i64_i32(tcg_ctx, dest, src); break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + gen_srshr8_i64(tcg_ctx, t, a, sh); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_narrow_sats(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_narrow_sat_s8(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 1: gen_helper_neon_narrow_sat_s16(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 2: gen_helper_neon_narrow_sat_s32(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + gen_srshr16_i64(tcg_ctx, t, a, sh); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_narrow_satu(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) { - switch (size) { - case 0: gen_helper_neon_narrow_sat_u8(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 1: gen_helper_neon_narrow_sat_u16(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 2: gen_helper_neon_narrow_sat_u32(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - default: abort(); - } + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + gen_srshr32_i32(tcg_ctx, t, a, sh); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static inline void 
gen_neon_unarrow_sats(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_unarrow_sat8(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 1: gen_helper_neon_unarrow_sat16(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 2: gen_helper_neon_unarrow_sat32(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + gen_srshr64_i64(tcg_ctx, t, a, sh); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_shift_narrow(TCGContext *tcg_ctx, int size, TCGv_i32 var, TCGv_i32 shift, - int q, int u) +static void gen_srsra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - if (q) { - if (u) { - switch (size) { - case 1: gen_helper_neon_rshl_u16(tcg_ctx, var, var, shift); break; - case 2: gen_helper_neon_rshl_u32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_rshl_s16(tcg_ctx, var, var, shift); break; - case 2: gen_helper_neon_rshl_s32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } - } else { - if (u) { - switch (size) { - case 1: gen_helper_neon_shl_u16(tcg_ctx, var, var, shift); break; - case 2: gen_ushl_i32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_shl_s16(tcg_ctx, var, var, shift); break; - case 2: gen_sshl_i32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } - } + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + gen_srshr_vec(tcg_ctx, vece, t, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); } -static inline void gen_neon_widen(TCGContext *tcg_ctx, TCGv_i64 dest, TCGv_i32 src, int size, int u) +void gen_gvec_srsra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - if (u) { - switch (size) { - case 0: gen_helper_neon_widen_u8(tcg_ctx, dest, src); break; - case 1: gen_helper_neon_widen_u16(tcg_ctx, dest, src); break; - case 2: tcg_gen_extu_i32_i64(tcg_ctx, dest, src); break; - default: abort(); - } + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srsra8_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_srsra16_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_srsra32_i32, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_srsra64_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. With accumulation, this leaves D unchanged. + */ + if (shift == (8 << vece)) { + /* Nop, but we do need to clear the tail. 
*/ + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rd_ofs, opr_sz, max_sz); } else { - switch (size) { - case 0: gen_helper_neon_widen_s8(tcg_ctx, dest, src); break; - case 1: gen_helper_neon_widen_s16(tcg_ctx, dest, src); break; - case 2: tcg_gen_ext_i32_i64(tcg_ctx, dest, src); break; - default: abort(); - } + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } - tcg_temp_free_i32(tcg_ctx, src); } -static inline void gen_neon_addl(TCGContext *tcg_ctx, int size) +static void gen_urshr8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_addl_u16(tcg_ctx, CPU_V001); break; - case 1: gen_helper_neon_addl_u32(tcg_ctx, CPU_V001); break; - case 2: tcg_gen_add_i64(tcg_ctx, CPU_V001); break; - default: abort(); - } -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static inline void gen_neon_subl(TCGContext *tcg_ctx, int size) -{ - switch (size) { - case 0: gen_helper_neon_subl_u16(tcg_ctx, CPU_V001); break; - case 1: gen_helper_neon_subl_u32(tcg_ctx, CPU_V001); break; - case 2: tcg_gen_sub_i64(tcg_ctx, CPU_V001); break; - default: abort(); - } + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_8, 1)); + tcg_gen_vec_shr8i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_negl(TCGContext *tcg_ctx, TCGv_i64 var, int size) +static void gen_urshr16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_negl_u16(tcg_ctx, var, var); break; - case 1: gen_helper_neon_negl_u32(tcg_ctx, var, var); break; - case 2: - tcg_gen_neg_i64(tcg_ctx, var, var); - break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_16, 1)); + tcg_gen_vec_shr16i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_addl_saturate(TCGContext *tcg_ctx, TCGv_i64 op0, TCGv_i64 op1, int size) +static void gen_urshr32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) { - switch (size) { - case 1: gen_helper_neon_addl_saturate_s32(tcg_ctx, op0, tcg_ctx->cpu_env, op0, op1); break; - case 2: gen_helper_neon_addl_saturate_s64(tcg_ctx, op0, tcg_ctx->cpu_env, op0, op1); break; - default: abort(); - } + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_extract_i32(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_shri_i32(tcg_ctx, d, a, sh); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static inline void gen_neon_mull(TCGContext *tcg_ctx, TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b, - int size, int u) +static void gen_urshr64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - TCGv_i64 tmp; - - switch ((size << 1) | u) { - case 0: gen_helper_neon_mull_s8(tcg_ctx, dest, a, b); break; - case 1: gen_helper_neon_mull_u8(tcg_ctx, dest, a, b); break; - case 2: gen_helper_neon_mull_s16(tcg_ctx, dest, a, b); break; - case 3: gen_helper_neon_mull_u16(tcg_ctx, dest, a, b); break; - case 4: - tmp = gen_muls_i64_i32(tcg_ctx, a, b); - tcg_gen_mov_i64(tcg_ctx, dest, tmp); - tcg_temp_free_i64(tcg_ctx, tmp); - break; - case 5: - tmp = gen_mulu_i64_i32(tcg_ctx, a, b); - tcg_gen_mov_i64(tcg_ctx, dest, tmp); - tcg_temp_free_i64(tcg_ctx, tmp); - break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - /* gen_helper_neon_mull_[su]{8|16} do not free their parameters. 
- Don't forget to clean them now. */ - if (size < 2) { - tcg_temp_free_i32(tcg_ctx, a); - tcg_temp_free_i32(tcg_ctx, b); - } + tcg_gen_extract_i64(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_shri_i64(tcg_ctx, d, a, sh); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void gen_neon_narrow_op(TCGContext *tcg_ctx, int op, int u, int size, - TCGv_i32 dest, TCGv_i64 src) +static void gen_urshr_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) { - if (op) { - if (u) { - gen_neon_unarrow_sats(tcg_ctx, size, dest, src); - } else { - gen_neon_narrow(tcg_ctx, size, dest, src); - } - } else { - if (u) { - gen_neon_narrow_satu(tcg_ctx, size, dest, src); - } else { - gen_neon_narrow_sats(tcg_ctx, size, dest, src); - } - } -} + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec ones = tcg_temp_new_vec_matching(tcg_ctx, d); -/* Symbolic constants for op fields for Neon 3-register same-length. - * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B - * table A7-9. - */ -#define NEON_3R_VHADD 0 -#define NEON_3R_VQADD 1 -#define NEON_3R_VRHADD 2 -#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */ -#define NEON_3R_VHSUB 4 -#define NEON_3R_VQSUB 5 -#define NEON_3R_VCGT 6 -#define NEON_3R_VCGE 7 -#define NEON_3R_VSHL 8 -#define NEON_3R_VQSHL 9 -#define NEON_3R_VRSHL 10 -#define NEON_3R_VQRSHL 11 -#define NEON_3R_VMAX 12 -#define NEON_3R_VMIN 13 -#define NEON_3R_VABD 14 -#define NEON_3R_VABA 15 -#define NEON_3R_VADD_VSUB 16 -#define NEON_3R_VTST_VCEQ 17 -#define NEON_3R_VML 18 /* VMLA, VMLS */ -#define NEON_3R_VMUL 19 -#define NEON_3R_VPMAX 20 -#define NEON_3R_VPMIN 21 -#define NEON_3R_VQDMULH_VQRDMULH 22 -#define NEON_3R_VPADD_VQRDMLAH 23 -#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */ -#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */ -#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */ -#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */ -#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */ -#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */ -#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */ -#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */ - -static const uint8_t neon_3r_sizes[] = { - [NEON_3R_VHADD] = 0x7, - [NEON_3R_VQADD] = 0xf, - [NEON_3R_VRHADD] = 0x7, - [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */ - [NEON_3R_VHSUB] = 0x7, - [NEON_3R_VQSUB] = 0xf, - [NEON_3R_VCGT] = 0x7, - [NEON_3R_VCGE] = 0x7, - [NEON_3R_VSHL] = 0xf, - [NEON_3R_VQSHL] = 0xf, - [NEON_3R_VRSHL] = 0xf, - [NEON_3R_VQRSHL] = 0xf, - [NEON_3R_VMAX] = 0x7, - [NEON_3R_VMIN] = 0x7, - [NEON_3R_VABD] = 0x7, - [NEON_3R_VABA] = 0x7, - [NEON_3R_VADD_VSUB] = 0xf, - [NEON_3R_VTST_VCEQ] = 0x7, - [NEON_3R_VML] = 0x7, - [NEON_3R_VMUL] = 0x7, - [NEON_3R_VPMAX] = 0x7, - [NEON_3R_VPMIN] = 0x7, - [NEON_3R_VQDMULH_VQRDMULH] = 0x6, - [NEON_3R_VPADD_VQRDMLAH] = 0x7, - [NEON_3R_SHA] = 0xf, /* size field encodes op type */ - [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */ - [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */ -}; + tcg_gen_shri_vec(tcg_ctx, vece, t, a, shift - 1); + tcg_gen_dupi_vec(tcg_ctx, vece, ones, 1); + 
tcg_gen_and_vec(tcg_ctx, vece, t, t, ones); + tcg_gen_shri_vec(tcg_ctx, vece, d, a, shift); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); -/* Symbolic constants for op fields for Neon 2-register miscellaneous. - * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B - * table A7-13. - */ -#define NEON_2RM_VREV64 0 -#define NEON_2RM_VREV32 1 -#define NEON_2RM_VREV16 2 -#define NEON_2RM_VPADDL 4 -#define NEON_2RM_VPADDL_U 5 -#define NEON_2RM_AESE 6 /* Includes AESD */ -#define NEON_2RM_AESMC 7 /* Includes AESIMC */ -#define NEON_2RM_VCLS 8 -#define NEON_2RM_VCLZ 9 -#define NEON_2RM_VCNT 10 -#define NEON_2RM_VMVN 11 -#define NEON_2RM_VPADAL 12 -#define NEON_2RM_VPADAL_U 13 -#define NEON_2RM_VQABS 14 -#define NEON_2RM_VQNEG 15 -#define NEON_2RM_VCGT0 16 -#define NEON_2RM_VCGE0 17 -#define NEON_2RM_VCEQ0 18 -#define NEON_2RM_VCLE0 19 -#define NEON_2RM_VCLT0 20 -#define NEON_2RM_SHA1H 21 -#define NEON_2RM_VABS 22 -#define NEON_2RM_VNEG 23 -#define NEON_2RM_VCGT0_F 24 -#define NEON_2RM_VCGE0_F 25 -#define NEON_2RM_VCEQ0_F 26 -#define NEON_2RM_VCLE0_F 27 -#define NEON_2RM_VCLT0_F 28 -#define NEON_2RM_VABS_F 30 -#define NEON_2RM_VNEG_F 31 -#define NEON_2RM_VSWP 32 -#define NEON_2RM_VTRN 33 -#define NEON_2RM_VUZP 34 -#define NEON_2RM_VZIP 35 -#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ -#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ -#define NEON_2RM_VSHLL 38 -#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */ -#define NEON_2RM_VRINTN 40 -#define NEON_2RM_VRINTX 41 -#define NEON_2RM_VRINTA 42 -#define NEON_2RM_VRINTZ 43 -#define NEON_2RM_VCVT_F16_F32 44 -#define NEON_2RM_VRINTM 45 -#define NEON_2RM_VCVT_F32_F16 46 -#define NEON_2RM_VRINTP 47 -#define NEON_2RM_VCVTAU 48 -#define NEON_2RM_VCVTAS 49 -#define NEON_2RM_VCVTNU 50 -#define NEON_2RM_VCVTNS 51 -#define NEON_2RM_VCVTPU 52 -#define NEON_2RM_VCVTPS 53 -#define NEON_2RM_VCVTMU 54 -#define NEON_2RM_VCVTMS 55 -#define NEON_2RM_VRECPE 56 -#define NEON_2RM_VRSQRTE 57 -#define NEON_2RM_VRECPE_F 58 -#define NEON_2RM_VRSQRTE_F 59 -#define NEON_2RM_VCVT_FS 60 -#define NEON_2RM_VCVT_FU 61 -#define NEON_2RM_VCVT_SF 62 -#define NEON_2RM_VCVT_UF 63 - -static bool neon_2rm_is_v8_op(int op) -{ - /* Return true if this neon 2reg-misc op is ARMv8 and up */ - switch (op) { - case NEON_2RM_VRINTN: - case NEON_2RM_VRINTA: - case NEON_2RM_VRINTM: - case NEON_2RM_VRINTP: - case NEON_2RM_VRINTZ: - case NEON_2RM_VRINTX: - case NEON_2RM_VCVTAU: - case NEON_2RM_VCVTAS: - case NEON_2RM_VCVTNU: - case NEON_2RM_VCVTNS: - case NEON_2RM_VCVTPU: - case NEON_2RM_VCVTPS: - case NEON_2RM_VCVTMU: - case NEON_2RM_VCVTMS: - return true; - default: - return false; - } + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, ones); } -/* Each entry in this array has bit n set if the insn allows - * size value n (otherwise it will UNDEF). Since unallocated - * op values will have no bits set they always UNDEF. 
- */ -static const uint8_t neon_2rm_sizes[] = { - [NEON_2RM_VREV64] = 0x7, - [NEON_2RM_VREV32] = 0x3, - [NEON_2RM_VREV16] = 0x1, - [NEON_2RM_VPADDL] = 0x7, - [NEON_2RM_VPADDL_U] = 0x7, - [NEON_2RM_AESE] = 0x1, - [NEON_2RM_AESMC] = 0x1, - [NEON_2RM_VCLS] = 0x7, - [NEON_2RM_VCLZ] = 0x7, - [NEON_2RM_VCNT] = 0x1, - [NEON_2RM_VMVN] = 0x1, - [NEON_2RM_VPADAL] = 0x7, - [NEON_2RM_VPADAL_U] = 0x7, - [NEON_2RM_VQABS] = 0x7, - [NEON_2RM_VQNEG] = 0x7, - [NEON_2RM_VCGT0] = 0x7, - [NEON_2RM_VCGE0] = 0x7, - [NEON_2RM_VCEQ0] = 0x7, - [NEON_2RM_VCLE0] = 0x7, - [NEON_2RM_VCLT0] = 0x7, - [NEON_2RM_SHA1H] = 0x4, - [NEON_2RM_VABS] = 0x7, - [NEON_2RM_VNEG] = 0x7, - [NEON_2RM_VCGT0_F] = 0x4, - [NEON_2RM_VCGE0_F] = 0x4, - [NEON_2RM_VCEQ0_F] = 0x4, - [NEON_2RM_VCLE0_F] = 0x4, - [NEON_2RM_VCLT0_F] = 0x4, - [NEON_2RM_VABS_F] = 0x4, - [NEON_2RM_VNEG_F] = 0x4, - [NEON_2RM_VSWP] = 0x1, - [NEON_2RM_VTRN] = 0x7, - [NEON_2RM_VUZP] = 0x7, - [NEON_2RM_VZIP] = 0x7, - [NEON_2RM_VMOVN] = 0x7, - [NEON_2RM_VQMOVN] = 0x7, - [NEON_2RM_VSHLL] = 0x7, - [NEON_2RM_SHA1SU1] = 0x4, - [NEON_2RM_VRINTN] = 0x4, - [NEON_2RM_VRINTX] = 0x4, - [NEON_2RM_VRINTA] = 0x4, - [NEON_2RM_VRINTZ] = 0x4, - [NEON_2RM_VCVT_F16_F32] = 0x2, - [NEON_2RM_VRINTM] = 0x4, - [NEON_2RM_VCVT_F32_F16] = 0x2, - [NEON_2RM_VRINTP] = 0x4, - [NEON_2RM_VCVTAU] = 0x4, - [NEON_2RM_VCVTAS] = 0x4, - [NEON_2RM_VCVTNU] = 0x4, - [NEON_2RM_VCVTNS] = 0x4, - [NEON_2RM_VCVTPU] = 0x4, - [NEON_2RM_VCVTPS] = 0x4, - [NEON_2RM_VCVTMU] = 0x4, - [NEON_2RM_VCVTMS] = 0x4, - [NEON_2RM_VRECPE] = 0x4, - [NEON_2RM_VRSQRTE] = 0x4, - [NEON_2RM_VRECPE_F] = 0x4, - [NEON_2RM_VRSQRTE_F] = 0x4, - [NEON_2RM_VCVT_FS] = 0x4, - [NEON_2RM_VCVT_FU] = 0x4, - [NEON_2RM_VCVT_SF] = 0x4, - [NEON_2RM_VCVT_UF] = 0x4, -}; +void gen_gvec_urshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_urshr8_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_urshr16_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_urshr32_i32, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_urshr64_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); -/* Expand v8.1 simd helper. */ -static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, - int q, int rd, int rn, int rm) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (dc_isar_feature(aa32_rdm, s)) { - int opr_sz = (1 + q) * 8; - tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), - vfp_reg_offset(1, rn), - vfp_reg_offset(1, rm), tcg_ctx->cpu_env, - opr_sz, opr_sz, 0, fn); - return 0; + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in zero. With rounding, this produces a + * copy of the most significant bit. 
+ */ + tcg_gen_gvec_shri(tcg_ctx, vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); + } else { + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } - return 1; } -static void gen_ssra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +static void gen_ursra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - tcg_gen_vec_sar8i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static void gen_ssra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar16i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); + if (sh == 8) { + tcg_gen_vec_shr8i_i64(tcg_ctx, t, a, 7); + } else { + gen_urshr8_i64(tcg_ctx, t, a, sh); + } + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void gen_ssra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) +static void gen_ursra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - tcg_gen_sari_i32(tcg_ctx, a, a, shift); - tcg_gen_add_i32(tcg_ctx, d, d, a); -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static void gen_ssra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_sari_i64(tcg_ctx, a, a, shift); - tcg_gen_add_i64(tcg_ctx, d, d, a); + if (sh == 16) { + tcg_gen_vec_shr16i_i64(tcg_ctx, t, a, 15); + } else { + gen_urshr16_i64(tcg_ctx, t, a, sh); + } + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void gen_ssra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +static void gen_ursra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) { - tcg_gen_sari_vec(tcg_ctx, vece, a, a, sh); - tcg_gen_add_vec(tcg_ctx, vece, d, d, a); -} - -static const TCGOpcode vecop_list_ssra[] = { - INDEX_op_sari_vec, INDEX_op_add_vec, 0 -}; - -const GVecGen2i ssra_op[4] = { - { .fni8 = gen_ssra8_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opt_opc = vecop_list_ssra, - .vece = MO_8 }, - { .fni8 = gen_ssra16_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opt_opc = vecop_list_ssra, - .vece = MO_16 }, - { .fni4 = gen_ssra32_i32, - .fniv = gen_ssra_vec, - .load_dest = true, - .opt_opc = vecop_list_ssra, - .vece = MO_32 }, - { .fni8 = gen_ssra64_i64, - .fniv = gen_ssra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list_ssra, - .load_dest = true, - .vece = MO_64 }, -}; + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); -static void gen_usra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr8i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); + if (sh == 32) { + tcg_gen_shri_i32(tcg_ctx, t, a, 31); + } else { + gen_urshr32_i32(tcg_ctx, t, a, sh); + } + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static void gen_usra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +static void gen_ursra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - tcg_gen_vec_shr16i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static void gen_usra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(tcg_ctx, a, a, shift); - tcg_gen_add_i32(tcg_ctx, d, d, a); + if (sh == 64) { + tcg_gen_shri_i64(tcg_ctx, t, a, 63); + } else { + gen_urshr64_i64(tcg_ctx, t, a, sh); + } + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void 
gen_usra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +static void gen_ursra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - tcg_gen_shri_i64(tcg_ctx, a, a, shift); - tcg_gen_add_i64(tcg_ctx, d, d, a); + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + if (sh == (8 << vece)) { + tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh - 1); + } else { + gen_urshr_vec(tcg_ctx, vece, t, a, sh); + } + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); } -static void gen_usra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +void gen_gvec_ursra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - tcg_gen_shri_vec(tcg_ctx, vece, a, a, sh); - tcg_gen_add_vec(tcg_ctx, vece, d, d, a); -} + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ursra8_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_ursra16_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_ursra32_i32, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_ursra64_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; -static const TCGOpcode vecop_list_usra[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 -}; + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); -const GVecGen2i usra_op[4] = { - { .fni8 = gen_usra8_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_8, }, - { .fni8 = gen_usra16_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_16, }, - { .fni4 = gen_usra32_i32, - .fniv = gen_usra_vec, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_32, }, - { .fni8 = gen_usra64_i64, - .fniv = gen_usra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_64, }, -}; + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); +} static void gen_shr8_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { @@ -4157,47 +3606,62 @@ static void gen_shr64_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64 static void gen_shr_ins_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - if (sh == 0) { - tcg_gen_mov_vec(tcg_ctx, d, a); + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); + tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh); + tcg_gen_and_vec(tcg_ctx, vece, d, d, m); + tcg_gen_or_vec(tcg_ctx, vece, d, d, t); + + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, m); +} + +void gen_gvec_sri(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shr8_ins_i64, + .fniv = gen_shr_ins_vec, 
+ .fno = gen_helper_gvec_sri_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shr16_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shr32_ins_i32, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shr64_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* Shift of esize leaves destination unchanged. */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } else { - TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); - - tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); - tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh); - tcg_gen_and_vec(tcg_ctx, vece, d, d, m); - tcg_gen_or_vec(tcg_ctx, vece, d, d, t); - - tcg_temp_free_vec(tcg_ctx, t); - tcg_temp_free_vec(tcg_ctx, m); - } -} - -static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 }; - -const GVecGen2i sri_op[4] = { - { .fni8 = gen_shr8_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_8 }, - { .fni8 = gen_shr16_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_16 }, - { .fni4 = gen_shr32_ins_i32, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_32 }, - { .fni8 = gen_shr64_ins_i64, - .fniv = gen_shr_ins_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_64 }, -}; + /* Nop, but we do need to clear the tail. 
*/ + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rd_ofs, opr_sz, max_sz); + } +} static void gen_shl8_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { @@ -4235,47 +3699,60 @@ static void gen_shl64_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64 static void gen_shl_ins_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - if (sh == 0) { - tcg_gen_mov_vec(tcg_ctx, d, a); + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_shli_vec(tcg_ctx, vece, t, a, sh); + tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK(0, sh)); + tcg_gen_and_vec(tcg_ctx, vece, d, d, m); + tcg_gen_or_vec(tcg_ctx, vece, d, d, t); + + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, m); +} + +void gen_gvec_sli(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shl8_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shl16_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shl32_ins_i32, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shl64_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [0..esize-1]. */ + tcg_debug_assert(shift >= 0); + tcg_debug_assert(shift < (8 << vece)); + + if (shift == 0) { + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rm_ofs, opr_sz, max_sz); } else { - TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); - - tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK(0, sh)); - tcg_gen_shli_vec(tcg_ctx, vece, t, a, sh); - tcg_gen_and_vec(tcg_ctx, vece, d, d, m); - tcg_gen_or_vec(tcg_ctx, vece, d, d, t); - - tcg_temp_free_vec(tcg_ctx, t); - tcg_temp_free_vec(tcg_ctx, m); - } -} - -static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 }; - -const GVecGen2i sli_op[4] = { - { .fni8 = gen_shl8_ins_i64, - .fniv = gen_shl_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_8 }, - { .fni8 = gen_shl16_ins_i64, - .fniv = gen_shl_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_16 }, - { .fni4 = gen_shl32_ins_i32, - .fniv = gen_shl_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_32 }, - { .fni8 = gen_shl64_ins_i64, - .fniv = gen_shl_ins_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_64 }, -}; + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} static void gen_mla8_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { @@ -4340,62 +3817,69 @@ static void gen_mls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec /* Note that while NEON does not support VMLA and VMLS as 64-bit ops, * these tables are shared with AArch64 which does support them. 
*/ +void gen_gvec_mla(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} -static const TCGOpcode vecop_list_mla[] = { - INDEX_op_mul_vec, INDEX_op_add_vec, 0 -}; - -static const TCGOpcode vecop_list_mls[] = { - INDEX_op_mul_vec, INDEX_op_sub_vec, 0 -}; - -const GVecGen3 mla_op[4] = { - { .fni4 = gen_mla8_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_8 }, - { .fni4 = gen_mla16_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_16 }, - { .fni4 = gen_mla32_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_32 }, - { .fni8 = gen_mla64_i64, - .fniv = gen_mla_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_64 }, -}; - -const GVecGen3 mls_op[4] = { - { .fni4 = gen_mls8_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_8 }, - { .fni4 = gen_mls16_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_16 }, - { .fni4 = gen_mls32_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_32 }, - { .fni8 = gen_mls64_i64, - .fniv = gen_mls_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_64 }, -}; +void gen_gvec_mls(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} /* CMTST : test is "if (X & Y != 0)". 
*/ static void gen_cmtst_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) @@ -4419,27 +3903,31 @@ static void gen_cmtst_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_v tcg_gen_cmp_vec(tcg_ctx, TCG_COND_NE, vece, d, d, a); } -static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 }; - -const GVecGen3 cmtst_op[4] = { - { .fni4 = gen_helper_neon_tst_u8, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list_cmtst, - .vece = MO_8 }, - { .fni4 = gen_helper_neon_tst_u16, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list_cmtst, - .vece = MO_16 }, - { .fni4 = gen_cmtst_i32, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list_cmtst, - .vece = MO_32 }, - { .fni8 = gen_cmtst_i64, - .fniv = gen_cmtst_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list_cmtst, - .vece = MO_64 }, -}; +void gen_gvec_cmtst(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_helper_neon_tst_u8, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_tst_u16, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_cmtst_i32, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_cmtst_i64, + .fniv = gen_cmtst_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} void gen_ushl_i32(TCGContext *tcg_ctx, TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) { @@ -4557,29 +4045,33 @@ static void gen_ushl_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec dst, tcg_temp_free_vec(tcg_ctx, rsh); } -static const TCGOpcode ushl_list[] = { - INDEX_op_neg_vec, INDEX_op_shlv_vec, - INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 -}; - -const GVecGen3 ushl_op[4] = { - { .fniv = gen_ushl_vec, - .fno = gen_helper_gvec_ushl_b, - .opt_opc = ushl_list, - .vece = MO_8 }, - { .fniv = gen_ushl_vec, - .fno = gen_helper_gvec_ushl_h, - .opt_opc = ushl_list, - .vece = MO_16 }, - { .fni4 = gen_ushl_i32, - .fniv = gen_ushl_vec, - .opt_opc = ushl_list, - .vece = MO_32 }, - { .fni8 = gen_ushl_i64, - .fniv = gen_ushl_vec, - .opt_opc = ushl_list, - .vece = MO_64 }, -}; +void gen_gvec_ushl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_shlv_vec, + INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ushl_i32, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ushl_i64, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} void gen_sshl_i32(TCGContext *tcg_ctx, TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) { @@ -4691,29 +4183,33 @@ static void gen_sshl_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec dst, tcg_temp_free_vec(tcg_ctx, tmp); } -static const TCGOpcode sshl_list[] = { - INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, - INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 -}; - -const 
GVecGen3 sshl_op[4] = { - { .fniv = gen_sshl_vec, - .fno = gen_helper_gvec_sshl_b, - .opt_opc = sshl_list, - .vece = MO_8 }, - { .fniv = gen_sshl_vec, - .fno = gen_helper_gvec_sshl_h, - .opt_opc = sshl_list, - .vece = MO_16 }, - { .fni4 = gen_sshl_i32, - .fniv = gen_sshl_vec, - .opt_opc = sshl_list, - .vece = MO_32 }, - { .fni8 = gen_sshl_i64, - .fniv = gen_sshl_vec, - .opt_opc = sshl_list, - .vece = MO_64 }, -}; +void gen_gvec_sshl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, + INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sshl_i32, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sshl_i64, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_uqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4726,32 +4222,37 @@ static void gen_uqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_uqadd[] = { - INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 -}; - -const GVecGen4 uqadd_op[4] = { - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_b, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_8 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_h, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_16 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_s, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_32 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_d, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_64 }, -}; +void gen_gvec_uqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_b, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_h, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_s, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_d, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_sqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4764,32 +4265,37 @@ static void gen_sqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_sqadd[] = { - INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 -}; - -const GVecGen4 sqadd_op[4] = { - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_b, - 
.opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_h, - .opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_s, - .opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_d, - .opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_64 }, -}; +void gen_gvec_sqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_uqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4802,32 +4308,37 @@ static void gen_uqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_uqsub[] = { - INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 -}; - -const GVecGen4 uqsub_op[4] = { - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_b, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_h, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_s, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_d, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_64 }, -}; +void gen_gvec_uqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_sqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4840,2321 +4351,274 @@ static void gen_sqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_sqsub[] = { - 
INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 -}; - -const GVecGen4 sqsub_op[4] = { - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_b, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_h, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_s, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_d, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_64 }, -}; - -/* Translate a NEON data processing instruction. Return nonzero if the - instruction is invalid. - We process data in a mixture of 32-bit and 64-bit chunks. - Mostly we use 32-bit chunks so we can use normal scalar instructions. */ - -static int disas_neon_data_insn(DisasContext *s, uint32_t insn) +void gen_gvec_sqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - int op; - int q; - int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; - int size; - int shift; - int pass; - int count; - int pairwise; - int u; - int vec_size; - uint32_t imm; - TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; - TCGv_ptr ptr1, ptr2, ptr3; - TCGv_i64 tmp64; - - /* FIXME: this access check should not take precedence over UNDEF - * for invalid encodings; we will generate incorrect syndrome information - * for attempts to execute invalid vfp/neon encodings with FP disabled. - */ - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - - if (!s->vfp_enabled) - return 1; - q = (insn & (1 << 6)) != 0; - u = (insn >> 24) & 1; - VFP_DREG_D(rd, insn); - VFP_DREG_N(rn, insn); - VFP_DREG_M(rm, insn); - size = (insn >> 20) & 3; - vec_size = q ? 16 : 8; - rd_ofs = neon_reg_offset(rd, 0); - rn_ofs = neon_reg_offset(rn, 0); - rm_ofs = neon_reg_offset(rm, 0); - - if ((insn & (1 << 23)) == 0) { - /* Three register same length. */ - op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); - /* Catch invalid op and bad size combinations: UNDEF */ - if ((neon_3r_sizes[op] & (1 << size)) == 0) { - return 1; - } - /* All insns of this form UNDEF for either this condition or the - * superset of cases "Q==1"; we catch the latter later. - */ - if (q && ((rd | rn | rm) & 1)) { - return 1; - } - switch (op) { - case NEON_3R_SHA: - /* The SHA-1/SHA-256 3-register instructions require special - * treatment here, as their size field is overloaded as an - * op type selector, and they all consume their input in a - * single pass. 
- */ - if (!q) { - return 1; - } - if (!u) { /* SHA-1 */ - if (!dc_isar_feature(aa32_sha1, s)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rn); - ptr3 = vfp_reg_ptr(tcg_ctx, true, rm); - tmp4 = tcg_const_i32(tcg_ctx, size); - gen_helper_crypto_sha1_3reg(tcg_ctx, ptr1, ptr2, ptr3, tmp4); - tcg_temp_free_i32(tcg_ctx, tmp4); - } else { /* SHA-256 */ - if (!dc_isar_feature(aa32_sha2, s) || size == 3) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rn); - ptr3 = vfp_reg_ptr(tcg_ctx, true, rm); - switch (size) { - case 0: - gen_helper_crypto_sha256h(tcg_ctx, ptr1, ptr2, ptr3); - break; - case 1: - gen_helper_crypto_sha256h2(tcg_ctx, ptr1, ptr2, ptr3); - break; - case 2: - gen_helper_crypto_sha256su1(tcg_ctx, ptr1, ptr2, ptr3); - break; - } - } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - tcg_temp_free_ptr(tcg_ctx, ptr3); - return 0; - - case NEON_3R_VPADD_VQRDMLAH: - if (!u) { - break; /* VPADD */ - } - /* VQRDMLAH */ - switch (size) { - case 1: - return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16, - q, rd, rn, rm); - case 2: - return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32, - q, rd, rn, rm); - } - return 1; - - case NEON_3R_VFM_VQRDMLSH: - if (!u) { - /* VFM, VFMS */ - if (size == 1) { - return 1; - } - break; - } - /* VQRDMLSH */ - switch (size) { - case 1: - return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16, - q, rd, rn, rm); - case 2: - return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32, - q, rd, rn, rm); - } - return 1; - - case NEON_3R_LOGIC: /* Logic ops. */ - switch ((u << 2) | size) { - case 0: /* VAND */ - tcg_gen_gvec_and(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 1: /* VBIC */ - tcg_gen_gvec_andc(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 2: /* VORR */ - tcg_gen_gvec_or(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 3: /* VORN */ - tcg_gen_gvec_orc(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 4: /* VEOR */ - tcg_gen_gvec_xor(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 5: /* VBSL */ - tcg_gen_gvec_bitsel(tcg_ctx, MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 6: /* VBIT */ - tcg_gen_gvec_bitsel(tcg_ctx, MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs, - vec_size, vec_size); - break; - case 7: /* VBIF */ - tcg_gen_gvec_bitsel(tcg_ctx, MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs, - vec_size, vec_size); - break; - } - return 0; - - case NEON_3R_VADD_VSUB: - if (u) { - tcg_gen_gvec_sub(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_add(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VQADD: - tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, vec_size, vec_size, - (u ? uqadd_op : sqadd_op) + size); - return 0; - - case NEON_3R_VQSUB: - tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, vec_size, vec_size, - (u ? uqsub_op : sqsub_op) + size); - return 0; - - case NEON_3R_VMUL: /* VMUL */ - if (u) { - /* Polynomial case allows only P8. 
*/ - if (size != 0) { - return 1; - } - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, - 0, gen_helper_gvec_pmul_b); - } else { - tcg_gen_gvec_mul(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VML: /* VMLA, VMLS */ - tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, - u ? &mls_op[size] : &mla_op[size]); - return 0; - - case NEON_3R_VTST_VCEQ: - if (u) { /* VCEQ */ - tcg_gen_gvec_cmp(tcg_ctx, TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { /* VTST */ - tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size, &cmtst_op[size]); - } - return 0; - - case NEON_3R_VCGT: - tcg_gen_gvec_cmp(tcg_ctx, u ? TCG_COND_GTU : TCG_COND_GT, size, - rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); - return 0; - - case NEON_3R_VCGE: - tcg_gen_gvec_cmp(tcg_ctx, u ? TCG_COND_GEU : TCG_COND_GE, size, - rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); - return 0; - - case NEON_3R_VMAX: - if (u) { - tcg_gen_gvec_umax(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_smax(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - case NEON_3R_VMIN: - if (u) { - tcg_gen_gvec_umin(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_smin(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VSHL: - /* Note the operation is vshl vd,vm,vn */ - tcg_gen_gvec_3(tcg_ctx, rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size, - u ? &ushl_op[size] : &sshl_op[size]); - return 0; - } - - if (size == 3) { - /* 64-bit element instructions. */ - for (pass = 0; pass < (q ? 2 : 1); pass++) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + pass); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + pass); - switch (op) { - case NEON_3R_VQSHL: - if (u) { - gen_helper_neon_qshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } else { - gen_helper_neon_qshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } - break; - case NEON_3R_VRSHL: - if (u) { - gen_helper_neon_rshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } else { - gen_helper_neon_rshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } - break; - case NEON_3R_VQRSHL: - if (u) { - gen_helper_neon_qrshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } else { - gen_helper_neon_qrshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } - break; - default: - abort(); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - return 0; - } - pairwise = 0; - switch (op) { - case NEON_3R_VQSHL: - case NEON_3R_VRSHL: - case NEON_3R_VQRSHL: - { - int rtmp; - /* Shift instruction operands are reversed. 
*/ - rtmp = rn; - rn = rm; - rm = rtmp; - } - break; - case NEON_3R_VPADD_VQRDMLAH: - case NEON_3R_VPMAX: - case NEON_3R_VPMIN: - pairwise = 1; - break; - case NEON_3R_FLOAT_ARITH: - pairwise = (u && size < 2); /* if VPADD (float) */ - break; - case NEON_3R_FLOAT_MINMAX: - pairwise = u; /* if VPMIN/VPMAX (float) */ - break; - case NEON_3R_FLOAT_CMP: - if (!u && size) { - /* no encoding for U=0 C=1x */ - return 1; - } - break; - case NEON_3R_FLOAT_ACMP: - if (!u) { - return 1; - } - break; - case NEON_3R_FLOAT_MISC: - /* VMAXNM/VMINNM in ARMv8 */ - if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) { - return 1; - } - break; - case NEON_3R_VFM_VQRDMLSH: - if (!dc_isar_feature(aa32_simdfmac, s)) { - return 1; - } - break; - default: - break; - } - - if (pairwise && q) { - /* All the pairwise insns UNDEF if Q is set */ - return 1; - } - - for (pass = 0; pass < (q ? 4 : 2); pass++) { - - if (pairwise) { - /* Pairwise. */ - if (pass < 1) { - tmp = neon_load_reg(tcg_ctx, rn, 0); - tmp2 = neon_load_reg(tcg_ctx, rn, 1); - } else { - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - } - } else { - /* Elementwise. */ - tmp = neon_load_reg(tcg_ctx, rn, pass); - tmp2 = neon_load_reg(tcg_ctx, rm, pass); - } - switch (op) { - case NEON_3R_VHADD: - GEN_NEON_INTEGER_OP(hadd); - break; - case NEON_3R_VRHADD: - GEN_NEON_INTEGER_OP(rhadd); - break; - case NEON_3R_VHSUB: - GEN_NEON_INTEGER_OP(hsub); - break; - case NEON_3R_VQSHL: - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - case NEON_3R_VRSHL: - GEN_NEON_INTEGER_OP(rshl); - break; - case NEON_3R_VQRSHL: - GEN_NEON_INTEGER_OP_ENV(qrshl); - break; - case NEON_3R_VABD: - GEN_NEON_INTEGER_OP(abd); - break; - case NEON_3R_VABA: - GEN_NEON_INTEGER_OP(abd); - tcg_temp_free_i32(tcg_ctx, tmp2); - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - gen_neon_add(tcg_ctx, size, tmp, tmp2); - break; - case NEON_3R_VPMAX: - GEN_NEON_INTEGER_OP(pmax); - break; - case NEON_3R_VPMIN: - GEN_NEON_INTEGER_OP(pmin); - break; - case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */ - if (!u) { /* VQDMULH */ - switch (size) { - case 1: - gen_helper_neon_qdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - case 2: - gen_helper_neon_qdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - default: abort(); - } - } else { /* VQRDMULH */ - switch (size) { - case 1: - gen_helper_neon_qrdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - case 2: - gen_helper_neon_qrdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - default: abort(); - } - } - break; - case NEON_3R_VPADD_VQRDMLAH: - switch (size) { - case 0: gen_helper_neon_padd_u8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_padd_u16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: tcg_gen_add_i32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - break; - case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. 
*/ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - switch ((u << 2) | size) { - case 0: /* VADD */ - case 4: /* VPADD */ - gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus); - break; - case 2: /* VSUB */ - gen_helper_vfp_subs(tcg_ctx, tmp, tmp, tmp2, fpstatus); - break; - case 6: /* VABD */ - gen_helper_neon_abd_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - break; - default: - abort(); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_MULTIPLY: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus); - if (!u) { - tcg_temp_free_i32(tcg_ctx, tmp2); - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - if (size == 0) { - gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_vfp_subs(tcg_ctx, tmp, tmp2, tmp, fpstatus); - } - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_CMP: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (!u) { - gen_helper_neon_ceq_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - if (size == 0) { - gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_ACMP: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (size == 0) { - gen_helper_neon_acge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_neon_acgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_MINMAX: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (size == 0) { - gen_helper_vfp_maxs(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_vfp_mins(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_MISC: - if (u) { - /* VMAXNM/VMINNM */ - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (size == 0) { - gen_helper_vfp_maxnums(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_vfp_minnums(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - } else { - if (size == 0) { - gen_helper_recps_f32(tcg_ctx, tmp, tmp, tmp2, tcg_ctx->cpu_env); - } else { - gen_helper_rsqrts_f32(tcg_ctx, tmp, tmp, tmp2, tcg_ctx->cpu_env); - } - } - break; - case NEON_3R_VFM_VQRDMLSH: - { - /* VFMA, VFMS: fused multiply-add */ - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - TCGv_i32 tmp3 = neon_load_reg(tcg_ctx, rd, pass); - if (size) { - /* VFMS */ - gen_helper_vfp_negs(tcg_ctx, tmp, tmp); - } - gen_helper_vfp_muladds(tcg_ctx, tmp, tmp, tmp2, tmp3, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp3); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - default: - abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - - /* Save the result. For elementwise operations we can put it - straight into the destination register. For pairwise operations - we have to be careful to avoid clobbering the source operands. */ - if (pairwise && rd == rm) { - neon_store_scratch(tcg_ctx, pass, tmp); - } else { - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - - } /* for pass */ - if (pairwise && rd == rm) { - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp = neon_load_scratch(tcg_ctx, pass); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - } - /* End of 3 register same size operations. */ - } else if (insn & (1 << 4)) { - if ((insn & 0x00380080) != 0) { - /* Two registers and shift. 
*/ - op = (insn >> 8) & 0xf; - if (insn & (1 << 7)) { - /* 64-bit shift. */ - if (op > 7) { - return 1; - } - size = 3; - } else { - size = 2; - while ((insn & (1 << (size + 19))) == 0) - size--; - } - shift = (insn >> 16) & ((1 << (3 + size)) - 1); - if (op < 8) { - /* Shift by immediate: - VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ - if (q && ((rd | rm) & 1)) { - return 1; - } - if (!u && (op == 4 || op == 6)) { - return 1; - } - /* Right shifts are encoded as N - shift, where N is the - element size in bits. */ - if (op <= 4) { - shift = shift - (1 << (size + 3)); - } - - switch (op) { - case 0: /* VSHR */ - /* Right shift comes here negative. */ - shift = -shift; - /* Shifts larger than the element size are architecturally - * valid. Unsigned results in all zeros; signed results - * in all sign bits. - */ - if (!u) { - tcg_gen_gvec_sari(tcg_ctx, size, rd_ofs, rm_ofs, - MIN(shift, (8 << size) - 1), - vec_size, vec_size); - } else if (shift >= 8 << size) { - tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0); - } else { - tcg_gen_gvec_shri(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 1: /* VSRA */ - /* Right shift comes here negative. */ - shift = -shift; - /* Shifts larger than the element size are architecturally - * valid. Unsigned results in all zeros; signed results - * in all sign bits. - */ - if (!u) { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size, - MIN(shift, (8 << size) - 1), - &ssra_op[size]); - } else if (shift >= 8 << size) { - /* rd += 0 */ - } else { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size, - shift, &usra_op[size]); - } - return 0; - - case 4: /* VSRI */ - if (!u) { - return 1; - } - /* Right shift comes here negative. */ - shift = -shift; - /* Shift out of range leaves destination unchanged. */ - if (shift < 8 << size) { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size, - shift, &sri_op[size]); - } - return 0; - - case 5: /* VSHL, VSLI */ - if (u) { /* VSLI */ - /* Shift out of range leaves destination unchanged. */ - if (shift < 8 << size) { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, - vec_size, shift, &sli_op[size]); - } - } else { /* VSHL */ - /* Shifts larger than the element size are - * architecturally valid and results in zero. - */ - if (shift >= 8 << size) { - tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0); - } else { - tcg_gen_gvec_shli(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - } - return 0; - } - - if (size == 3) { - count = q + 1; - } else { - count = q ? 4: 2; - } - - /* To avoid excessive duplication of ops we implement shift - * by immediate using the variable shift operations. 
- */ - imm = dup_const(size, shift); - - for (pass = 0; pass < count; pass++) { - if (size == 3) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rm + pass); - tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_V1, imm); - switch (op) { - case 2: /* VRSHR */ - case 3: /* VRSRA */ - if (u) - gen_helper_neon_rshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - else - gen_helper_neon_rshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - break; - case 6: /* VQSHLU */ - gen_helper_neon_qshlu_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - break; - case 7: /* VQSHL */ - if (u) { - gen_helper_neon_qshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } else { - gen_helper_neon_qshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } - break; - default: - g_assert_not_reached(); - break; - } - if (op == 3) { - /* Accumulate. */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - tcg_gen_add_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } else { /* size < 3 */ - /* Operands in T0 and T1. */ - tmp = neon_load_reg(tcg_ctx, rm, pass); - tmp2 = tcg_temp_new_i32(tcg_ctx); - tcg_gen_movi_i32(tcg_ctx, tmp2, imm); - switch (op) { - case 2: /* VRSHR */ - case 3: /* VRSRA */ - GEN_NEON_INTEGER_OP(rshl); - break; - case 6: /* VQSHLU */ - switch (size) { - case 0: - gen_helper_neon_qshlu_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, - tmp, tmp2); - break; - case 1: - gen_helper_neon_qshlu_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, - tmp, tmp2); - break; - case 2: - gen_helper_neon_qshlu_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, - tmp, tmp2); - break; - default: - abort(); - } - break; - case 7: /* VQSHL */ - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - default: - g_assert_not_reached(); - break; - } - tcg_temp_free_i32(tcg_ctx, tmp2); - - if (op == 3) { - /* Accumulate. */ - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - gen_neon_add(tcg_ctx, size, tmp, tmp2); - tcg_temp_free_i32(tcg_ctx, tmp2); - } - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - } /* for pass */ - } else if (op < 10) { - /* Shift by immediate and narrow: - VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ - int input_unsigned = (op == 8) ? 
!u : u; - if (rm & 1) { - return 1; - } - shift = shift - (1 << (size + 3)); - size++; - if (size == 3) { - tmp64 = tcg_const_i64(tcg_ctx, shift); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rm); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + 1); - for (pass = 0; pass < 2; pass++) { - TCGv_i64 in; - if (pass == 0) { - in = tcg_ctx->cpu_V0; - } else { - in = tcg_ctx->cpu_V1; - } - if (q) { - if (input_unsigned) { - gen_helper_neon_rshl_u64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } else { - gen_helper_neon_rshl_s64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } - } else { - if (input_unsigned) { - gen_ushl_i64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } else { - gen_sshl_i64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } - } - tmp = tcg_temp_new_i32(tcg_ctx); - gen_neon_narrow_op(tcg_ctx, op == 8, u, size - 1, tmp, tcg_ctx->cpu_V0); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i64(tcg_ctx, tmp64); - } else { - if (size == 1) { - imm = (uint16_t)shift; - imm |= imm << 16; - } else { - /* size == 2 */ - imm = (uint32_t)shift; - } - tmp2 = tcg_const_i32(tcg_ctx, imm); - tmp4 = neon_load_reg(tcg_ctx, rm + 1, 0); - tmp5 = neon_load_reg(tcg_ctx, rm + 1, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(tcg_ctx, rm, 0); - } else { - tmp = tmp4; - } - gen_neon_shift_narrow(tcg_ctx, size, tmp, tmp2, q, - input_unsigned); - if (pass == 0) { - tmp3 = neon_load_reg(tcg_ctx, rm, 1); - } else { - tmp3 = tmp5; - } - gen_neon_shift_narrow(tcg_ctx, size, tmp3, tmp2, q, - input_unsigned); - tcg_gen_concat_i32_i64(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, tmp3); - tmp = tcg_temp_new_i32(tcg_ctx); - gen_neon_narrow_op(tcg_ctx, op == 8, u, size - 1, tmp, tcg_ctx->cpu_V0); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i32(tcg_ctx, tmp2); - } - } else if (op == 10) { - /* VSHLL, VMOVL */ - if (q || (rd & 1)) { - return 1; - } - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 1) - tmp = tmp2; - - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, u); - - if (shift != 0) { - /* The shift is less than the width of the source - type, so we can just shift the whole register. */ - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, shift); - /* Widen the result of shift: we need to clear - * the potential overflow bits resulting from - * left bits of the narrow input appearing as - * right bits of left the neighbour narrow - * input. */ - if (size < 2 || !u) { - uint64_t imm64; - if (size == 0) { - imm = (0xffu >> (8 - shift)); - imm |= imm << 16; - } else if (size == 1) { - imm = 0xffff >> (16 - shift); - } else { - /* size == 2 */ - imm = 0xffffffff >> (32 - shift); - } - if (size < 2) { - imm64 = imm | (((uint64_t)imm) << 32); - } else { - imm64 = imm; - } - tcg_gen_andi_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, ~imm64); - } - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - } else if (op >= 14) { - /* VCVT fixed-point. 
*/ - TCGv_ptr fpst; - TCGv_i32 shiftv; - VFPGenFixPointFn *fn; - - if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) { - return 1; - } + static const TCGOpcode vecop_list[] = { + INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} - if (!(op & 1)) { - if (u) { - fn = gen_helper_vfp_ultos; - } else { - fn = gen_helper_vfp_sltos; - } - } else { - if (u) { - fn = gen_helper_vfp_touls_round_to_zero; - } else { - fn = gen_helper_vfp_tosls_round_to_zero; - } - } +static void gen_sabd_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); - /* We have already masked out the must-be-1 top bit of imm6, - * hence this 32-shift where the ARM ARM has 64-imm6. - */ - shift = 32 - shift; - fpst = get_fpstatus_ptr(tcg_ctx, 1); - shiftv = tcg_const_i32(tcg_ctx, shift); - for (pass = 0; pass < (q ? 4 : 2); pass++) { - TCGv_i32 tmpf = neon_load_reg(tcg_ctx, rm, pass); - fn(tcg_ctx, tmpf, tmpf, shiftv, fpst); - neon_store_reg(tcg_ctx, rd, pass, tmpf); - } - tcg_temp_free_ptr(tcg_ctx, fpst); - tcg_temp_free_i32(tcg_ctx, shiftv); - } else { - return 1; - } - } else { /* (insn & 0x00380080) == 0 */ - int invert, reg_ofs, vec_size; + tcg_gen_sub_i32(tcg_ctx, t, a, b); + tcg_gen_sub_i32(tcg_ctx, d, b, a); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LT, d, a, b, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} - if (q && (rd & 1)) { - return 1; - } +static void gen_sabd_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - op = (insn >> 8) & 0xf; - /* One register and immediate. */ - imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf); - invert = (insn & (1 << 5)) != 0; - /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. - * We choose to not special-case this and will behave as if a - * valid constant encoding of 0 had been given. - */ - switch (op) { - case 0: case 1: - /* no-op */ - break; - case 2: case 3: - imm <<= 8; - break; - case 4: case 5: - imm <<= 16; - break; - case 6: case 7: - imm <<= 24; - break; - case 8: case 9: - imm |= imm << 16; - break; - case 10: case 11: - imm = (imm << 8) | (imm << 24); - break; - case 12: - imm = (imm << 8) | 0xff; - break; - case 13: - imm = (imm << 16) | 0xffff; - break; - case 14: - imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) { - imm = ~imm; - } - break; - case 15: - if (invert) { - return 1; - } - imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) - | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); - break; - } - if (invert) { - imm = ~imm; - } + tcg_gen_sub_i64(tcg_ctx, t, a, b); + tcg_gen_sub_i64(tcg_ctx, d, b, a); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LT, d, a, b, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - reg_ofs = neon_reg_offset(rd, 0); - vec_size = q ? 
16 : 8; +static void gen_sabd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - if (op & 1 && op < 12) { - if (invert) { - /* The immediate value has already been inverted, - * so BIC becomes AND. - */ - tcg_gen_gvec_andi(tcg_ctx, MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } else { - tcg_gen_gvec_ori(tcg_ctx, MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } - } else { - /* VMOV, VMVN. */ - if (op == 14 && invert) { - TCGv_i64 t64 = tcg_temp_new_i64(tcg_ctx); - - for (pass = 0; pass <= q; ++pass) { - uint64_t val = 0; - int n; - - for (n = 0; n < 8; n++) { - if (imm & (1 << (n + pass * 8))) { - val |= 0xffull << (n * 8); - } - } - tcg_gen_movi_i64(tcg_ctx, t64, val); - neon_store_reg64(tcg_ctx, t64, rd + pass); - } - tcg_temp_free_i64(tcg_ctx, t64); - } else { - tcg_gen_gvec_dup32i(tcg_ctx, reg_ofs, vec_size, vec_size, imm); - } - } - } - } else { /* (insn & 0x00800010 == 0x00800000) */ - if (size != 3) { - op = (insn >> 8) & 0xf; - if ((insn & (1 << 6)) == 0) { - /* Three registers of different lengths. */ - int src1_wide; - int src2_wide; - int prewiden; - /* undefreq: bit 0 : UNDEF if size == 0 - * bit 1 : UNDEF if size == 1 - * bit 2 : UNDEF if size == 2 - * bit 3 : UNDEF if U == 1 - * Note that [2:0] set implies 'always UNDEF' - */ - int undefreq; - /* prewiden, src1_wide, src2_wide, undefreq */ - static const int neon_3reg_wide[16][4] = { - {1, 0, 0, 0}, /* VADDL */ - {1, 1, 0, 0}, /* VADDW */ - {1, 0, 0, 0}, /* VSUBL */ - {1, 1, 0, 0}, /* VSUBW */ - {0, 1, 1, 0}, /* VADDHN */ - {0, 0, 0, 0}, /* VABAL */ - {0, 1, 1, 0}, /* VSUBHN */ - {0, 0, 0, 0}, /* VABDL */ - {0, 0, 0, 0}, /* VMLAL */ - {0, 0, 0, 9}, /* VQDMLAL */ - {0, 0, 0, 0}, /* VMLSL */ - {0, 0, 0, 9}, /* VQDMLSL */ - {0, 0, 0, 0}, /* Integer VMULL */ - {0, 0, 0, 1}, /* VQDMULL */ - {0, 0, 0, 0xa}, /* Polynomial VMULL */ - {0, 0, 0, 7}, /* Reserved: always UNDEF */ - }; - - prewiden = neon_3reg_wide[op][0]; - src1_wide = neon_3reg_wide[op][1]; - src2_wide = neon_3reg_wide[op][2]; - undefreq = neon_3reg_wide[op][3]; - - if ((undefreq & (1 << size)) || - ((undefreq & 8) && u)) { - return 1; - } - if ((src1_wide && (rn & 1)) || - (src2_wide && (rm & 1)) || - (!src2_wide && (rd & 1))) { - return 1; - } + tcg_gen_smin_vec(tcg_ctx, vece, t, a, b); + tcg_gen_smax_vec(tcg_ctx, vece, d, a, b); + tcg_gen_sub_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - /* Handle polynomial VMULL in a single pass. 
*/ - if (op == 14) { - if (size == 0) { - /* VMULL.P8 */ - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, 16, 16, - 0, gen_helper_neon_pmull_h); - } else { - /* VMULL.P64 */ - if (!dc_isar_feature(aa32_pmull, s)) { - return 1; - } - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, 16, 16, - 0, gen_helper_gvec_pmull_q); - } - return 0; - } +void gen_gvec_sabd(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sabd_i32, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sabd_i64, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} - /* Avoid overlapping operands. Wide source operands are - always aligned so will never overlap with wide - destinations in problematic ways. */ - if (rd == rm && !src2_wide) { - tmp = neon_load_reg(tcg_ctx, rm, 1); - neon_store_scratch(tcg_ctx, 2, tmp); - } else if (rd == rn && !src1_wide) { - tmp = neon_load_reg(tcg_ctx, rn, 1); - neon_store_scratch(tcg_ctx, 2, tmp); - } - tmp3 = NULL; - for (pass = 0; pass < 2; pass++) { - if (src1_wide) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + pass); - tmp = NULL; - } else { - if (pass == 1 && rd == rn) { - tmp = neon_load_scratch(tcg_ctx, 2); - } else { - tmp = neon_load_reg(tcg_ctx, rn, pass); - } - if (prewiden) { - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, u); - } - } - if (src2_wide) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + pass); - tmp2 = NULL; - } else { - if (pass == 1 && rd == rm) { - tmp2 = neon_load_scratch(tcg_ctx, 2); - } else { - tmp2 = neon_load_reg(tcg_ctx, rm, pass); - } - if (prewiden) { - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V1, tmp2, size, u); - } - } - switch (op) { - case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */ - gen_neon_addl(tcg_ctx, size); - break; - case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */ - gen_neon_subl(tcg_ctx, size); - break; - case 5: case 7: /* VABAL, VABDL */ - switch ((size << 1) | u) { - case 0: - gen_helper_neon_abdl_s16(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 1: - gen_helper_neon_abdl_u16(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 2: - gen_helper_neon_abdl_s32(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 3: - gen_helper_neon_abdl_u32(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 4: - gen_helper_neon_abdl_s64(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 5: - gen_helper_neon_abdl_u64(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_i32(tcg_ctx, tmp); - break; - case 8: case 9: case 10: case 11: case 12: case 13: - /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ - gen_neon_mull(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2, size, u); - break; - default: /* 15 is RESERVED: caught earlier */ - abort(); - } - if (op == 13) { - /* VQDMULL */ - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - 
neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } else if (op == 5 || (op >= 8 && op <= 11)) { - /* Accumulate. */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - switch (op) { - case 10: /* VMLSL */ - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - /* Fall through */ - case 5: case 8: /* VABAL, VMLAL */ - gen_neon_addl(tcg_ctx, size); - break; - case 9: case 11: /* VQDMLAL, VQDMLSL */ - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - if (op == 11) { - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - } - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, size); - break; - default: - abort(); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } else if (op == 4 || op == 6) { - /* Narrowing operation. */ - tmp = tcg_temp_new_i32(tcg_ctx); - if (!u) { - switch (size) { - case 0: - gen_helper_neon_narrow_high_u8(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 1: - gen_helper_neon_narrow_high_u16(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 2: - tcg_gen_extrh_i64_i32(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - default: abort(); - } - } else { - switch (size) { - case 0: - gen_helper_neon_narrow_round_high_u8(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 1: - gen_helper_neon_narrow_round_high_u16(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 2: - tcg_gen_addi_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, 1u << 31); - tcg_gen_extrh_i64_i32(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - default: abort(); - } - } - if (pass == 0) { - tmp3 = tmp; - } else { - neon_store_reg(tcg_ctx, rd, 0, tmp3); - neon_store_reg(tcg_ctx, rd, 1, tmp); - } - } else { - /* Write back the result. */ - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - } - } else { - /* Two registers and a scalar. NB that for ops of this form - * the ARM ARM labels bit 24 as Q, but it is in our variable - * 'u', not 'q'. - */ - if (size == 0) { - return 1; - } - switch (op) { - case 1: /* Float VMLA scalar */ - case 5: /* Floating point VMLS scalar */ - case 9: /* Floating point VMUL scalar */ - if (size == 1) { - return 1; - } - /* fall through */ - case 0: /* Integer VMLA scalar */ - case 4: /* Integer VMLS scalar */ - case 8: /* Integer VMUL scalar */ - case 12: /* VQDMULH scalar */ - case 13: /* VQRDMULH scalar */ - if (u && ((rd | rn) & 1)) { - return 1; - } - tmp = neon_get_scalar(tcg_ctx, size, rm); - neon_store_scratch(tcg_ctx, 0, tmp); - for (pass = 0; pass < (u ? 4 : 2); pass++) { - tmp = neon_load_scratch(tcg_ctx, 0); - tmp2 = neon_load_reg(tcg_ctx, rn, pass); - if (op == 12) { - if (size == 1) { - gen_helper_neon_qdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } - } else if (op == 13) { - if (size == 1) { - gen_helper_neon_qrdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qrdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } - } else if (op & 1) { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - } else { - switch (size) { - case 0: gen_helper_neon_mul_u8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - } - tcg_temp_free_i32(tcg_ctx, tmp2); - if (op < 8) { - /* Accumulate. 
*/ - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - switch (op) { - case 0: - gen_neon_add(tcg_ctx, size, tmp, tmp2); - break; - case 1: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case 4: - gen_neon_rsb(tcg_ctx, size, tmp, tmp2); - break; - case 5: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_subs(tcg_ctx, tmp, tmp2, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - default: - abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - } - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - break; - case 3: /* VQDMLAL scalar */ - case 7: /* VQDMLSL scalar */ - case 11: /* VQDMULL scalar */ - if (u == 1) { - return 1; - } - /* fall through */ - case 2: /* VMLAL sclar */ - case 6: /* VMLSL scalar */ - case 10: /* VMULL scalar */ - if (rd & 1) { - return 1; - } - tmp2 = neon_get_scalar(tcg_ctx, size, rm); - /* We need a copy of tmp2 because gen_neon_mull - * deletes it during pass 0. */ - tmp4 = tcg_temp_new_i32(tcg_ctx); - tcg_gen_mov_i32(tcg_ctx, tmp4, tmp2); - tmp3 = neon_load_reg(tcg_ctx, rn, 1); - - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(tcg_ctx, rn, 0); - } else { - tmp = tmp3; - tmp2 = tmp4; - } - gen_neon_mull(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2, size, u); - if (op != 11) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - } - switch (op) { - case 6: - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - /* Fall through */ - case 2: - gen_neon_addl(tcg_ctx, size); - break; - case 3: case 7: - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - if (op == 7) { - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - } - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, size); - break; - case 10: - /* no-op */ - break; - case 11: - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - break; - default: - abort(); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - break; - case 14: /* VQRDMLAH scalar */ - case 15: /* VQRDMLSH scalar */ - { - NeonGenThreeOpEnvFn *fn; - - if (!dc_isar_feature(aa32_rdm, s)) { - return 1; - } - if (u && ((rd | rn) & 1)) { - return 1; - } - if (op == 14) { - if (size == 1) { - fn = gen_helper_neon_qrdmlah_s16; - } else { - fn = gen_helper_neon_qrdmlah_s32; - } - } else { - if (size == 1) { - fn = gen_helper_neon_qrdmlsh_s16; - } else { - fn = gen_helper_neon_qrdmlsh_s32; - } - } - - tmp2 = neon_get_scalar(tcg_ctx, size, rm); - for (pass = 0; pass < (u ? 4 : 2); pass++) { - tmp = neon_load_reg(tcg_ctx, rn, pass); - tmp3 = neon_load_reg(tcg_ctx, rd, pass); - fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp3); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - } - break; - default: - g_assert_not_reached(); - break; - } - } - } else { /* size == 3 */ - if (!u) { - /* Extract. 
*/ - imm = (insn >> 8) & 0xf; +static void gen_uabd_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); - if (imm > 7 && !q) - return 1; + tcg_gen_sub_i32(tcg_ctx, t, a, b); + tcg_gen_sub_i32(tcg_ctx, d, b, a); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LTU, d, a, b, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} - if (q && ((rd | rn | rm) & 1)) { - return 1; - } +static void gen_uabd_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - if (imm == 0) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn); - if (q) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rn + 1); - } - } else if (imm == 8) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + 1); - if (q) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm); - } - } else if (q) { - tmp64 = tcg_temp_new_i64(tcg_ctx); - if (imm < 8) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn); - neon_load_reg64(tcg_ctx, tmp64, rn + 1); - } else { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + 1); - neon_load_reg64(tcg_ctx, tmp64, rm); - } - tcg_gen_shri_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, (imm & 7) * 8); - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V1, tmp64, 64 - ((imm & 7) * 8)); - tcg_gen_or_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - if (imm < 8) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm); - } else { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + 1); - imm -= 8; - } - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V1, tcg_ctx->cpu_V1, 64 - (imm * 8)); - tcg_gen_shri_i64(tcg_ctx, tmp64, tmp64, imm * 8); - tcg_gen_or_i64(tcg_ctx, tcg_ctx->cpu_V1, tcg_ctx->cpu_V1, tmp64); - tcg_temp_free_i64(tcg_ctx, tmp64); - } else { - /* BUGFIX */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn); - tcg_gen_shri_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, imm * 8); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm); - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V1, tcg_ctx->cpu_V1, 64 - (imm * 8)); - tcg_gen_or_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd); - if (q) { - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + 1); - } - } else if ((insn & (1 << 11)) == 0) { - /* Two register misc. */ - op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); - size = (insn >> 18) & 3; - /* UNDEF for unknown op values and bad op-size combinations */ - if ((neon_2rm_sizes[op] & (1 << size)) == 0) { - return 1; - } - if (neon_2rm_is_v8_op(op) && - !arm_dc_feature(s, ARM_FEATURE_V8)) { - return 1; - } - if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) && - q && ((rm | rd) & 1)) { - return 1; - } - switch (op) { - case NEON_2RM_VREV64: - for (pass = 0; pass < (q ? 
2 : 1); pass++) { - tmp = neon_load_reg(tcg_ctx, rm, pass * 2); - tmp2 = neon_load_reg(tcg_ctx, rm, pass * 2 + 1); - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp, tmp); break; - case 1: gen_swap_half(tcg_ctx, tmp); break; - case 2: /* no-op */ break; - default: abort(); - } - neon_store_reg(tcg_ctx, rd, pass * 2 + 1, tmp); - if (size == 2) { - neon_store_reg(tcg_ctx, rd, pass * 2, tmp2); - } else { - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp2, tmp2); break; - case 1: gen_swap_half(tcg_ctx, tmp2); break; - default: abort(); - } - neon_store_reg(tcg_ctx, rd, pass * 2, tmp2); - } - } - break; - case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U: - case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U: - for (pass = 0; pass < q + 1; pass++) { - tmp = neon_load_reg(tcg_ctx, rm, pass * 2); - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, op & 1); - tmp = neon_load_reg(tcg_ctx, rm, pass * 2 + 1); - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V1, tmp, size, op & 1); - switch (size) { - case 0: gen_helper_neon_paddl_u16(tcg_ctx, CPU_V001); break; - case 1: gen_helper_neon_paddl_u32(tcg_ctx, CPU_V001); break; - case 2: tcg_gen_add_i64(tcg_ctx, CPU_V001); break; - default: abort(); - } - if (op >= NEON_2RM_VPADAL) { - /* Accumulate. */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - gen_neon_addl(tcg_ctx, size); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - break; - case NEON_2RM_VTRN: - if (size == 2) { - int n; - for (n = 0; n < (q ? 4 : 2); n += 2) { - tmp = neon_load_reg(tcg_ctx, rm, n); - tmp2 = neon_load_reg(tcg_ctx, rd, n + 1); - neon_store_reg(tcg_ctx, rm, n, tmp2); - neon_store_reg(tcg_ctx, rd, n + 1, tmp); - } - } else { - goto elementwise; - } - break; - case NEON_2RM_VUZP: - if (gen_neon_unzip(tcg_ctx, rd, rm, size, q)) { - return 1; - } - break; - case NEON_2RM_VZIP: - if (gen_neon_zip(tcg_ctx, rd, rm, size, q)) { - return 1; - } - break; - case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN: - /* also VQMOVUN; op field and mnemonics don't line up */ - if (rm & 1) { - return 1; - } - tmp2 = NULL; - for (pass = 0; pass < 2; pass++) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rm + pass); - tmp = tcg_temp_new_i32(tcg_ctx); - gen_neon_narrow_op(tcg_ctx, op == NEON_2RM_VMOVN, q, size, - tmp, tcg_ctx->cpu_V0); - if (pass == 0) { - tmp2 = tmp; - } else { - neon_store_reg(tcg_ctx, rd, 0, tmp2); - neon_store_reg(tcg_ctx, rd, 1, tmp); - } - } - break; - case NEON_2RM_VSHLL: - if (q || (rd & 1)) { - return 1; - } - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 1) - tmp = tmp2; - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, 1); - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, 8 << size); - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - break; - case NEON_2RM_VCVT_F16_F32: - { - TCGv_ptr fpst; - TCGv_i32 ahp; - - if (!dc_isar_feature(aa32_fp16_spconv, s) || - q || (rm & 1)) { - return 1; - } - fpst = get_fpstatus_ptr(tcg_ctx, true); - ahp = get_ahp_flag(tcg_ctx); - tmp = neon_load_reg(tcg_ctx, rm, 0); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp); - tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16); - tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); - tmp = neon_load_reg(tcg_ctx, rm, 2); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); - tmp3 = neon_load_reg(tcg_ctx, rm, 3); - 
neon_store_reg(tcg_ctx, rd, 0, tmp2); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp); - tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16); - tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp); - neon_store_reg(tcg_ctx, rd, 1, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, ahp); - tcg_temp_free_ptr(tcg_ctx, fpst); - break; - } - case NEON_2RM_VCVT_F32_F16: - { - TCGv_ptr fpst; - TCGv_i32 ahp; - if (!dc_isar_feature(aa32_fp16_spconv, s) || - q || (rd & 1)) { - return 1; - } - fpst = get_fpstatus_ptr(tcg_ctx, true); - ahp = get_ahp_flag(tcg_ctx); - tmp3 = tcg_temp_new_i32(tcg_ctx); - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 0, tmp3); - tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 1, tmp); - tmp3 = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 2, tmp3); - tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 3, tmp2); - tcg_temp_free_i32(tcg_ctx, ahp); - tcg_temp_free_ptr(tcg_ctx, fpst); - break; - } - case NEON_2RM_AESE: case NEON_2RM_AESMC: - if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rm); - - /* Bit 6 is the lowest opcode bit; it distinguishes between - * encryption (AESE/AESMC) and decryption (AESD/AESIMC) - */ - tmp3 = tcg_const_i32(tcg_ctx, extract32(insn, 6, 1)); - - if (op == NEON_2RM_AESE) { - gen_helper_crypto_aese(tcg_ctx, ptr1, ptr2, tmp3); - } else { - gen_helper_crypto_aesmc(tcg_ctx, ptr1, ptr2, tmp3); - } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - tcg_temp_free_i32(tcg_ctx, tmp3); - break; - case NEON_2RM_SHA1H: - if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rm); + tcg_gen_sub_i64(tcg_ctx, t, a, b); + tcg_gen_sub_i64(tcg_ctx, d, b, a); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LTU, d, a, b, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - gen_helper_crypto_sha1h(tcg_ctx, ptr1, ptr2); +static void gen_uabd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - break; - case NEON_2RM_SHA1SU1: - if ((rm | rd) & 1) { - return 1; - } - /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */ - if (q) { - if (!dc_isar_feature(aa32_sha2, s)) { - return 1; - } - } else if (!dc_isar_feature(aa32_sha1, s)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rm); - if (q) { - gen_helper_crypto_sha256su0(tcg_ctx, ptr1, ptr2); - } else { - gen_helper_crypto_sha1su1(tcg_ctx, ptr1, ptr2); - } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - break; - - case NEON_2RM_VMVN: - tcg_gen_gvec_not(tcg_ctx, 0, rd_ofs, rm_ofs, vec_size, vec_size); - break; - case NEON_2RM_VNEG: - tcg_gen_gvec_neg(tcg_ctx, size, rd_ofs, rm_ofs, vec_size, vec_size); - break; - case NEON_2RM_VABS: - tcg_gen_gvec_abs(tcg_ctx, size, rd_ofs, rm_ofs, vec_size, vec_size); - break; - - default: - 
elementwise: - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp = neon_load_reg(tcg_ctx, rm, pass); - switch (op) { - case NEON_2RM_VREV32: - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp, tmp); break; - case 1: gen_swap_half(tcg_ctx, tmp); break; - default: abort(); - } - break; - case NEON_2RM_VREV16: - gen_rev16(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VCLS: - switch (size) { - case 0: gen_helper_neon_cls_s8(tcg_ctx, tmp, tmp); break; - case 1: gen_helper_neon_cls_s16(tcg_ctx, tmp, tmp); break; - case 2: gen_helper_neon_cls_s32(tcg_ctx, tmp, tmp); break; - default: abort(); - } - break; - case NEON_2RM_VCLZ: - switch (size) { - case 0: gen_helper_neon_clz_u8(tcg_ctx, tmp, tmp); break; - case 1: gen_helper_neon_clz_u16(tcg_ctx, tmp, tmp); break; - case 2: tcg_gen_clzi_i32(tcg_ctx, tmp, tmp, 32); break; - default: abort(); - } - break; - case NEON_2RM_VCNT: - gen_helper_neon_cnt_u8(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VQABS: - switch (size) { - case 0: - gen_helper_neon_qabs_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 1: - gen_helper_neon_qabs_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 2: - gen_helper_neon_qabs_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - default: abort(); - } - break; - case NEON_2RM_VQNEG: - switch (size) { - case 0: - gen_helper_neon_qneg_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 1: - gen_helper_neon_qneg_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 2: - gen_helper_neon_qneg_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - default: abort(); - } - break; - case NEON_2RM_VCGT0: case NEON_2RM_VCLE0: - tmp2 = tcg_const_i32(tcg_ctx, 0); - switch(size) { - case 0: gen_helper_neon_cgt_s8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_cgt_s16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: gen_helper_neon_cgt_s32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - if (op == NEON_2RM_VCLE0) { - tcg_gen_not_i32(tcg_ctx, tmp, tmp); - } - break; - case NEON_2RM_VCGE0: case NEON_2RM_VCLT0: - tmp2 = tcg_const_i32(tcg_ctx, 0); - switch(size) { - case 0: gen_helper_neon_cge_s8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_cge_s16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: gen_helper_neon_cge_s32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - if (op == NEON_2RM_VCLT0) { - tcg_gen_not_i32(tcg_ctx, tmp, tmp); - } - break; - case NEON_2RM_VCEQ0: - tmp2 = tcg_const_i32(tcg_ctx, 0); - switch(size) { - case 0: gen_helper_neon_ceq_u8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_ceq_u16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: gen_helper_neon_ceq_u32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - break; - case NEON_2RM_VCGT0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCGE0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCEQ0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_ceq_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - 
tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCLE0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp2, tmp, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCLT0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp2, tmp, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VABS_F: - gen_helper_vfp_abss(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VNEG_F: - gen_helper_vfp_negs(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VSWP: - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - neon_store_reg(tcg_ctx, rm, pass, tmp2); - break; - case NEON_2RM_VTRN: - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - switch (size) { - case 0: gen_neon_trn_u8(tcg_ctx, tmp, tmp2); break; - case 1: gen_neon_trn_u16(tcg_ctx, tmp, tmp2); break; - default: abort(); - } - neon_store_reg(tcg_ctx, rm, pass, tmp2); - break; - case NEON_2RM_VRINTN: - case NEON_2RM_VRINTA: - case NEON_2RM_VRINTM: - case NEON_2RM_VRINTP: - case NEON_2RM_VRINTZ: - { - TCGv_i32 tcg_rmode; - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - int rmode; - - if (op == NEON_2RM_VRINTZ) { - rmode = FPROUNDING_ZERO; - } else { - rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1]; - } - - tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - gen_helper_rints(tcg_ctx, tmp, tmp, fpstatus); - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - tcg_temp_free_i32(tcg_ctx, tcg_rmode); - break; - } - case NEON_2RM_VRINTX: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_rints_exact(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVTAU: - case NEON_2RM_VCVTAS: - case NEON_2RM_VCVTNU: - case NEON_2RM_VCVTNS: - case NEON_2RM_VCVTPU: - case NEON_2RM_VCVTPS: - case NEON_2RM_VCVTMU: - case NEON_2RM_VCVTMS: - { - bool is_signed = !extract32(insn, 7, 1); - TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, 1); - TCGv_i32 tcg_rmode, tcg_shift; - int rmode = fp_decode_rm[extract32(insn, 8, 2)]; - - tcg_shift = tcg_const_i32(tcg_ctx, 0); - tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - - if (is_signed) { - gen_helper_vfp_tosls(tcg_ctx, tmp, tmp, - tcg_shift, fpst); - } else { - gen_helper_vfp_touls(tcg_ctx, tmp, tmp, - tcg_shift, fpst); - } - - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - tcg_temp_free_i32(tcg_ctx, tcg_rmode); - tcg_temp_free_i32(tcg_ctx, tcg_shift); - tcg_temp_free_ptr(tcg_ctx, fpst); - break; - } - case NEON_2RM_VRECPE: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_recpe_u32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VRSQRTE: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_rsqrte_u32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VRECPE_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_recpe_f32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case 
NEON_2RM_VRSQRTE_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_rsqrte_f32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_sitos(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_uitos(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_tosizs(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_touizs(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - default: - /* Reserved op values were caught by the - * neon_2rm_sizes[] check earlier. - */ - abort(); - } - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - break; - } - } else if ((insn & (1 << 10)) == 0) { - /* VTBL, VTBX. */ - int n = ((insn >> 8) & 3) + 1; - if ((rn + n) > 32) { - /* This is UNPREDICTABLE; we choose to UNDEF to avoid the - * helper function running off the end of the register file. - */ - return 1; - } - n <<= 3; - if (insn & (1 << 6)) { - tmp = neon_load_reg(tcg_ctx, rd, 0); - } else { - tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_movi_i32(tcg_ctx, tmp, 0); - } - tmp2 = neon_load_reg(tcg_ctx, rm, 0); - ptr1 = vfp_reg_ptr(tcg_ctx, true, rn); - tmp5 = tcg_const_i32(tcg_ctx, n); - gen_helper_neon_tbl(tcg_ctx, tmp2, tmp2, tmp, ptr1, tmp5); - tcg_temp_free_i32(tcg_ctx, tmp); - if (insn & (1 << 6)) { - tmp = neon_load_reg(tcg_ctx, rd, 1); - } else { - tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_movi_i32(tcg_ctx, tmp, 0); - } - tmp3 = neon_load_reg(tcg_ctx, rm, 1); - gen_helper_neon_tbl(tcg_ctx, tmp3, tmp3, tmp, ptr1, tmp5); - tcg_temp_free_i32(tcg_ctx, tmp5); - tcg_temp_free_ptr(tcg_ctx, ptr1); - neon_store_reg(tcg_ctx, rd, 0, tmp2); - neon_store_reg(tcg_ctx, rd, 1, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp); - } else if ((insn & 0x380) == 0) { - /* VDUP */ - int element; - MemOp size; + tcg_gen_umin_vec(tcg_ctx, vece, t, a, b); + tcg_gen_umax_vec(tcg_ctx, vece, d, a, b); + tcg_gen_sub_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { - return 1; - } - if (insn & (1 << 16)) { - size = MO_8; - element = (insn >> 17) & 7; - } else if (insn & (1 << 17)) { - size = MO_16; - element = (insn >> 18) & 3; - } else { - size = MO_32; - element = (insn >> 19) & 1; - } - tcg_gen_gvec_dup_mem(tcg_ctx, size, neon_reg_offset(rd, 0), - neon_element_offset(rm, element, size), - q ? 16 : 8, q ? 
16 : 8); - } else { - return 1; - } - } - } - return 0; +void gen_gvec_uabd(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_uabd_i32, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_uabd_i64, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -/* Advanced SIMD three registers of the same length extension. - * 31 25 23 22 20 16 12 11 10 9 8 3 0 - * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+ - * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm | - * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+ - */ -static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) +static void gen_saba_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - gen_helper_gvec_3 *fn_gvec = NULL; - gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; - int rd, rn, rm, opr_sz; - int data = 0; - int off_rn, off_rm; - bool is_long = false, q = extract32(insn, 6, 1); - bool ptr_is_env = false; - - if ((insn & 0xfe200f10) == 0xfc200800) { - /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ - int size = extract32(insn, 20, 1); - data = extract32(insn, 23, 2); /* rot */ - if (!dc_isar_feature(aa32_vcma, s) - || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { - return 1; - } - fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; - } else if ((insn & 0xfea00f10) == 0xfc800800) { - /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */ - int size = extract32(insn, 20, 1); - data = extract32(insn, 24, 1); /* rot */ - if (!dc_isar_feature(aa32_vcma, s) - || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { - return 1; - } - fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; - } else if ((insn & 0xfeb00f00) == 0xfc200d00) { - /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */ - bool u = extract32(insn, 4, 1); - if (!dc_isar_feature(aa32_dp, s)) { - return 1; - } - fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; - } else if ((insn & 0xff300f10) == 0xfc200810) { - /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... 
*/ - int is_s = extract32(insn, 23, 1); - if (!dc_isar_feature(aa32_fhm, s)) { - return 1; - } - is_long = true; - data = is_s; /* is_2 == 0 */ - fn_gvec_ptr = gen_helper_gvec_fmlal_a32; - ptr_is_env = true; - } else { - return 1; - } + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + gen_sabd_i32(tcg_ctx, t, a, b); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} - VFP_DREG_D(rd, insn); - if (rd & q) { - return 1; - } - if (q || !is_long) { - VFP_DREG_N(rn, insn); - VFP_DREG_M(rm, insn); - if ((rn | rm) & q & !is_long) { - return 1; - } - off_rn = vfp_reg_offset(1, rn); - off_rm = vfp_reg_offset(1, rm); - } else { - rn = VFP_SREG_N(insn); - rm = VFP_SREG_M(insn); - off_rn = vfp_reg_offset(0, rn); - off_rm = vfp_reg_offset(0, rm); - } +static void gen_saba_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + gen_sabd_i64(tcg_ctx, t, a, b); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - if (!s->vfp_enabled) { - return 1; - } +static void gen_saba_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + gen_sabd_vec(tcg_ctx, vece, t, a, b); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - opr_sz = (1 + q) * 8; - if (fn_gvec_ptr) { - TCGv_ptr ptr; - if (ptr_is_env) { - ptr = tcg_ctx->cpu_env; - } else { - ptr = get_fpstatus_ptr(tcg_ctx, 1); - } - tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, ptr, - opr_sz, opr_sz, data, fn_gvec_ptr); - if (!ptr_is_env) { - tcg_temp_free_ptr(tcg_ctx, ptr); - } - } else { - tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, - opr_sz, opr_sz, data, fn_gvec); - } - return 0; +void gen_gvec_saba(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_saba_i32, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_saba_i64, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -/* Advanced SIMD two registers and a scalar extension. 
- * 31 24 23 22 20 16 12 11 10 9 8 3 0 - * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+ - * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm | - * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+ - * - */ +static void gen_uaba_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + gen_uabd_i32(tcg_ctx, t, a, b); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} -static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) +static void gen_uaba_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - gen_helper_gvec_3 *fn_gvec = NULL; - gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; - int rd, rn, rm, opr_sz, data; - int off_rn, off_rm; - bool is_long = false, q = extract32(insn, 6, 1); - bool ptr_is_env = false; - - if ((insn & 0xff000f10) == 0xfe000800) { - /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */ - int rot = extract32(insn, 20, 2); - int size = extract32(insn, 23, 1); - int index; - - if (!dc_isar_feature(aa32_vcma, s)) { - return 1; - } - if (size == 0) { - if (!dc_isar_feature(aa32_fp16_arith, s)) { - return 1; - } - /* For fp16, rm is just Vm, and index is M. */ - rm = extract32(insn, 0, 4); - index = extract32(insn, 5, 1); - } else { - /* For fp32, rm is the usual M:Vm, and index is 0. */ - VFP_DREG_M(rm, insn); - index = 0; - } - data = (index << 2) | rot; - fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); - } else if ((insn & 0xffb00f00) == 0xfe200d00) { - /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ - int u = extract32(insn, 4, 1); - - if (!dc_isar_feature(aa32_dp, s)) { - return 1; - } - fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; - /* rm is just Vm, and index is M. */ - data = extract32(insn, 5, 1); /* index */ - rm = extract32(insn, 0, 4); - } else if ((insn & 0xffa00f10) == 0xfe000810) { - /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... 
*/ - int is_s = extract32(insn, 20, 1); - int vm20 = extract32(insn, 0, 3); - int vm3 = extract32(insn, 3, 1); - int m = extract32(insn, 5, 1); - int index; - - if (!dc_isar_feature(aa32_fhm, s)) { - return 1; - } - if (q) { - rm = vm20; - index = m * 2 + vm3; - } else { - rm = vm20 * 2 + m; - index = vm3; - } - is_long = true; - data = (index << 2) | is_s; /* is_2 == 0 */ - fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32; - ptr_is_env = true; - } else { - return 1; - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + gen_uabd_i64(tcg_ctx, t, a, b); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - VFP_DREG_D(rd, insn); - if (rd & q) { - return 1; - } - if (q || !is_long) { - VFP_DREG_N(rn, insn); - if (rn & q & !is_long) { - return 1; - } - off_rn = vfp_reg_offset(1, rn); - off_rm = vfp_reg_offset(1, rm); - } else { - rn = VFP_SREG_N(insn); - off_rn = vfp_reg_offset(0, rn); - off_rm = vfp_reg_offset(0, rm); - } - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - if (!s->vfp_enabled) { - return 1; - } +static void gen_uaba_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + gen_uabd_vec(tcg_ctx, vece, t, a, b); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - opr_sz = (1 + q) * 8; - if (fn_gvec_ptr) { - TCGv_ptr ptr; - if (ptr_is_env) { - ptr = tcg_ctx->cpu_env; - } else { - ptr = get_fpstatus_ptr(tcg_ctx, 1); - } - tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, ptr, - opr_sz, opr_sz, data, fn_gvec_ptr); - if (!ptr_is_env) { - tcg_temp_free_ptr(tcg_ctx, ptr); - } - } else { - tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, - opr_sz, opr_sz, data, fn_gvec); - } - return 0; +void gen_gvec_uaba(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_uaba_i32, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_uaba_i64, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } static int disas_coproc_insn(DisasContext *s, uint32_t insn) @@ -7734,7 +5198,7 @@ static void gen_srs(DisasContext *s, tcg_temp_free_i32(tcg_ctx, tmp); } tcg_temp_free_i32(tcg_ctx, addr); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Generate a label used for skipping this instruction */ @@ -10076,7 +7540,7 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); if (m_swap) { - gen_swap_half(tcg_ctx, t2); + gen_swap_half(tcg_ctx, t2, t2); } gen_smul_dual(tcg_ctx, t1, t2); @@ -10135,7 +7599,7 @@ static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) t1 = load_reg(s, a->rn); t2 = 
load_reg(s, a->rm); if (m_swap) { - gen_swap_half(tcg_ctx, t2); + gen_swap_half(tcg_ctx, t2, t2); } gen_smul_dual(tcg_ctx, t1, t2); @@ -10490,9 +7954,6 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) // gen_io_start(tcg_ctx); } gen_helper_cpsr_write_eret(tcg_ctx, tcg_ctx->cpu_env, tmp); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - // gen_io_end(tcg_ctx); - } tcg_temp_free_i32(tcg_ctx, tmp); /* Must exit loop to check un-masked IRQs */ s->base.is_jmp = DISAS_EXIT; @@ -10878,7 +8339,7 @@ static bool trans_SETEND(DisasContext *s, arg_SETEND *a) } if (a->E != (s->be_data == MO_BE)) { gen_helper_setend(tcg_ctx, tcg_ctx->cpu_env); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } return true; } @@ -10964,33 +8425,14 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) /* Unconditional instructions. */ /* TODO: Perhaps merge these into one decodetree output file. */ if (disas_a32_uncond(s, insn) || - disas_vfp_uncond(s, insn)) { + disas_vfp_uncond(s, insn) || + disas_neon_dp(s, insn) || + disas_neon_ls(s, insn) || + disas_neon_shared(s, insn)) { return; } /* fall back to legacy decoder */ - if (((insn >> 25) & 7) == 1) { - /* NEON Data processing. */ - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - goto illegal_op; - } - - if (disas_neon_data_insn(s, insn)) { - goto illegal_op; - } - return; - } - if ((insn & 0x0f100000) == 0x04000000) { - /* NEON load/store. */ - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - goto illegal_op; - } - - if (disas_neon_ls_insn(s, insn)) { - goto illegal_op; - } - return; - } if ((insn & 0x0e000f00) == 0x0c000100) { if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { /* iWMMXt register transfer. */ @@ -11000,18 +8442,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) } } } - } else if ((insn & 0x0e000a00) == 0x0c000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - if (disas_neon_insn_3same_ext(s, insn)) { - goto illegal_op; - } - return; - } else if ((insn & 0x0f000a00) == 0x0e000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - if (disas_neon_insn_2reg_scalar_ext(s, insn)) { - goto illegal_op; - } - return; } goto illegal_op; } @@ -11126,6 +8556,33 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) ARCH(6T2); } + if ((insn & 0xef000000) == 0xef000000) { + /* + * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq + * transform into + * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq + */ + uint32_t a32_insn = (insn & 0xe2ffffff) | + ((insn & (1 << 28)) >> 4) | (1 << 28); + + if (disas_neon_dp(s, a32_insn)) { + return; + } + } + + if ((insn & 0xff100000) == 0xf9000000) { + /* + * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq + * transform into + * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq + */ + uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000; + + if (disas_neon_ls(s, a32_insn)) { + return; + } + } + /* * TODO: Perhaps merge these into one decodetree output file. * Note disas_vfp is written for a32 with cond field in the @@ -11133,6 +8590,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) */ if (disas_t32(s, insn) || disas_vfp_uncond(s, insn) || + disas_neon_shared(s, insn) || ((insn >> 28) == 0xe && disas_vfp(s, insn))) { return; } @@ -11162,24 +8620,9 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } break; } - if ((insn & 0xfe000a00) == 0xfc000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - /* The Thumb2 and ARM encodings are identical. 
*/ - if (disas_neon_insn_3same_ext(s, insn)) { - goto illegal_op; - } - } else if ((insn & 0xff000a00) == 0xfe000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - /* The Thumb2 and ARM encodings are identical. */ - if (disas_neon_insn_2reg_scalar_ext(s, insn)) { - goto illegal_op; - } - } else if (((insn >> 24) & 3) == 3) { - /* Translate into the equivalent ARM encoding. */ - insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28); - if (disas_neon_data_insn(s, insn)) { - goto illegal_op; - } + if (((insn >> 24) & 3) == 3) { + /* Neon DP, but failed disas_neon_dp() */ + goto illegal_op; } else if (((insn >> 8) & 0xe) == 10) { /* VFP, but failed disas_vfp. */ goto illegal_op; @@ -11192,12 +8635,6 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } break; case 12: - if ((insn & 0x01100000) == 0x01000000) { - if (disas_neon_ls_insn(s, insn)) { - goto illegal_op; - } - break; - } goto illegal_op; default: illegal_op: @@ -11685,7 +9122,8 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) break; case DISAS_NEXT: case DISAS_TOO_MANY: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: + case DISAS_UPDATE_NOCHAIN: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: @@ -11709,10 +9147,13 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_TOO_MANY: gen_goto_tb(dc, 1, dc->base.pc_next); break; + case DISAS_UPDATE_NOCHAIN: + gen_set_pc_im(dc, dc->base.pc_next); + /* fall through */ case DISAS_JUMP: gen_goto_ptr(tcg_ctx); break; - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h index 62ea7a5277..b0c4539267 100644 --- a/qemu/target/arm/translate.h +++ b/qemu/target/arm/translate.h @@ -29,6 +29,7 @@ typedef struct DisasContext { ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */ uint8_t tbii; /* TBI1|TBI0 for insns */ uint8_t tbid; /* TBI1|TBI0 for data */ + uint8_t tcma; /* TCMA1|TCMA0 for MTE */ bool ns; /* Use non-secure CPREG bank on access */ int fp_excp_el; /* FP exception EL or 0 if enabled */ int sve_excp_el; /* SVE exception EL or 0 if enabled */ @@ -76,6 +77,10 @@ typedef struct DisasContext { bool unpriv; /* True if v8.3-PAuth is active. */ bool pauth_active; + /* True if v8.5-MTE access to tags is enabled. */ + bool ata; + /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */ + bool mte_active[2]; /* True with v8.5-BTI and SCTLR_ELx.BT* set. */ bool bt; /* True if any CP15 access is trapped by HSTR_EL2 */ @@ -85,6 +90,8 @@ typedef struct DisasContext { * < 0, set by the current instruction. */ int8_t btype; + /* A copy of cpu->dcz_blocksize. */ + uint8_t dcz_blocksize; /* True if this page is guarded. */ bool guarded_page; /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ @@ -145,7 +152,8 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) /* is_jmp field values */ #define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */ -#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */ +/* CPU state was modified dynamically; exit to main loop for interrupts. */ +#define DISAS_UPDATE_EXIT DISAS_TARGET_1 /* These instructions trap after executing, so the A32/T32 decoder must * defer them until after the conditional execution state has been updated. * WFI also needs special handling when single-stepping. 
@@ -161,13 +169,16 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) * custom end-of-TB code) */ #define DISAS_BX_EXCRET DISAS_TARGET_8 -/* For instructions which want an immediate exit to the main loop, - * as opposed to attempting to use lookup_and_goto_ptr. Unlike - * DISAS_UPDATE this doesn't write the PC on exiting the translation - * loop so you need to ensure something (gen_a64_set_pc_im or runtime - * helper) has done so before we reach return from cpu_tb_exec. +/* + * For instructions which want an immediate exit to the main loop, as opposed + * to attempting to use lookup_and_goto_ptr. Unlike DISAS_UPDATE_EXIT, this + * doesn't write the PC on exiting the translation loop so you need to ensure + * something (gen_a64_set_pc_im or runtime helper) has done so before we reach + * return from cpu_tb_exec. */ #define DISAS_EXIT DISAS_TARGET_9 +/* CPU state was modified dynamically; no need to exit, but do not chain. */ +#define DISAS_UPDATE_NOCHAIN DISAS_TARGET_10 #ifdef TARGET_AARCH64 void a64_translate_init(struct uc_struct *uc); @@ -274,28 +285,110 @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex) uint64_t vfp_expand_imm(int size, uint8_t imm8); /* Vector operations shared between ARM and AArch64. */ -extern const GVecGen3 mla_op[4]; -extern const GVecGen3 mls_op[4]; -extern const GVecGen3 cmtst_op[4]; -extern const GVecGen3 sshl_op[4]; -extern const GVecGen3 ushl_op[4]; -extern const GVecGen2i ssra_op[4]; -extern const GVecGen2i usra_op[4]; -extern const GVecGen2i sri_op[4]; -extern const GVecGen2i sli_op[4]; -extern const GVecGen4 uqadd_op[4]; -extern const GVecGen4 sqadd_op[4]; -extern const GVecGen4 uqsub_op[4]; -extern const GVecGen4 sqsub_op[4]; +void gen_gvec_ceq0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_clt0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cgt0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cle0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cge0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_mla(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_mls(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_cmtst(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sshl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_ushl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + void gen_cmtst_i64(TCGContext *, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_ushl_i32(TCGContext *, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void gen_sshl_i32(TCGContext *, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void gen_ushl_i64(TCGContext *, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_sshl_i64(TCGContext *, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void gen_gvec_uqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void 
gen_gvec_sqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_ssra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_usra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_srshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_urshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_srsra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_ursra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_sri(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sli(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_sqrdmlah_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqrdmlsh_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_sabd(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uabd(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_saba(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uaba(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + /* * Forward to the isar_feature_* tests given a DisasContext pointer. */ #define dc_isar_feature(name, ctx) isar_feature_##name(ctx->isar) +/* Note that the gvec expanders operate on offsets + sizes. 
*/ +typedef void GVecGen2Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, uint32_t); +typedef void GVecGen2iFn(TCGContext *, unsigned, uint32_t, uint32_t, int64_t, + uint32_t, uint32_t); +typedef void GVecGen3Fn(TCGContext *, unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); +typedef void GVecGen4Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +/* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenOneOpFn(TCGContext *, TCGv_i32, TCGv_i32); +typedef void NeonGenOneOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32); +typedef void NeonGenTwoOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32); +typedef void NeonGenTwoOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); +typedef void NeonGenTwo64OpFn(TCGContext*, TCGv_i64, TCGv_i64, TCGv_i64); +typedef void NeonGenTwo64OpEnvFn(TCGContext *, TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); +typedef void NeonGenNarrowFn(TCGContext *, TCGv_i32, TCGv_i64); +typedef void NeonGenNarrowEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i64); +typedef void NeonGenWidenFn(TCGContext *, TCGv_i64, TCGv_i32); +typedef void NeonGenTwoOpWidenFn(TCGContext *, TCGv_i64, TCGv_i32, TCGv_i32); +typedef void NeonGenOneSingleOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_ptr); +typedef void NeonGenTwoSingleOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); +typedef void NeonGenTwoDoubleOpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); +typedef void NeonGenOne64OpFn(TCGContext *, TCGv_i64, TCGv_i64); +typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr); +typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); +typedef void AtomicThreeOpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); + #endif /* TARGET_ARM_TRANSLATE_H */ diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c index a1839eed81..84a54cee05 100644 --- a/qemu/target/arm/vec_helper.c +++ b/qemu/target/arm/vec_helper.c @@ -22,7 +22,7 @@ #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" - +#include "vec_internal.h" /* Note that vector data is stored in host-endian 64-bit chunks, so addressing units smaller than that needs a host-endian fixup. */ @@ -36,21 +36,9 @@ #define H4(x) (x) #endif -#define SET_QC() env->vfp.qc[0] = 1 - -static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) -{ - uint64_t *d = (uint64_t *)((char *)vd + opr_sz); - uintptr_t i; - - for (i = opr_sz; i < max_sz; i += 8) { - *d++ = 0; - } -} - /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ -static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, - int16_t src2, int16_t src3) +static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2, + int16_t src3, uint32_t *sat) { /* Simplify: * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16 @@ -60,7 +48,7 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, ret = ((int32_t)src3 << 15) + ret + (1 << 14); ret >>= 15; if (ret != (int16_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? 
-0x8000 : 0x7fff); } return ret; @@ -69,30 +57,30 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { - uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3); - uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16); + uint32_t *sat = &env->vfp.qc[0]; + uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat); + uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); return deposit32(e1, 16, 16, e2); } void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int16_t *d = vd; int16_t *n = vn; int16_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-subtract high half, 16-bit */ -static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, - int16_t src2, int16_t src3) +static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2, + int16_t src3, uint32_t *sat) { /* Similarly, using subtraction: * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16 @@ -102,7 +90,7 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, ret = ((int32_t)src3 << 15) - ret + (1 << 14); ret >>= 15; if (ret != (int16_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? -0x8000 : 0x7fff); } return ret; @@ -111,85 +99,97 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { - uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3); - uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16); + uint32_t *sat = &env->vfp.qc[0]; + uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat); + uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); return deposit32(e1, 16, 16, e2); } void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int16_t *d = vd; int16_t *n = vn; int16_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */ -uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, - int32_t src2, int32_t src3) +static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2, + int32_t src3, uint32_t *sat) { /* Simplify similarly to int_qrdmlah_s16 above. */ int64_t ret = (int64_t)src1 * src2; ret = ((int64_t)src3 << 31) + ret + (1 << 30); ret >>= 31; if (ret != (int32_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? 
INT32_MIN : INT32_MAX); } return ret; } +uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, + int32_t src2, int32_t src3) +{ + uint32_t *sat = &env->vfp.qc[0]; + return inl_qrdmlah_s32(src1, src2, src3, sat); +} + void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int32_t *d = vd; int32_t *n = vn; int32_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-subtract high half, 32-bit */ -uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, - int32_t src2, int32_t src3) +static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2, + int32_t src3, uint32_t *sat) { /* Simplify similarly to int_qrdmlsh_s16 above. */ int64_t ret = (int64_t)src1 * src2; ret = ((int64_t)src3 << 31) - ret + (1 << 30); ret >>= 31; if (ret != (int32_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? INT32_MIN : INT32_MAX); } return ret; } +uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, + int32_t src2, int32_t src3) +{ + uint32_t *sat = &env->vfp.qc[0]; + return inl_qrdmlsh_s32(src1, src2, src3, sat); +} + void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int32_t *d = vd; int32_t *n = vn; int32_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } @@ -681,6 +681,11 @@ static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat) return result; } +static float32 float32_abd(float32 op1, float32 op2, float_status *stat) +{ + return float32_abs(float32_sub(op1, op2, stat)); +} + #define DO_3OP(NAME, FUNC, TYPE) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ @@ -708,6 +713,8 @@ DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16) DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32) DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64) +DO_3OP(gvec_fabd_s, float32_abd, float32) + #ifdef TARGET_AARCH64 DO_3OP(gvec_recps_h, helper_recpsf_f16, float16) @@ -901,6 +908,118 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, clear_tail(d, oprsz, simd_maxsz(desc)); } +#define DO_SRA(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] += n[i] >> shift; \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SRA(gvec_ssra_b, int8_t) +DO_SRA(gvec_ssra_h, int16_t) +DO_SRA(gvec_ssra_s, int32_t) +DO_SRA(gvec_ssra_d, int64_t) + +DO_SRA(gvec_usra_b, uint8_t) +DO_SRA(gvec_usra_h, uint16_t) +DO_SRA(gvec_usra_s, uint32_t) +DO_SRA(gvec_usra_d, uint64_t) + +#undef DO_SRA + +#define DO_RSHR(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + TYPE tmp = n[i] >> (shift - 1); \ + d[i] = (tmp >> 1) + (tmp & 1); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_RSHR(gvec_srshr_b, int8_t) 
+DO_RSHR(gvec_srshr_h, int16_t) +DO_RSHR(gvec_srshr_s, int32_t) +DO_RSHR(gvec_srshr_d, int64_t) + +DO_RSHR(gvec_urshr_b, uint8_t) +DO_RSHR(gvec_urshr_h, uint16_t) +DO_RSHR(gvec_urshr_s, uint32_t) +DO_RSHR(gvec_urshr_d, uint64_t) + +#undef DO_RSHR + +#define DO_RSRA(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + TYPE tmp = n[i] >> (shift - 1); \ + d[i] += (tmp >> 1) + (tmp & 1); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_RSRA(gvec_srsra_b, int8_t) +DO_RSRA(gvec_srsra_h, int16_t) +DO_RSRA(gvec_srsra_s, int32_t) +DO_RSRA(gvec_srsra_d, int64_t) + +DO_RSRA(gvec_ursra_b, uint8_t) +DO_RSRA(gvec_ursra_h, uint16_t) +DO_RSRA(gvec_ursra_s, uint32_t) +DO_RSRA(gvec_ursra_d, uint64_t) + +#undef DO_RSRA + +#define DO_SRI(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SRI(gvec_sri_b, uint8_t) +DO_SRI(gvec_sri_h, uint16_t) +DO_SRI(gvec_sri_s, uint32_t) +DO_SRI(gvec_sri_d, uint64_t) + +#undef DO_SRI + +#define DO_SLI(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SLI(gvec_sli_b, uint8_t) +DO_SLI(gvec_sli_h, uint16_t) +DO_SLI(gvec_sli_s, uint32_t) +DO_SLI(gvec_sli_d, uint64_t) + +#undef DO_SLI + /* * Convert float16 to float32, raising no exceptions and * preserving exceptional values, including SNaN. @@ -1263,3 +1382,76 @@ void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) } } #endif + +#define DO_CMP0(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \ + TYPE nn = *(TYPE *)((char*)vn + i); \ + *(TYPE *)((char*)vd + i) = -(nn OP 0); \ + } \ + clear_tail(vd, opr_sz, simd_maxsz(desc)); \ +} + +DO_CMP0(gvec_ceq0_b, int8_t, ==) +DO_CMP0(gvec_clt0_b, int8_t, <) +DO_CMP0(gvec_cle0_b, int8_t, <=) +DO_CMP0(gvec_cgt0_b, int8_t, >) +DO_CMP0(gvec_cge0_b, int8_t, >=) + +DO_CMP0(gvec_ceq0_h, int16_t, ==) +DO_CMP0(gvec_clt0_h, int16_t, <) +DO_CMP0(gvec_cle0_h, int16_t, <=) +DO_CMP0(gvec_cgt0_h, int16_t, >) +DO_CMP0(gvec_cge0_h, int16_t, >=) + +#undef DO_CMP0 + +#define DO_ABD(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + \ + for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \ + d[i] = n[i] < m[i] ? 
m[i] - n[i] : n[i] - m[i]; \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +DO_ABD(gvec_sabd_b, int8_t) +DO_ABD(gvec_sabd_h, int16_t) +DO_ABD(gvec_sabd_s, int32_t) +DO_ABD(gvec_sabd_d, int64_t) + +DO_ABD(gvec_uabd_b, uint8_t) +DO_ABD(gvec_uabd_h, uint16_t) +DO_ABD(gvec_uabd_s, uint32_t) +DO_ABD(gvec_uabd_d, uint64_t) + +#undef DO_ABD + +#define DO_ABA(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + \ + for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \ + d[i] += n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +DO_ABA(gvec_saba_b, int8_t) +DO_ABA(gvec_saba_h, int16_t) +DO_ABA(gvec_saba_s, int32_t) +DO_ABA(gvec_saba_d, int64_t) + +DO_ABA(gvec_uaba_b, uint8_t) +DO_ABA(gvec_uaba_h, uint16_t) +DO_ABA(gvec_uaba_s, uint32_t) +DO_ABA(gvec_uaba_d, uint64_t) + +#undef DO_ABA diff --git a/qemu/target/arm/vec_internal.h b/qemu/target/arm/vec_internal.h new file mode 100644 index 0000000000..3aa74b0151 --- /dev/null +++ b/qemu/target/arm/vec_internal.h @@ -0,0 +1,33 @@ +/* + * ARM AdvSIMD / SVE Vector Helpers + * + * Copyright (c) 2020 Linaro + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef TARGET_ARM_VEC_INTERNALS_H +#define TARGET_ARM_VEC_INTERNALS_H + +static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) +{ + uint64_t *d = (uint64_t *)((char*)vd + opr_sz); + uintptr_t i; + + for (i = opr_sz; i < max_sz; i += 8) { + *d++ = 0; + } +} + +#endif /* TARGET_ARM_VEC_INTERNALS_H */ diff --git a/qemu/target/arm/vfp_helper.c b/qemu/target/arm/vfp_helper.c index 55bce5957a..753b5ed5e2 100644 --- a/qemu/target/arm/vfp_helper.c +++ b/qemu/target/arm/vfp_helper.c @@ -262,7 +262,7 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) return float64_sqrt(a, &env->vfp.fp_status); } -static void softfloat_to_vfp_compare(CPUARMState *env, int cmp) +static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) { uint32_t flags = 0; switch (cmp) { @@ -536,7 +536,7 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode) * it would affect flushing input denormals. */ float_status *fpst = fpstp; - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); set_flush_inputs_to_zero(false, fpst); float32 r = float16_to_float32(a, !ahp_mode, fpst); set_flush_inputs_to_zero(save, fpst); @@ -549,7 +549,7 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode) * it would affect flushing output denormals.
*/ float_status *fpst = fpstp; - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); set_flush_to_zero(false, fpst); float16 r = float32_to_float16(a, !ahp_mode, fpst); set_flush_to_zero(save, fpst); @@ -562,7 +562,7 @@ float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode) * it would affect flushing input denormals. */ float_status *fpst = fpstp; - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); set_flush_inputs_to_zero(false, fpst); float64 r = float16_to_float64(a, !ahp_mode, fpst); set_flush_inputs_to_zero(save, fpst); @@ -575,7 +575,7 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode) * it would affect flushing output denormals. */ float_status *fpst = fpstp; - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); set_flush_to_zero(false, fpst); float16 r = float64_to_float16(a, !ahp_mode, fpst); set_flush_to_zero(save, fpst); @@ -586,7 +586,7 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode) #define float32_three make_float32(0x40400000) #define float32_one_point_five make_float32(0x3fc00000) -float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env) +float32 HELPER(recps_f32)(CPUARMState *env, float32 a, float32 b) { float_status *s = &env->vfp.standard_fp_status; if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || @@ -599,7 +599,7 @@ float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env) return float32_sub(float32_two, float32_mul(a, b, s), s); } -float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env) +float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b) { float_status *s = &env->vfp.standard_fp_status; float32 product; @@ -702,11 +702,9 @@ static bool round_to_inf(float_status *fpst, bool sign_bit) return sign_bit; case float_round_to_zero: /* Round to Zero */ return false; + default: + g_assert_not_reached(); } - - g_assert_not_reached(); - // never reach here - return false; } uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp) @@ -1030,9 +1028,8 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) return make_float64(val); } -uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) +uint32_t HELPER(recpe_u32)(uint32_t a) { - /* float_status *s = fpstp; */ int input, estimate; if ((a & 0x80000000) == 0) { @@ -1045,7 +1042,7 @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) return deposit32(0, (32 - 9), 9, estimate); } -uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp) +uint32_t HELPER(rsqrte_u32)(uint32_t a) { int estimate; diff --git a/qemu/target/i386/cpu.c b/qemu/target/i386/cpu.c index 86103b09e3..0cdd7a1ed5 100644 --- a/qemu/target/i386/cpu.c +++ b/qemu/target/i386/cpu.c @@ -852,10 +852,10 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = CPUID_FEATURE_WORD, .feat_names = { NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", - NULL, NULL, NULL, NULL, - NULL, NULL, "md-clear", NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL /* pconfig */, NULL, + "fsrm", NULL, NULL, NULL, + "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, "serialize", NULL, + "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "spec-ctrl", "stibp", NULL, "arch-capabilities", "core-capability", "ssbd", @@ -1001,6 +1001,22 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .index = MSR_IA32_CORE_CAPABILITY, }, }, + [FEAT_PERF_CAPABILITIES] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, 
NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, "full-width-write", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_PERF_CAPABILITIES, + }, + }, [FEAT_VMX_PROCBASED_CTLS] = { .type = MSR_FEATURE_WORD, @@ -2722,6 +2738,13 @@ static X86CPUDefinition builtin_x86_defs[] = { { NULL /* end of list */ } } }, + { + .version = 4, + .props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } + } + }, { 0 /* end of list */ } } }, @@ -2827,6 +2850,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .versions = (X86CPUVersionDefinition[]) { { .version = 1 }, { .version = 2, + .note = "ARCH_CAPABILITIES", .props = (PropValue[]) { { "arch-capabilities", "on" }, { "rdctl-no", "on" }, @@ -2838,12 +2862,20 @@ static X86CPUDefinition builtin_x86_defs[] = { }, { .version = 3, .alias = "Cascadelake-Server-noTSX", + .note = "ARCH_CAPABILITIES, no TSX", .props = (PropValue[]) { { "hle", "off" }, { "rtm", "off" }, { NULL /* end of list */ } }, }, + { .version = 4, + .note = "ARCH_CAPABILITIES, no TSX", + .props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } + }, + }, { 0 /* end of list */ } } }, @@ -3059,6 +3091,7 @@ static X86CPUDefinition builtin_x86_defs[] = { { .version = 1 }, { .version = 2, + .note = "no TSX", .alias = "Icelake-Client-noTSX", .props = (PropValue[]) { { "hle", "off" }, @@ -3196,6 +3229,20 @@ static X86CPUDefinition builtin_x86_defs[] = { { NULL /* end of list */ } }, }, + { + .version = 4, + .props = (PropValue[]) { + { "sha-ni", "on" }, + { "avx512ifma", "on" }, + { "rdpid", "on" }, + { "fsrm", "on" }, + { "vmx-rdseed-exit", "on" }, + { "vmx-pml", "on" }, + { "vmx-eptp-switching", "on" }, + { "model", "106" }, + { /* end of list */ } + }, + }, { 0 /* end of list */ } } }, @@ -3296,6 +3343,7 @@ static X86CPUDefinition builtin_x86_defs[] = { { .version = 1 }, { .version = 2, + .note = "no MPX, no MONITOR", .props = (PropValue[]) { { "monitor", "off" }, { "mpx", "off" }, @@ -3828,16 +3876,6 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, } } -/* Convert all '_' in a feature string option name to '-', to make feature - * name conform to QOM property naming rule, which uses '-' instead of '_'. - */ -static inline void feat2prop(char *s) -{ - while ((s = strchr(s, '_'))) { - *s = '-'; - } -} - static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, @@ -3888,6 +3926,13 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) } x86_cpuid_set_vendor(cpu, def->vendor); + + /* + * Properties in versioned CPU model are not user specified features. + * We can simply clear env->user_features here since it will be filled later + * in x86_cpu_expand_features() based on plus_features and minus_features. 
+ */ + memset(&env->user_features, 0, sizeof(env->user_features)); } void cpu_clear_apic_feature(CPUX86State *env) @@ -4042,6 +4087,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ebx |= (cs->nr_cores * cs->nr_threads) << 16; *edx |= CPUID_HT; } + if (!cpu->enable_pmu) { + *ecx &= ~CPUID_EXT_PDCM; + } break; case 2: /* cache info: needed for Pentium Pro compatibility */ @@ -4330,9 +4378,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, host_cpuid(index, 0, eax, ebx, ecx, edx); break; } - *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | \ + *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); - *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | \ + *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache); *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache); @@ -4343,13 +4391,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, host_cpuid(index, 0, eax, ebx, ecx, edx); break; } - *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | \ + *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | (L2_DTLB_2M_ENTRIES << 16) | \ - (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | \ + (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | (L2_ITLB_2M_ENTRIES); - *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | \ + *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | (L2_DTLB_4K_ENTRIES << 16) | \ - (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | \ + (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | (L2_ITLB_4K_ENTRIES); encode_cache_cpuid80000006(env->cache_info_amd.l2_cache, cpu->enable_l3_cache ? @@ -4376,11 +4424,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = cpu->phys_bits; } *ebx = env->features[FEAT_8000_0008_EBX]; - *ecx = 0; - *edx = 0; if (cs->nr_cores * cs->nr_threads > 1) { - *ecx |= (cs->nr_cores * cs->nr_threads) - 1; + /* + * Bits 15:12 is "The number of bits in the initial + * Core::X86::Apic::ApicId[ApicId] value that indicate + * thread ID within a package". This is already stored at + * CPUX86State::pkg_offset. + * Bits 7:0 is "The number of threads in the package is NC+1" + */ + *ecx = (env->pkg_offset << 12) | + ((cs->nr_cores * cs->nr_threads) - 1); + } else { + *ecx = 0; } + *edx = 0; break; case 0x8000000A: if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { @@ -4478,6 +4535,7 @@ static void x86_cpu_reset(CPUState *dev) /* init to reset state */ env->hflags2 |= HF2_GIF_MASK; + env->hflags &= ~HF_GUEST_MASK; cpu_x86_update_cr0(env, 0x60000010); env->a20_mask = ~0x0; @@ -4707,7 +4765,7 @@ static void x86_cpu_expand_features(X86CPU *cpu) */ env->features[w] |= x86_cpu_get_supported_feature_word(w, cpu->migratable) & - ~env->user_features[w] & \ + ~env->user_features[w] & ~feature_word_info[w].no_autoenable_flags; } } @@ -4740,7 +4798,7 @@ static void x86_cpu_expand_features(X86CPU *cpu) // TODO: Add a warning? 
// mark_unavailable_features(cpu, FEAT_7_0_EBX, // CPUID_7_0_EBX_INTEL_PT, - // "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,level=0x14\""); + // "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,min-level=0x14\""); } } diff --git a/qemu/target/i386/cpu.h b/qemu/target/i386/cpu.h index 10d93b89ac..bc0664ffde 100644 --- a/qemu/target/i386/cpu.h +++ b/qemu/target/i386/cpu.h @@ -352,6 +352,8 @@ typedef enum X86Seg { #define MSR_IA32_ARCH_CAPABILITIES 0x10a #define ARCH_CAP_TSX_CTRL_MSR (1<<7) +#define MSR_IA32_PERF_CAPABILITIES 0x345 + #define MSR_IA32_TSX_CTRL 0x122 #define MSR_IA32_TSCDEADLINE 0x6e0 @@ -525,6 +527,7 @@ typedef enum FeatureWord { FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ FEAT_ARCH_CAPABILITIES, FEAT_CORE_CAPABILITY, + FEAT_PERF_CAPABILITIES, FEAT_VMX_PROCBASED_CTLS, FEAT_VMX_SECONDARY_CTLS, FEAT_VMX_PINBASED_CTLS, @@ -768,6 +771,14 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Multiply Accumulation Single Precision */ #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) +/* Fast Short Rep Mov */ +#define CPUID_7_0_EDX_FSRM (1U << 4) +/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ +#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) +/* SERIALIZE instruction */ +#define CPUID_7_0_EDX_SERIALIZE (1U << 14) +/* TSX Suspend Load Address Tracking instruction */ +#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) /* Speculation Control */ #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Single Thread Indirect Branch Predictors */ @@ -1571,6 +1582,7 @@ typedef struct CPUX86State { bool tsc_valid; int64_t tsc_khz; int64_t user_tsc_khz; /* for sanity check only */ + uint64_t apic_bus_freq; uint64_t mcg_cap; uint64_t mcg_ctl; @@ -2043,6 +2055,11 @@ static inline bool cpu_has_vmx(CPUX86State *env) return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; } +static inline bool cpu_has_svm(CPUX86State *env) +{ + return env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM; +} + /* * In order for a vCPU to enter VMX operation it must have CR4.VMXE set. 
* Since it was set, CR4.VMXE must remain set as long as vCPU is in @@ -2068,6 +2085,7 @@ static inline bool cpu_vmx_maybe_enabled(CPUX86State *env) /* fpu_helper.c */ void update_fp_status(CPUX86State *env); void update_mxcsr_status(CPUX86State *env); +void update_mxcsr_from_sse_status(CPUX86State *env); static inline void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr) { diff --git a/qemu/target/i386/excp_helper.c b/qemu/target/i386/excp_helper.c index cca25d322e..800c75dffd 100644 --- a/qemu/target/i386/excp_helper.c +++ b/qemu/target/i386/excp_helper.c @@ -261,8 +261,8 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type, } ptep = pde | PG_NX_MASK; - /* if PSE bit is set, then we use a 4MB page */ - if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { + /* if host cr4 PSE bit is set, then we use a 4MB page */ + if ((pde & PG_PSE_MASK) && (env->nested_pg_mode & SVM_NPT_PSE)) { page_size = 4096 * 1024; pte_addr = pde_addr; diff --git a/qemu/target/i386/fpu_helper.c b/qemu/target/i386/fpu_helper.c index b3f537000f..ea121fbfe5 100644 --- a/qemu/target/i386/fpu_helper.c +++ b/qemu/target/i386/fpu_helper.c @@ -25,6 +25,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "fpu/softfloat.h" +#include "fpu/softfloat-macros.h" #define FPU_RC_MASK 0xc00 #define FPU_RC_NEAR 0x000 @@ -55,8 +56,13 @@ #define FPUC_EM 0x3f #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) +#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) +#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) +#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) +#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) +#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) static void cpu_clear_ignne(CPUX86State *env) { @@ -141,12 +147,32 @@ static void fpu_set_exception(CPUX86State *env, int mask) } } +static inline uint8_t save_exception_flags(CPUX86State *env) +{ + uint8_t old_flags = get_float_exception_flags(&env->fp_status); + set_float_exception_flags(0, &env->fp_status); + return old_flags; +} + +static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) +{ + uint8_t new_flags = get_float_exception_flags(&env->fp_status); + float_raise(old_flags, &env->fp_status); + fpu_set_exception(env, + ((new_flags & float_flag_invalid ? FPUS_IE : 0) | + (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | + (new_flags & float_flag_overflow ? FPUS_OE : 0) | + (new_flags & float_flag_underflow ? FPUS_UE : 0) | + (new_flags & float_flag_inexact ? FPUS_PE : 0) | + (new_flags & float_flag_input_denormal ? 
FPUS_DE : 0))); +} + static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) { - if (floatx80_is_zero(b)) { - fpu_set_exception(env, FPUS_ZE); - } - return floatx80_div(a, b, &env->fp_status); + uint8_t old_flags = save_exception_flags(env); + floatx80 ret = floatx80_div(a, b, &env->fp_status); + merge_exception_flags(env, old_flags); + return ret; } static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) @@ -158,6 +184,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) void helper_flds_FT0(CPUX86State *env, uint32_t val) { + uint8_t old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -165,10 +192,12 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val) u.i = val; FT0 = float32_to_floatx80(u.f, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fldl_FT0(CPUX86State *env, uint64_t val) { + uint8_t old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -176,6 +205,7 @@ void helper_fldl_FT0(CPUX86State *env, uint64_t val) u.i = val; FT0 = float64_to_floatx80(u.f, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fildl_FT0(CPUX86State *env, int32_t val) @@ -185,6 +215,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val) void helper_flds_ST0(CPUX86State *env, uint32_t val) { + uint8_t old_flags = save_exception_flags(env); int new_fpstt; union { float32 f; @@ -196,10 +227,12 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val) env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ + merge_exception_flags(env, old_flags); } void helper_fldl_ST0(CPUX86State *env, uint64_t val) { + uint8_t old_flags = save_exception_flags(env); int new_fpstt; union { float64 f; @@ -211,6 +244,7 @@ void helper_fldl_ST0(CPUX86State *env, uint64_t val) env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ + merge_exception_flags(env, old_flags); } void helper_fildl_ST0(CPUX86State *env, int32_t val) @@ -235,90 +269,108 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val) uint32_t helper_fsts_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); union { float32 f; uint32_t i; } u; u.f = floatx80_to_float32(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); return u.i; } uint64_t helper_fstl_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); union { float64 f; uint64_t i; } u; u.f = floatx80_to_float64(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); return u.i; } int32_t helper_fist_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); if (val != (int16_t)val) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); val = -32768; } + merge_exception_flags(env, old_flags); return val; } int32_t helper_fistl_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int32_t val; - signed char old_exp_flags; - - old_exp_flags = get_float_exception_flags(&env->fp_status); - set_float_exception_flags(0, &env->fp_status); val = floatx80_to_int32(ST0, &env->fp_status); if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { val = 0x80000000; } - set_float_exception_flags(get_float_exception_flags(&env->fp_status) - | old_exp_flags, &env->fp_status); + merge_exception_flags(env, old_flags); return val; } 
int64_t helper_fistll_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int64_t val; - signed char old_exp_flags; - - old_exp_flags = get_float_exception_flags(&env->fp_status); - set_float_exception_flags(0, &env->fp_status); val = floatx80_to_int64(ST0, &env->fp_status); if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { val = 0x8000000000000000ULL; } - set_float_exception_flags(get_float_exception_flags(&env->fp_status) - | old_exp_flags, &env->fp_status); + merge_exception_flags(env, old_flags); return val; } int32_t helper_fistt_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); if (val != (int16_t)val) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); val = -32768; } + merge_exception_flags(env, old_flags); return val; } int32_t helper_fisttl_ST0(CPUX86State *env) { - return floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + uint8_t old_flags = save_exception_flags(env); + int32_t val; + + val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x80000000; + } + merge_exception_flags(env, old_flags); + return val; } int64_t helper_fisttll_ST0(CPUX86State *env) { - return floatx80_to_int64_round_to_zero(ST0, &env->fp_status); + uint8_t old_flags = save_exception_flags(env); + int64_t val; + + val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x8000000000000000ULL; + } + merge_exception_flags(env, old_flags); + return val; } void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) @@ -400,62 +452,78 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { - int ret; + uint8_t old_flags = save_exception_flags(env); + FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; + merge_exception_flags(env, old_flags); } void helper_fucom_ST0_FT0(CPUX86State *env) { - int ret; + uint8_t old_flags = save_exception_flags(env); + FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; + merge_exception_flags(env, old_flags); } static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_fcomi_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int eflags; - int ret; + FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env, CC_OP); eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; CC_SRC = eflags; + merge_exception_flags(env, old_flags); } void helper_fucomi_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int eflags; - int ret; + FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env, CC_OP); eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; CC_SRC = eflags; + merge_exception_flags(env, old_flags); } void helper_fadd_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_add(ST0, FT0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fmul_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_mul(ST0, FT0, &env->fp_status); + merge_exception_flags(env, 
old_flags); } void helper_fsub_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_sub(ST0, FT0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsubr_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_sub(FT0, ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fdiv_ST0_FT0(CPUX86State *env) @@ -472,22 +540,30 @@ void helper_fdivr_ST0_FT0(CPUX86State *env) void helper_fadd_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fmul_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsub_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) @@ -519,58 +595,81 @@ void helper_fabs_ST0(CPUX86State *env) void helper_fld1_ST0(CPUX86State *env) { - //ST0 = floatx80_one; - floatx80 one = { 0x8000000000000000LL, 0x3fff }; - ST0 = one; + ST0 = floatx80_one; } void helper_fldl2t_ST0(CPUX86State *env) { - //ST0 = floatx80_l2t; - floatx80 l2t = { 0xd49a784bcd1b8afeLL, 0x4000 }; - ST0 = l2t; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_UP: + ST0 = floatx80_l2t_u; + break; + default: + ST0 = floatx80_l2t; + break; + } } void helper_fldl2e_ST0(CPUX86State *env) { - //ST0 = floatx80_l2e; - floatx80 l2e = { 0xb8aa3b295c17f0bcLL, 0x3fff }; - ST0 = l2e; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_l2e_d; + break; + default: + ST0 = floatx80_l2e; + break; + } } void helper_fldpi_ST0(CPUX86State *env) { - //ST0 = floatx80_pi; - floatx80 pi = { 0xc90fdaa22168c235LL, 0x4000 }; - ST0 = pi; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_pi_d; + break; + default: + ST0 = floatx80_pi; + break; + } } void helper_fldlg2_ST0(CPUX86State *env) { - //ST0 = floatx80_lg2; - floatx80 lg2 = { 0x9a209a84fbcff799LL, 0x3ffd }; - ST0 = lg2; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_lg2_d; + break; + default: + ST0 = floatx80_lg2; + break; + } } void helper_fldln2_ST0(CPUX86State *env) { - //ST0 = floatx80_ln2; - floatx80 ln2 = { 0xb17217f7d1cf79acLL, 0x3ffe }; - ST0 = ln2; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_ln2_d; + break; + default: + ST0 = floatx80_ln2; + break; + } } void helper_fldz_ST0(CPUX86State *env) { - //ST0 = floatx80_zero; - floatx80 zero = { 0x0000000000000000LL, 0x0000 }; - ST0 = zero; + ST0 = floatx80_zero; } void helper_fldz_FT0(CPUX86State *env) { - //FT0 = floatx80_zero; - floatx80 zero = { 0x0000000000000000LL, 0x0000 }; - FT0 = zero; + FT0 = floatx80_zero; } uint32_t helper_fnstsw(CPUX86State *env) @@ -684,18 +783,31 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) { + uint8_t 
old_flags = save_exception_flags(env); int v; target_ulong mem_ref, mem_end; int64_t val; + CPU_LDoubleU temp; + + temp.d = ST0; val = floatx80_to_int64(ST0, &env->fp_status); mem_ref = ptr; + if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); + while (mem_ref < ptr + 7) { + cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + } + cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); + cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + merge_exception_flags(env, old_flags); + return; + } mem_end = mem_ref + 9; - if (val < 0) { + if (SIGND(temp)) { cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); - if (val != 0x8000000000000000LL) { - val = -val; - } + val = -val; } else { cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); } @@ -705,35 +817,399 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) } v = val % 100; val = val / 100; - v = (int)((unsigned int)(v / 10) << 4) | (v % 10); + v = ((v / 10) << 4) | (v % 10); cpu_stb_data_ra(env, mem_ref++, v, GETPC()); } while (mem_ref < mem_end) { cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); } + merge_exception_flags(env, old_flags); } -void helper_f2xm1(CPUX86State *env) -{ - double val = floatx80_to_double(env, ST0); +/* 128-bit significand of log(2). */ +#define ln2_sig_high 0xb17217f7d1cf79abULL +#define ln2_sig_low 0xc9e3b39803f2f6afULL - val = pow(2.0, val) - 1.0; - ST0 = double_to_floatx80(env, val); -} +/* + * Polynomial coefficients for an approximation to (2^x - 1) / x, on + * the interval [-1/64, 1/64]. + */ +#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) +#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) +#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) +#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) +#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) +#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) +#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) +#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) +#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) + +struct f2xm1_data { + /* + * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 + * are very close to exact floatx80 values. + */ + floatx80 t; + /* The value of 2^t. */ + floatx80 exp2; + /* The value of 2^t - 1. 
*/ + floatx80 exp2m1; +}; + +static const struct f2xm1_data f2xm1_table[65] = { + { make_floatx80_init(0xbfff, 0x8000000000000000ULL), + make_floatx80_init(0x3ffe, 0x8000000000000000ULL), + make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, + { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), + make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), + make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, + { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), + make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), + make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, + { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), + make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), + make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, + { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), + make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), + make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, + { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), + make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), + make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, + { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), + make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), + make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, + { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), + make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), + make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, + { make_floatx80_init(0xbffe, 0xc000000000006530ULL), + make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), + make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, + { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), + make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), + make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, + { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), + make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), + make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, + { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), + make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), + make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, + { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), + make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), + make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, + { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), + make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), + make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, + { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), + make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), + make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, + { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), + make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), + make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, + { make_floatx80_init(0xbffe, 0x800000000000227dULL), + make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), + make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, + { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), + make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), + make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, + { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), + make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), + make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, + { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), + make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), + make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, + { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), + make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), + make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, + { make_floatx80_init(0xbffd, 
0xb000000000000fe6ULL), + make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), + make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, + { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), + make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), + make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, + { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), + make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), + make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, + { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), + make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), + make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, + { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), + make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), + make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, + { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), + make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), + make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, + { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), + make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), + make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, + { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), + make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), + make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, + { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), + make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), + make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, + { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), + make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), + make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, + { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), + make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), + make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, + { floatx80_zero_init, + make_floatx80_init(0x3fff, 0x8000000000000000ULL), + floatx80_zero_init }, + { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), + make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), + make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, + { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), + make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), + make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, + { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), + make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), + make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, + { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), + make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), + make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, + { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), + make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), + make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, + { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), + make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), + make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, + { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), + make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), + make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, + { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), + make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), + make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, + { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), + make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), + make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, + { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), + make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), + make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, + { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), + 
make_floatx80_init(0x3fff, 0xa27043030c49370aULL), + make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, + { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), + make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), + make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, + { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), + make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), + make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, + { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), + make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), + make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, + { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), + make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), + make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, + { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), + make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), + make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, + { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), + make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), + make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, + { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), + make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), + make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, + { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), + make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), + make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, + { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), + make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), + make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, + { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), + make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), + make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, + { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), + make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), + make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, + { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), + make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), + make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, + { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), + make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), + make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, + { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), + make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), + make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, + { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), + make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), + make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, + { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), + make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), + make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, + { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), + make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), + make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, + { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), + make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), + make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, + { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), + make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), + make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, + { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), + make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), + make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, + { make_floatx80_init(0x3fff, 0x8000000000000000ULL), + make_floatx80_init(0x4000, 0x8000000000000000ULL), + make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, +}; -void 
helper_fyl2x(CPUX86State *env) +void helper_f2xm1(CPUX86State *env) { - double fptemp = floatx80_to_double(env, ST0); - - if (fptemp > 0.0) { - fptemp = log(fptemp) / log(2.0); /* log2(ST) */ - fptemp *= floatx80_to_double(env, ST1); - ST1 = double_to_floatx80(env, fptemp); - fpop(env); + uint8_t old_flags = save_exception_flags(env); + uint64_t sig = extractFloatx80Frac(ST0); + int32_t exp = extractFloatx80Exp(ST0); + bool sign = extractFloatx80Sign(ST0); + + if (floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + } else if (exp > 0x3fff || + (exp == 0x3fff && sig != (0x8000000000000000ULL))) { + /* Out of range for the instruction, treat as invalid. */ + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (exp == 0x3fff) { + /* Argument 1 or -1, exact result 1 or -0.5. */ + if (sign) { + ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); + } + } else if (exp < 0x3fb0) { + if (!floatx80_is_zero(ST0)) { + /* + * Multiplying the argument by an extra-precision version + * of log(2) is sufficiently precise. Zero arguments are + * returned unchanged. + */ + uint64_t sig0, sig1, sig2; + if (exp == 0) { + normalizeFloatx80Subnormal(sig, &exp, &sig); + } + mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, + &sig2); + /* This result is inexact. */ + sig1 |= 1; + ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1, + &env->fp_status); + } } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; + floatx80 tmp, y, accum; + bool asign, bsign; + int32_t n, aexp, bexp; + uint64_t asig0, asig1, asig2, bsig0, bsig1; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + /* Find the nearest multiple of 1/32 to the argument. */ + tmp = floatx80_scalbn(ST0, 5, &env->fp_status); + n = 32 + floatx80_to_int32(tmp, &env->fp_status); + y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); + + if (floatx80_is_zero(y)) { + /* + * Use the value of 2^t - 1 from the table, to avoid + * needing to special-case zero as a result of + * multiplication below. + */ + ST0 = f2xm1_table[n].t; + set_float_exception_flags(float_flag_inexact, &env->fp_status); + env->fp_status.float_rounding_mode = save_mode; + } else { + /* + * Compute the lower parts of a polynomial expansion for + * (2^y - 1) / y. 
+ */ + accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); + + /* + * The full polynomial expansion is f2xm1_coeff_0 + accum + * (where accum has much lower magnitude, and so, in + * particular, carry out of the addition is not possible). + * (This expansion is only accurate to about 70 bits, not + * 128 bits.) + */ + aexp = extractFloatx80Exp(f2xm1_coeff_0); + asign = extractFloatx80Sign(f2xm1_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + bsig0 = extractFloatx80Frac(f2xm1_coeff_0); + bsig1 = 0; + if (asign == extractFloatx80Sign(accum)) { + add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } + /* And thus compute an approximation to 2^y - 1. */ + mul128By64To192(asig0, asig1, extractFloatx80Frac(y), + &asig0, &asig1, &asig2); + aexp += extractFloatx80Exp(y) - 0x3ffe; + asign ^= extractFloatx80Sign(y); + if (n != 32) { + /* + * Multiply this by the precomputed value of 2^t and + * add that of 2^t - 1. + */ + mul128By64To192(asig0, asig1, + extractFloatx80Frac(f2xm1_table[n].exp2), + &asig0, &asig1, &asig2); + aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; + bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); + bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); + bsig1 = 0; + if (bexp < aexp) { + shift128RightJamming(bsig0, bsig1, aexp - bexp, + &bsig0, &bsig1); + } else if (aexp < bexp) { + shift128RightJamming(asig0, asig1, bexp - aexp, + &asig0, &asig1); + aexp = bexp; + } + /* The sign of 2^t - 1 is always that of the result. */ + bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); + if (asign == bsign) { + /* Avoid possible carry out of the addition. */ + shift128RightJamming(asig0, asig1, 1, + &asig0, &asig1); + shift128RightJamming(bsig0, bsig1, 1, + &bsig0, &bsig1); + ++aexp; + add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + asign = bsign; + } + } + env->fp_status.float_rounding_mode = save_mode; + /* This result is inexact. */ + asig1 |= 1; + ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1, + &env->fp_status); + } + + env->fp_status.floatx80_rounding_precision = save_prec; } + merge_exception_flags(env, old_flags); } void helper_fptan(CPUX86State *env) @@ -743,194 +1219,996 @@ void helper_fptan(CPUX86State *env) if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { env->fpus |= 0x400; } else { - floatx80 one = { 0x8000000000000000LL, 0x3fff }; fptemp = tan(fptemp); ST0 = double_to_floatx80(env, fptemp); fpush(env); - ST0 = one; + ST0 = floatx80_one; env->fpus &= ~0x400; /* C2 <-- 0 */ /* the above code is for |arg| < 2**52 only */ } } +/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
*/ +#define pi_4_exp 0x3ffe +#define pi_4_sig_high 0xc90fdaa22168c234ULL +#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL +#define pi_2_exp 0x3fff +#define pi_2_sig_high 0xc90fdaa22168c234ULL +#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL +#define pi_34_exp 0x4000 +#define pi_34_sig_high 0x96cbe3f9990e91a7ULL +#define pi_34_sig_low 0x9394c9e8a0a5159dULL +#define pi_exp 0x4000 +#define pi_sig_high 0xc90fdaa22168c234ULL +#define pi_sig_low 0xc4c6628b80dc1cd1ULL + +/* + * Polynomial coefficients for an approximation to atan(x), with only + * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike + * for some other approximations, no low part is needed for the first + * coefficient here to achieve a sufficiently accurate result, because + * the coefficient in this minimax approximation is very close to + * exactly 1.) + */ +#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) +#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) +#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) +#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) +#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) +#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) +#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) + +struct fpatan_data { + /* High and low parts of atan(x). */ + floatx80 atan_high, atan_low; +}; + +static const struct fpatan_data fpatan_table[9] = { + { floatx80_zero_init, + floatx80_zero_init }, + { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL), + make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) }, + { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL), + make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) }, + { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL), + make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) }, + { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL), + make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) }, + { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL), + make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) }, + { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL), + make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) }, + { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL), + make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) }, + { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL), + make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) }, +}; + void helper_fpatan(CPUX86State *env) { - double fptemp, fpsrcop; + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (floatx80_is_zero(ST1) && !arg0_sign) { + /* Pass this zero through. 
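+ * (fpatan computes atan2(ST1, ST0) into ST1; for a zero ST1 and a non-negative ST0 that result is a zero with the sign of ST1, so ST1 is already correct.)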
*/ + } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) || + arg0_exp - arg1_exp >= 80) && + !arg0_sign) { + /* + * Dividing ST1 by ST0 gives the correct result up to + * rounding, and avoids spurious underflow exceptions that + * might result from passing some small values through the + * polynomial approximation, but if a finite nonzero result of + * division is exact, the result of fpatan is still inexact + * (and underflowing where appropriate). + */ + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.floatx80_rounding_precision = 80; + ST1 = floatx80_div(ST1, ST0, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save_prec; + if (!floatx80_is_zero(ST1) && + !(get_float_exception_flags(&env->fp_status) & + float_flag_inexact)) { + /* + * The mathematical result is very slightly closer to zero + * than this exact result. Round a value with the + * significand adjusted accordingly to get the correct + * exceptions, and possibly an adjusted result depending + * on the rounding mode. + */ + uint64_t sig = extractFloatx80Frac(ST1); + int32_t exp = extractFloatx80Exp(ST1); + bool sign = extractFloatx80Sign(ST1); + if (exp == 0) { + normalizeFloatx80Subnormal(sig, &exp, &sig); + } + ST1 = normalizeRoundAndPackFloatx80(80, sign, exp, sig - 1, + -1, &env->fp_status); + } + } else { + /* The result is inexact. */ + bool rsign = arg1_sign; + int32_t rexp; + uint64_t rsig0, rsig1; + if (floatx80_is_zero(ST1)) { + /* + * ST0 is negative. The result is pi with the sign of + * ST1. + */ + rexp = pi_exp; + rsig0 = pi_sig_high; + rsig1 = pi_sig_low; + } else if (floatx80_is_infinity(ST1)) { + if (floatx80_is_infinity(ST0)) { + if (arg0_sign) { + rexp = pi_34_exp; + rsig0 = pi_34_sig_high; + rsig1 = pi_34_sig_low; + } else { + rexp = pi_4_exp; + rsig0 = pi_4_sig_high; + rsig1 = pi_4_sig_low; + } + } else { + rexp = pi_2_exp; + rsig0 = pi_2_sig_high; + rsig1 = pi_2_sig_low; + } + } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) { + rexp = pi_2_exp; + rsig0 = pi_2_sig_high; + rsig1 = pi_2_sig_low; + } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) { + /* ST0 is negative. */ + rexp = pi_exp; + rsig0 = pi_sig_high; + rsig1 = pi_sig_low; + } else { + /* + * ST0 and ST1 are finite, nonzero and with exponents not + * too far apart. + */ + int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp; + int32_t azexp, axexp; + bool adj_sub, ysign, zsign; + uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1; + uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2; + uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1; + uint64_t azsig0, azsig1; + uint64_t azsig2, azsig3, axsig0, axsig1; + floatx80 x8; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + if (arg0_exp > arg1_exp || + (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) { + /* Work with abs(ST1) / abs(ST0). */ + num_exp = arg1_exp; + num_sig = arg1_sig; + den_exp = arg0_exp; + den_sig = arg0_sig; + if (arg0_sign) { + /* The result is subtracted from pi. 
*/ + adj_exp = pi_exp; + adj_sig0 = pi_sig_high; + adj_sig1 = pi_sig_low; + adj_sub = true; + } else { + /* The result is used as-is. */ + adj_exp = 0; + adj_sig0 = 0; + adj_sig1 = 0; + adj_sub = false; + } + } else { + /* Work with abs(ST0) / abs(ST1). */ + num_exp = arg0_exp; + num_sig = arg0_sig; + den_exp = arg1_exp; + den_sig = arg1_sig; + /* The result is added to or subtracted from pi/2. */ + adj_exp = pi_2_exp; + adj_sig0 = pi_2_sig_high; + adj_sig1 = pi_2_sig_low; + adj_sub = !arg0_sign; + } + + /* + * Compute x = num/den, where 0 < x <= 1 and x is not too + * small. + */ + xexp = num_exp - den_exp + 0x3ffe; + remsig0 = num_sig; + remsig1 = 0; + if (den_sig <= remsig0) { + shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); + ++xexp; + } + xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig); + mul64To128(den_sig, xsig0, &msig0, &msig1); + sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1); + while ((int64_t) remsig0 < 0) { + --xsig0; + add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1); + } + xsig1 = estimateDiv128To64(remsig1, 0, den_sig); + /* + * No need to correct any estimation error in xsig1; even + * with such error, it is accurate enough. + */ + + /* + * Split x as x = t + y, where t = n/8 is the nearest + * multiple of 1/8 to x. + */ + x8 = normalizeRoundAndPackFloatx80(80, false, xexp + 3, xsig0, + xsig1, &env->fp_status); + n = floatx80_to_int32(x8, &env->fp_status); + if (n == 0) { + ysign = false; + yexp = xexp; + ysig0 = xsig0; + ysig1 = xsig1; + texp = 0; + tsig = 0; + } else { + int shift = clz32(n) + 32; + texp = 0x403b - shift; + tsig = n; + tsig <<= shift; + if (texp == xexp) { + sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1); + if ((int64_t) ysig0 >= 0) { + ysign = false; + if (ysig0 == 0) { + if (ysig1 == 0) { + yexp = 0; + } else { + shift = clz64(ysig1) + 64; + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, + &ysig0, &ysig1); + } + } else { + shift = clz64(ysig0); + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } else { + ysign = true; + sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1); + if (ysig0 == 0) { + shift = clz64(ysig1) + 64; + } else { + shift = clz64(ysig0); + } + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } else { + /* + * t's exponent must be greater than x's because t + * is positive and the nearest multiple of 1/8 to + * x, and if x has a greater exponent, the power + * of 2 with that exponent is also a multiple of + * 1/8. + */ + uint64_t usig0, usig1; + shift128RightJamming(xsig0, xsig1, texp - xexp, + &usig0, &usig1); + ysign = true; + sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1); + if (ysig0 == 0) { + shift = clz64(ysig1) + 64; + } else { + shift = clz64(ysig0); + } + yexp = texp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } + + /* + * Compute z = y/(1+tx), so arctan(x) = arctan(t) + + * arctan(z). + */ + zsign = ysign; + if (texp == 0 || yexp == 0) { + zexp = yexp; + zsig0 = ysig0; + zsig1 = ysig1; + } else { + /* + * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1. + */ + int32_t dexp = texp + xexp - 0x3ffe; + uint64_t dsig0, dsig1, dsig2; + mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2); + /* + * dexp <= 0x3fff (and if equal, dsig0 has a leading 0 + * bit). Add 1 to produce the denominator 1+tx. 
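+ * (The shift below aligns the significand of tx to exponent 0x3fff; setting the top bit then supplies the implicit one, giving the significand of 1+tx at that exponent.)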
+ */ + shift128RightJamming(dsig0, dsig1, 0x3fff - dexp, + &dsig0, &dsig1); + dsig0 |= 0x8000000000000000ULL; + zexp = yexp - 1; + remsig0 = ysig0; + remsig1 = ysig1; + remsig2 = 0; + if (dsig0 <= remsig0) { + shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); + ++zexp; + } + zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0); + mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2); + sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2, + &remsig0, &remsig1, &remsig2); + while ((int64_t) remsig0 < 0) { + --zsig0; + add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1, + &remsig0, &remsig1, &remsig2); + } + zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0); + /* No need to correct any estimation error in zsig1. */ + } + + if (zexp == 0) { + azexp = 0; + azsig0 = 0; + azsig1 = 0; + } else { + floatx80 z2, accum; + uint64_t z2sig0, z2sig1, z2sig2, z2sig3; + /* Compute z^2. */ + mul128To256(zsig0, zsig1, zsig0, zsig1, + &z2sig0, &z2sig1, &z2sig2, &z2sig3); + z2 = normalizeRoundAndPackFloatx80(80, false, + zexp + zexp - 0x3ffe, + z2sig0, z2sig1, + &env->fp_status); + + /* Compute the lower parts of the polynomial expansion. */ + accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + + /* + * The full polynomial expansion is z*(fpatan_coeff_0 + accum). + * fpatan_coeff_0 is 1, and accum is negative and much smaller. + */ + aexp = extractFloatx80Exp(fpatan_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1, + &asig0, &asig1); + /* Multiply by z to compute arctan(z). */ + azexp = aexp + zexp - 0x3ffe; + mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1, + &azsig2, &azsig3); + } + + /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */ + if (texp == 0) { + /* z is positive. 
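+ * (t == 0 here, so arctan(x) reduces to arctan(z) and the az values computed above are used directly.)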
*/ + axexp = azexp; + axsig0 = azsig0; + axsig1 = azsig1; + } else { + bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low); + int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low); + uint64_t low_sig0 = + extractFloatx80Frac(fpatan_table[n].atan_low); + uint64_t low_sig1 = 0; + axexp = extractFloatx80Exp(fpatan_table[n].atan_high); + axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high); + axsig1 = 0; + shift128RightJamming(low_sig0, low_sig1, axexp - low_exp, + &low_sig0, &low_sig1); + if (low_sign) { + sub128(axsig0, axsig1, low_sig0, low_sig1, + &axsig0, &axsig1); + } else { + add128(axsig0, axsig1, low_sig0, low_sig1, + &axsig0, &axsig1); + } + if (azexp >= axexp) { + shift128RightJamming(axsig0, axsig1, azexp - axexp + 1, + &axsig0, &axsig1); + axexp = azexp + 1; + shift128RightJamming(azsig0, azsig1, 1, + &azsig0, &azsig1); + } else { + shift128RightJamming(axsig0, axsig1, 1, + &axsig0, &axsig1); + shift128RightJamming(azsig0, azsig1, axexp - azexp + 1, + &azsig0, &azsig1); + ++axexp; + } + if (zsign) { + sub128(axsig0, axsig1, azsig0, azsig1, + &axsig0, &axsig1); + } else { + add128(axsig0, axsig1, azsig0, azsig1, + &axsig0, &axsig1); + } + } + + if (adj_exp == 0) { + rexp = axexp; + rsig0 = axsig0; + rsig1 = axsig1; + } else { + /* + * Add or subtract arctan(x) (exponent axexp, + * significand axsig0 and axsig1, positive, not + * necessarily normalized) to the number given by + * adj_exp, adj_sig0 and adj_sig1, according to + * adj_sub. + */ + if (adj_exp >= axexp) { + shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, + &axsig0, &axsig1); + rexp = adj_exp + 1; + shift128RightJamming(adj_sig0, adj_sig1, 1, + &adj_sig0, &adj_sig1); + } else { + shift128RightJamming(axsig0, axsig1, 1, + &axsig0, &axsig1); + shift128RightJamming(adj_sig0, adj_sig1, + axexp - adj_exp + 1, + &adj_sig0, &adj_sig1); + rexp = axexp + 1; + } + if (adj_sub) { + sub128(adj_sig0, adj_sig1, axsig0, axsig1, + &rsig0, &rsig1); + } else { + add128(adj_sig0, adj_sig1, axsig0, axsig1, + &rsig0, &rsig1); + } + } + + env->fp_status.float_rounding_mode = save_mode; + env->fp_status.floatx80_rounding_precision = save_prec; + } + /* This result is inexact. 
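+ * (Forcing the low significand bit on acts as a sticky bit: normalizeRoundAndPackFloatx80 then always sees discarded bits, raises the inexact flag and rounds accordingly.)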
*/ + rsig1 |= 1; + ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp, + rsig0, rsig1, &env->fp_status); + } - fpsrcop = floatx80_to_double(env, ST1); - fptemp = floatx80_to_double(env, ST0); - ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp)); fpop(env); + merge_exception_flags(env, old_flags); } void helper_fxtract(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); CPU_LDoubleU temp; temp.d = ST0; if (floatx80_is_zero(ST0)) { /* Easy way to generate -inf and raising division by 0 exception */ - floatx80 zero = { 0x0000000000000000LL, 0x0000 }; - floatx80 one = { 0x8000000000000000LL, 0x3fff }; - ST0 = floatx80_div(floatx80_chs(one), zero, + ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, &env->fp_status); fpush(env); ST0 = temp.d; + } else if (floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + fpush(env); + ST0 = ST1; + } else if (floatx80_is_any_nan(ST0)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + fpush(env); + ST0 = ST1; + } else if (floatx80_is_infinity(ST0)) { + fpush(env); + ST0 = ST1; + ST1 = floatx80_infinity; } else { int expdif; - expdif = EXPD(temp) - EXPBIAS; + if (EXPD(temp) == 0) { + int shift = clz64(temp.l.lower); + temp.l.lower <<= shift; + expdif = 1 - EXPBIAS - shift; + float_raise(float_flag_input_denormal, &env->fp_status); + } else { + expdif = EXPD(temp) - EXPBIAS; + } /* DP exponent bias */ ST0 = int32_to_floatx80(expdif, &env->fp_status); fpush(env); BIASEXPONENT(temp); ST0 = temp.d; } + merge_exception_flags(env, old_flags); } -void helper_fprem1(CPUX86State *env) +static void helper_fprem_common(CPUX86State *env, bool mod) { - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(env, ST0); - st1 = floatx80_to_double(env, ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(env, NAN); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } - - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + uint8_t old_flags = save_exception_flags(env); + uint64_t quotient; + CPU_LDoubleU temp0, temp1; + int exp0, exp1, expdiff; - if (expdif < 0) { - /* optimisation? 
taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } + temp0.d = ST0; + temp1.d = ST1; + exp0 = EXPD(temp0); + exp1 = EXPD(temp1); - if (expdif < 53) { - dblq = fpsrcop / fptemp; - /* round dblq towards nearest integer */ - dblq = rint(dblq); - st0 = fpsrcop - fptemp * dblq; - - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || + exp0 == 0x7fff || exp1 == 0x7fff || + floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { + ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status); + } else { + if (exp0 == 0) { + exp0 = 1 - clz64(temp0.l.lower); + } + if (exp1 == 0) { + exp1 = 1 - clz64(temp1.l.lower); + } + expdiff = exp0 - exp1; + if (expdiff < 64) { + ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status); + env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ + env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ + env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ } else { - q = (signed long long int)dblq; + /* + * Partial remainder. This choice of how many bits to + * process at once is specified in AMD instruction set + * manuals, and empirically is followed by Intel + * processors as well; it ensures that the final remainder + * operation in a loop does produce the correct low three + * bits of the quotient. AMD manuals specify that the + * flags other than C2 are cleared, and empirically Intel + * processors clear them as well. + */ + int n = 32 + (expdiff % 32); + temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); + ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); + env->fpus |= 0x400; /* C2 <-- 1 */ } - - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ - } else { - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, expdif - 50); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); } - ST0 = double_to_floatx80(env, st0); + merge_exception_flags(env, old_flags); } -void helper_fprem(CPUX86State *env) +void helper_fprem1(CPUX86State *env) { - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(env, ST0); - st1 = floatx80_to_double(env, ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(env, NAN); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } + helper_fprem_common(env, false); +} - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); +void helper_fprem(CPUX86State *env) +{ + helper_fprem_common(env, true); +} - if (expdif < 0) { - /* optimisation? taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } +/* 128-bit significand of log2(e). */ +#define log2_e_sig_high 0xb8aa3b295c17f0bbULL +#define log2_e_sig_low 0xbe87fed0691d3e89ULL - if (expdif < 53) { - dblq = fpsrcop / fptemp; /* ST0 / ST1 */ - /* round dblq towards zero */ - dblq = (dblq < 0.0) ?
ceil(dblq) : floor(dblq); - st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */ +/* + * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), + * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, + * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the + * interval [sqrt(2)/2, sqrt(2)]. + */ +#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) +#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) +#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) +#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) +#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) +#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) +#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) +#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) +#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) +#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) +#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); - } else { - q = (signed long long int)dblq; - } +/* + * Compute an approximation of log2(1+arg), where 1+arg is in the + * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this + * function is called, rounding precision is set to 80 and the + * round-to-nearest mode is in effect. arg must not be exactly zero, + * and must not be so close to zero that underflow might occur. + */ +static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, + uint64_t *sig0, uint64_t *sig1) +{ + uint64_t arg0_sig = extractFloatx80Frac(arg); + int32_t arg0_exp = extractFloatx80Exp(arg); + bool arg0_sign = extractFloatx80Sign(arg); + bool asign; + int32_t dexp, texp, aexp; + uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; + uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; + uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; + floatx80 t2, accum; - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ + /* + * Compute an approximation of arg/(2+arg), with extra precision, + * as the argument to a polynomial approximation. The extra + * precision is only needed for the first term of the + * approximation, with subsequent terms being significantly + * smaller; the approximation only uses odd exponents, and the + * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 
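+ * (With x = arg/(2+arg), (1+x)/(1-x) equals 1+arg, so the polynomial for log2((1+x)/(1-x)) above yields log2(1+arg); the endpoints 1+arg = sqrt(2)/2 and sqrt(2) map to x = 2*sqrt(2)-3 and 3-2*sqrt(2) respectively.)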
+ */ + if (arg0_sign) { + dexp = 0x3fff; + shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); + sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); } else { - int N = 32 + (expdif % 32); /* as per AMD docs */ + dexp = 0x4000; + shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); + dsig0 |= 0x8000000000000000ULL; + } + texp = arg0_exp - dexp + 0x3ffe; + rsig0 = arg0_sig; + rsig1 = 0; + rsig2 = 0; + if (dsig0 <= rsig0) { + shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); + ++texp; + } + tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); + mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); + sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, + &rsig0, &rsig1, &rsig2); + while ((int64_t) rsig0 < 0) { + --tsig0; + add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, + &rsig0, &rsig1, &rsig2); + } + tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); + /* + * No need to correct any estimation error in tsig1; even with + * such error, it is accurate enough. Now compute the square of + * that approximation. + */ + mul128To256(tsig0, tsig1, tsig0, tsig1, + &t2sig0, &t2sig1, &t2sig2, &t2sig3); + t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe, + t2sig0, t2sig1, &env->fp_status); + + /* Compute the lower parts of the polynomial expansion. */ + accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, (double)(expdif - N)); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); + /* + * The full polynomial expansion is fyl2x_coeff_0 + accum (where + * accum has much lower magnitude, and so, in particular, carry + * out of the addition is not possible), multiplied by t. (This + * expansion is only accurate to about 70 bits, not 128 bits.) + */ + aexp = extractFloatx80Exp(fyl2x_coeff_0); + asign = extractFloatx80Sign(fyl2x_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + bsig0 = extractFloatx80Frac(fyl2x_coeff_0); + bsig1 = 0; + if (asign == extractFloatx80Sign(accum)) { + add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); } - ST0 = double_to_floatx80(env, st0); + /* Multiply by t to compute the required result. 
*/ + mul128To256(asig0, asig1, tsig0, tsig1, + &asig0, &asig1, &asig2, &asig3); + aexp += texp - 0x3ffe; + *exp = aexp; + *sig0 = asig0; + *sig1 = asig1; } void helper_fyl2xp1(CPUX86State *env) { - double fptemp = floatx80_to_double(env, ST0); + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (arg0_exp > 0x3ffd || + (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? + 0x95f619980c4336f7ULL : + 0xd413cccfe7799211ULL))) { + /* + * Out of range for the instruction (ST0 must have absolute + * value less than 1 - sqrt(2)/2 = 0.292..., according to + * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 + * to sqrt(2) - 1, which we allow here), treat as invalid. + */ + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || + arg1_exp == 0x7fff) { + /* + * One argument is zero, or multiplying by infinity; correct + * result is exact and can be obtained by multiplying the + * arguments. + */ + ST1 = floatx80_mul(ST0, ST1, &env->fp_status); + } else if (arg0_exp < 0x3fb0) { + /* + * Multiplying both arguments and an extra-precision version + * of log2(e) is sufficiently precise. + */ + uint64_t sig0, sig1, sig2; + int32_t exp; + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, + &sig0, &sig1, &sig2); + exp = arg0_exp + 1; + mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); + exp += arg1_exp - 0x3ffe; + /* This result is inexact. */ + sig1 |= 1; + ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp, + sig0, sig1, &env->fp_status); + } else { + int32_t aexp; + uint64_t asig0, asig1, asig2; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); + /* + * Multiply by the second argument to compute the required + * result. + */ + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); + aexp += arg1_exp - 0x3ffe; + /* This result is inexact. 
*/ + asig1 |= 1; + env->fp_status.float_rounding_mode = save_mode; + ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp, + asig0, asig1, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save_prec; + } + fpop(env); + merge_exception_flags(env, old_flags); +} - if ((fptemp + 1.0) > 0.0) { - fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */ - fptemp *= floatx80_to_double(env, ST1); - ST1 = double_to_floatx80(env, fptemp); - fpop(env); +void helper_fyl2x(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (arg0_sign && !floatx80_is_zero(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_infinity(ST1)) { + FloatRelation cmp = floatx80_compare(ST0, floatx80_one, + &env->fp_status); + switch (cmp) { + case float_relation_less: + ST1 = floatx80_chs(ST1); + break; + case float_relation_greater: + /* Result is infinity of the same sign as ST1. */ + break; + default: + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + break; + } + } else if (floatx80_is_infinity(ST0)) { + if (floatx80_is_zero(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (arg1_sign) { + ST1 = floatx80_chs(ST0); + } else { + ST1 = ST0; + } + } else if (floatx80_is_zero(ST0)) { + if (floatx80_is_zero(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else { + /* Result is infinity with opposite sign to ST1. */ + float_raise(float_flag_divbyzero, &env->fp_status); + ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, + 0x8000000000000000ULL); + } + } else if (floatx80_is_zero(ST1)) { + if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { + ST1 = floatx80_chs(ST1); + } + /* Otherwise, ST1 is already the correct result. 
*/ + } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { + if (arg1_sign) { + ST1 = floatx80_chs(floatx80_zero); + } else { + ST1 = floatx80_zero; + } } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; + int32_t int_exp; + floatx80 arg0_m1; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + int_exp = arg0_exp - 0x3fff; + if (arg0_sig > 0xb504f333f9de6484ULL) { + ++int_exp; + } + arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, + &env->fp_status), + floatx80_one, &env->fp_status); + if (floatx80_is_zero(arg0_m1)) { + /* Exact power of 2; multiply by ST1. */ + env->fp_status.float_rounding_mode = save_mode; + ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), + ST1, &env->fp_status); + } else { + bool asign = extractFloatx80Sign(arg0_m1); + int32_t aexp; + uint64_t asig0, asig1, asig2; + helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); + if (int_exp != 0) { + bool isign = (int_exp < 0); + int32_t iexp; + uint64_t isig; + int shift; + int_exp = isign ? -int_exp : int_exp; + shift = clz32(int_exp) + 32; + isig = int_exp; + isig <<= shift; + iexp = 0x403e - shift; + shift128RightJamming(asig0, asig1, iexp - aexp, + &asig0, &asig1); + if (asign == isign) { + add128(isig, 0, asig0, asig1, &asig0, &asig1); + } else { + sub128(isig, 0, asig0, asig1, &asig0, &asig1); + } + aexp = iexp; + asign = isign; + } + /* + * Multiply by the second argument to compute the required + * result. + */ + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); + aexp += arg1_exp - 0x3ffe; + /* This result is inexact. 
*/ + asig1 |= 1; + env->fp_status.float_rounding_mode = save_mode; + ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp, + asig0, asig1, &env->fp_status); + } + + env->fp_status.floatx80_rounding_precision = save_prec; } + fpop(env); + merge_exception_flags(env, old_flags); } void helper_fsqrt(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); if (floatx80_is_neg(ST0)) { env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ env->fpus |= 0x400; } ST0 = floatx80_sqrt(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsincos(CPUX86State *env) @@ -950,17 +2228,60 @@ void helper_fsincos(CPUX86State *env) void helper_frndint(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_round_to_int(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fscale(CPUX86State *env) { - if (floatx80_is_any_nan(ST1)) { + uint8_t old_flags = save_exception_flags(env); + if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST1)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + } ST0 = ST1; + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + } else if (floatx80_is_infinity(ST1) && + !floatx80_invalid_encoding(ST0) && + !floatx80_is_any_nan(ST0)) { + if (floatx80_is_neg(ST1)) { + if (floatx80_is_infinity(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else { + ST0 = (floatx80_is_neg(ST0) ? + floatx80_chs(floatx80_zero) : + floatx80_zero); + } + } else { + if (floatx80_is_zero(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else { + ST0 = (floatx80_is_neg(ST0) ? + floatx80_chs(floatx80_infinity) : + floatx80_infinity); + } + } } else { - int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); + int n; + signed char save = env->fp_status.floatx80_rounding_precision; + uint8_t save_flags = get_float_exception_flags(&env->fp_status); + set_float_exception_flags(0, &env->fp_status); + n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); + set_float_exception_flags(save_flags, &env->fp_status); + env->fp_status.floatx80_rounding_precision = 80; ST0 = floatx80_scalbn(ST0, n, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save; } + merge_exception_flags(env, old_flags); } void helper_fsin(CPUX86State *env) @@ -1010,7 +2331,7 @@ void helper_fxam_ST0(CPUX86State *env) if (expdif == MAXEXPD) { if (MANTD(temp) == 0x8000000000000000ULL) { env->fpus |= 0x500; /* Infinity */ - } else { + } else if (MANTD(temp) & 0x8000000000000000ULL) { env->fpus |= 0x100; /* NaN */ } } else if (expdif == 0) { @@ -1019,7 +2340,7 @@ void helper_fxam_ST0(CPUX86State *env) } else { env->fpus |= 0x4400; /* Denormal */ } - } else { + } else if (MANTD(temp) & 0x8000000000000000ULL) { env->fpus |= 0x400; } } @@ -1171,7 +2492,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) In 64-bit mode this is rip, rdp. But in either case we don't write actual data, just zeros. 
*/ cpu_stq_data_ra(env, ptr + XO(legacy.fpip), env->fpip, ra); /* eip+sel; rip */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ + cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), env->fpdp, ra); /* edp+sel; rdp */ addr = ptr + XO(legacy.fpregs); for (i = 0; i < 8; i++) { @@ -1183,6 +2504,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) { + update_mxcsr_from_sse_status(env); cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); } @@ -1600,11 +2922,43 @@ void update_mxcsr_status(CPUX86State *env) } set_float_rounding_mode(rnd_type, &env->sse_status); + /* Set exception flags. */ + set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | + (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | + (mxcsr & FPUS_OE ? float_flag_overflow : 0) | + (mxcsr & FPUS_UE ? float_flag_underflow : 0) | + (mxcsr & FPUS_PE ? float_flag_inexact : 0), + &env->sse_status); + /* set denormals are zero */ set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); /* set flush to zero */ - set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status); + set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); +} + +void update_mxcsr_from_sse_status(CPUX86State *env) +{ + uint8_t flags = get_float_exception_flags(&env->sse_status); + /* + * The MXCSR denormal flag has opposite semantics to + * float_flag_input_denormal (the softfloat code sets that flag + * only when flushing input denormals to zero, but SSE sets it + * only when not flushing them to zero), so is not converted + * here. + */ + env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | + (flags & float_flag_divbyzero ? FPUS_ZE : 0) | + (flags & float_flag_overflow ? FPUS_OE : 0) | + (flags & float_flag_underflow ? FPUS_UE : 0) | + (flags & float_flag_inexact ? FPUS_PE : 0) | + (flags & float_flag_output_denormal ? 
FPUS_UE | FPUS_PE : + 0)); +} + +void helper_update_mxcsr(CPUX86State *env) +{ + update_mxcsr_from_sse_status(env); } void helper_ldmxcsr(CPUX86State *env, uint32_t val) diff --git a/qemu/target/i386/helper.h b/qemu/target/i386/helper.h index 399cc0df99..ca55ded21a 100644 --- a/qemu/target/i386/helper.h +++ b/qemu/target/i386/helper.h @@ -210,6 +210,7 @@ DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) /* MMX/SSE */ DEF_HELPER_2(ldmxcsr, void, env, i32) +DEF_HELPER_1(update_mxcsr, void, env) DEF_HELPER_1(enter_mmx, void, env) DEF_HELPER_1(emms, void, env) DEF_HELPER_3(movq, void, env, ptr, ptr) diff --git a/qemu/target/i386/ops_sse.h b/qemu/target/i386/ops_sse.h index ec1ec745d0..027ff59300 100644 --- a/qemu/target/i386/ops_sse.h +++ b/qemu/target/i386/ops_sse.h @@ -843,6 +843,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s) void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); @@ -855,26 +856,33 @@ void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s) d->ZMM_S(3) = float32_div(float32_one, float32_sqrt(s->ZMM_S(3), &env->sse_status), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } void helper_rcpps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); d->ZMM_S(1) = float32_div(float32_one, s->ZMM_S(1), &env->sse_status); d->ZMM_S(2) = float32_div(float32_one, s->ZMM_S(2), &env->sse_status); d->ZMM_S(3) = float32_div(float32_one, s->ZMM_S(3), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } static inline uint64_t helper_extrq(uint64_t src, int shift, int len) @@ -1031,7 +1039,7 @@ static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float32 s0, s1; s0 = d->ZMM_S(0); @@ -1042,7 +1050,7 @@ void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) void helper_comiss(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float32 s0, s1; s0 = d->ZMM_S(0); @@ -1053,7 +1061,7 @@ void helper_comiss(CPUX86State *env, Reg *d, Reg *s) void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float64 d0, d1; d0 = d->ZMM_D(0); @@ -1435,34 +1443,46 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); - d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); - XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); - XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); - d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); - d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); - 
XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); - XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); + Reg r; + + r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); + r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); + XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); + XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); + r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); + r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); + XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); + XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); + + *d = r; } void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); - XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); - d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); - XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); + Reg r; + + r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); + XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); + r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); + XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); + + *d = r; } void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); - d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); - XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); - XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); - d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); - d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); - XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); - XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); + Reg r; + + r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); + r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); + XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); + XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); + r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); + r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); + XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); + XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); + + *d = r; } void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1751,6 +1771,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1776,19 +1797,18 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_S(2) = float32_round_to_int(s->ZMM_S(2), &env->sse_status); d->ZMM_S(3) = float32_round_to_int(s->ZMM_S(3), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1812,19 +1832,18 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_D(0) = 
float64_round_to_int(s->ZMM_D(0), &env->sse_status); d->ZMM_D(1) = float64_round_to_int(s->ZMM_D(1), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1847,19 +1866,18 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1882,13 +1900,11 @@ void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } @@ -2076,10 +2092,10 @@ static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s, res = (2 << upper) - 1; break; } - for (j = valids - validd; j >= 0; j--) { + for (j = valids == upper ? 
valids : valids - validd; j >= 0; j--) { res <<= 1; v = 1; - for (i = validd; i >= 0; i--) { + for (i = MIN(valids - j, validd); i >= 0; i--) { v &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i)); } res |= v; diff --git a/qemu/target/i386/svm.h b/qemu/target/i386/svm.h index 30649ee9cb..35d0b8a0a8 100644 --- a/qemu/target/i386/svm.h +++ b/qemu/target/i386/svm.h @@ -137,6 +137,7 @@ #define SVM_NPT_PAE (1 << 0) #define SVM_NPT_LMA (1 << 1) #define SVM_NPT_NXE (1 << 2) +#define SVM_NPT_PSE (1 << 3) #define SVM_NPTEXIT_P (1ULL << 0) #define SVM_NPTEXIT_RW (1ULL << 1) diff --git a/qemu/target/i386/svm_helper.c b/qemu/target/i386/svm_helper.c index ade26593a3..4553dc6810 100644 --- a/qemu/target/i386/svm_helper.c +++ b/qemu/target/i386/svm_helper.c @@ -152,16 +152,21 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.nested_ctl)); + + env->nested_pg_mode = 0; + if (nested_ctl & SVM_NPT_ENABLED) { env->nested_cr3 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.nested_cr3)); env->hflags2 |= HF2_NPT_MASK; - env->nested_pg_mode = 0; if (env->cr[4] & CR4_PAE_MASK) { env->nested_pg_mode |= SVM_NPT_PAE; } + if (env->cr[4] & CR4_PSE_MASK) { + env->nested_pg_mode |= SVM_NPT_PSE; + } if (env->hflags & HF_LMA_MASK) { env->nested_pg_mode |= SVM_NPT_LMA; } diff --git a/qemu/target/i386/translate.c b/qemu/target/i386/translate.c index 741102bed4..b92f3f373f 100644 --- a/qemu/target/i386/translate.c +++ b/qemu/target/i386/translate.c @@ -1325,9 +1325,6 @@ static inline void gen_ins(DisasContext *s, MemOp ot) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(tcg_ctx); - } gen_string_movl_A0_EDI(s); /* Note: we must do this dummy write first to be restartable in case of page fault. 
*/ @@ -1340,18 +1337,12 @@ static inline void gen_ins(DisasContext *s, MemOp ot) gen_op_movl_T0_Dshift(s, ot); gen_op_add_reg_T0(s, s->aflag, R_EDI); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); - } } static inline void gen_outs(DisasContext *s, MemOp ot) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(tcg_ctx); - } gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); @@ -1362,9 +1353,6 @@ static inline void gen_outs(DisasContext *s, MemOp ot) gen_op_movl_T0_Dshift(s, ot); gen_op_add_reg_T0(s, s->aflag, R_ESI); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); - } } /* same method as Valgrind : we generate jumps to current or next @@ -7017,6 +7005,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + /* jump generated by gen_repz_ins */ } else { gen_ins(s, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { @@ -7030,8 +7019,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_ext16u_tl(tcg_ctx, s->T0, tcg_ctx->cpu_regs[R_EDX]); gen_check_io(s, ot, pc_start - s->cs_base, svm_is_rep(prefixes) | 4); + if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { + gen_io_start(tcg_ctx); + } if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + /* jump generated by gen_repz_outs */ } else { gen_outs(s, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { @@ -7881,6 +7874,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) l1 = gen_new_label(tcg_ctx); l2 = gen_new_label(tcg_ctx); l3 = gen_new_label(tcg_ctx); + gen_update_cc_op(s); b &= 3; switch(b) { case 0: /* loopnz */ @@ -8314,12 +8308,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) CASE_MODRM_OP(4): /* smsw */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0); tcg_gen_ld_tl(tcg_ctx, s->T0, tcg_ctx->cpu_env, offsetof(CPUX86State, cr[0])); - if (CODE64(s)) { - mod = (modrm >> 6) & 3; - ot = (mod != 3 ? MO_16 : s->dflag); - } else { - ot = MO_16; - } + /* + * In 32-bit mode, the higher 16 bits of the destination + * register are undefined. In practice CR0[31:0] is stored + * just like in 64-bit mode. + */ + mod = (modrm >> 6) & 3; + ot = (mod != 3 ? 
MO_16 : s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); break; case 0xee: /* rdpkru */ @@ -8774,7 +8769,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_helper_read_crN(tcg_ctx, s->T0, tcg_ctx->cpu_env, tcg_const_i32(tcg_ctx, reg)); gen_op_mov_reg_v(s, ot, rm, s->T0); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); + gen_jmp(s, s->pc - s->cs_base); } } break; @@ -8892,6 +8887,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); break; } + gen_helper_update_mxcsr(tcg_ctx, tcg_ctx->cpu_env); gen_lea_modrm(env, s, modrm); tcg_gen_ld32u_tl(tcg_ctx, s->T0, tcg_ctx->cpu_env, offsetof(CPUX86State, mxcsr)); gen_op_st_v(s, MO_32, s->T0, s->A0); diff --git a/qemu/target/m68k/cpu.c b/qemu/target/m68k/cpu.c index 6b636b80eb..5802dbdf24 100644 --- a/qemu/target/m68k/cpu.c +++ b/qemu/target/m68k/cpu.c @@ -236,12 +236,6 @@ static void m68k_cpu_class_init(CPUClass *c) cc->tcg_initialize = m68k_tcg_init; } -#define DEFINE_M68K_CPU_TYPE(cpu_model, initfn) \ - { \ - .name = cpu_model, \ - .initfn = initfn, \ - } - struct M68kCPUInfo { const char *name; void (*initfn)(CPUState *obj); diff --git a/qemu/target/m68k/fpu_helper.c b/qemu/target/m68k/fpu_helper.c index 3f544a0572..7f67fa10db 100644 --- a/qemu/target/m68k/fpu_helper.c +++ b/qemu/target/m68k/fpu_helper.c @@ -149,7 +149,7 @@ void cpu_m68k_set_fpcr(CPUM68KState *env, uint32_t val) void HELPER(fitrunc)(CPUM68KState *env, FPReg *res, FPReg *val) { - int rounding_mode = get_float_rounding_mode(&env->fp_status); + FloatRoundMode rounding_mode = get_float_rounding_mode(&env->fp_status); set_float_rounding_mode(float_round_to_zero, &env->fp_status); res->d = floatx80_round_to_int(val->d, &env->fp_status); set_float_rounding_mode(rounding_mode, &env->fp_status); @@ -300,7 +300,7 @@ void HELPER(fdmul)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) void HELPER(fsglmul)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) { - int rounding_mode = get_float_rounding_mode(&env->fp_status); + FloatRoundMode rounding_mode = get_float_rounding_mode(&env->fp_status); floatx80 a, b; PREC_BEGIN(32); @@ -333,7 +333,7 @@ void HELPER(fddiv)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) void HELPER(fsgldiv)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) { - int rounding_mode = get_float_rounding_mode(&env->fp_status); + FloatRoundMode rounding_mode = get_float_rounding_mode(&env->fp_status); floatx80 a, b; PREC_BEGIN(32); @@ -642,6 +642,11 @@ void HELPER(fatanh)(CPUM68KState *env, FPReg *res, FPReg *val) res->d = floatx80_atanh(val->d, &env->fp_status); } +void HELPER(fetoxm1)(CPUM68KState *env, FPReg *res, FPReg *val) +{ + res->d = floatx80_etoxm1(val->d, &env->fp_status); +} + void HELPER(ftanh)(CPUM68KState *env, FPReg *res, FPReg *val) { res->d = floatx80_tanh(val->d, &env->fp_status); diff --git a/qemu/target/m68k/helper.c b/qemu/target/m68k/helper.c index b0f2e298e7..fb441ee935 100644 --- a/qemu/target/m68k/helper.c +++ b/qemu/target/m68k/helper.c @@ -284,7 +284,7 @@ static int get_physical_address(CPUM68KState *env, hwaddr *physical, /* Transparent Translation Register bit */ env->mmu.mmusr = M68K_MMU_T_040 | M68K_MMU_R_040; } - *physical = address & TARGET_PAGE_MASK; + *physical = address; *page_size = TARGET_PAGE_SIZE; return 0; } @@ -412,7 +412,7 @@ static int get_physical_address(CPUM68KState *env, hwaddr *physical, } *page_size = 1 << page_bits; page_mask = ~(*page_size - 1); - *physical = next & page_mask; + 
*physical = (next & page_mask) + (address & (*page_size - 1)); if (access_type & ACCESS_PTEST) { env->mmu.mmusr |= next & M68K_MMU_SR_MASK_040; @@ -461,6 +461,7 @@ hwaddr m68k_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) if (env->sr & SR_S) { access_type |= ACCESS_SUPER; } + if (get_physical_address(env, &phys_addr, &prot, addr, access_type, &page_size) != 0) { return -1; @@ -525,10 +526,8 @@ bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int size, ret = get_physical_address(&cpu->env, &physical, &prot, address, access_type, &page_size); if (likely(ret == 0)) { - address &= TARGET_PAGE_MASK; - physical += address & (page_size - 1); - tlb_set_page(cs, address, physical, - prot, mmu_idx, TARGET_PAGE_SIZE); + tlb_set_page(cs, address & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot, mmu_idx, page_size); return true; } @@ -1015,9 +1014,8 @@ void HELPER(ptest)(CPUM68KState *env, uint32_t addr, uint32_t is_read) ret = get_physical_address(env, &physical, &prot, addr, access_type, &page_size); if (ret == 0) { - addr &= TARGET_PAGE_MASK; - physical += addr & (page_size - 1); - tlb_set_page(env_cpu(env), addr, physical, + tlb_set_page(env_cpu(env), addr & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot, access_type & ACCESS_SUPER ? MMU_KERNEL_IDX : MMU_USER_IDX, page_size); } diff --git a/qemu/target/m68k/helper.h b/qemu/target/m68k/helper.h index 413f88dc65..f9978b0e20 100644 --- a/qemu/target/m68k/helper.h +++ b/qemu/target/m68k/helper.h @@ -86,6 +86,7 @@ DEF_HELPER_3(fatan, void, env, fp, fp) DEF_HELPER_3(fasin, void, env, fp, fp) DEF_HELPER_3(facos, void, env, fp, fp) DEF_HELPER_3(fatanh, void, env, fp, fp) +DEF_HELPER_3(fetoxm1, void, env, fp, fp) DEF_HELPER_3(ftanh, void, env, fp, fp) DEF_HELPER_3(fsinh, void, env, fp, fp) DEF_HELPER_3(fcosh, void, env, fp, fp) diff --git a/qemu/target/m68k/softfloat.c b/qemu/target/m68k/softfloat.c index 24c313ed69..b6d0ed7acf 100644 --- a/qemu/target/m68k/softfloat.c +++ b/qemu/target/m68k/softfloat.c @@ -42,89 +42,6 @@ static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) return a; } -/* - * Returns the modulo remainder of the extended double-precision floating-point - * value `a' with respect to the corresponding value `b'. - */ - -floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig; - uint64_t qTemp, term0, term1; - - aSig0 = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - bSig = extractFloatx80Frac(b); - bExp = extractFloatx80Exp(b); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig0 << 1) - || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - if (bExp == 0x7FFF) { - if ((uint64_t) (bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - if (bExp == 0) { - if (bSig == 0) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - normalizeFloatx80Subnormal(bSig, &bExp, &bSig); - } - if (aExp == 0) { - if ((uint64_t) (aSig0 << 1) == 0) { - return a; - } - normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); - } - bSig |= UINT64_C(0x8000000000000000); - zSign = aSign; - expDiff = aExp - bExp; - aSig1 = 0; - if (expDiff < 0) { - return a; - } - qTemp = (bSig <= aSig0); - if (qTemp) { - aSig0 -= bSig; - } - expDiff -= 64; - while (0 < expDiff) { - qTemp = estimateDiv128To64(aSig0, aSig1, bSig); - qTemp = (2 < qTemp) ? 
qTemp - 2 : 0; - mul64To128(bSig, qTemp, &term0, &term1); - sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); - shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1); - expDiff -= 62; - } - expDiff += 64; - if (0 < expDiff) { - qTemp = estimateDiv128To64(aSig0, aSig1, bSig); - qTemp = (2 < qTemp) ? qTemp - 2 : 0; - qTemp >>= 64 - expDiff; - mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1); - sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); - shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1); - while (le128(term0, term1, aSig0, aSig1)) { - ++qTemp; - sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); - } - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status); -} - /* * Returns the mantissa of the extended double-precision floating-point * value `a'. @@ -132,7 +49,7 @@ floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_getman(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -166,7 +83,7 @@ floatx80 floatx80_getman(floatx80 a, float_status *status) floatx80 floatx80_getexp(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -202,7 +119,7 @@ floatx80 floatx80_getexp(floatx80 a, float_status *status) floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; int32_t aExp, bExp, shiftCount; uint64_t aSig, bSig; @@ -258,7 +175,7 @@ floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_move(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -306,7 +223,7 @@ static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig) floatx80 floatx80_lognp1(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig, fSig; @@ -505,7 +422,7 @@ floatx80 floatx80_lognp1(floatx80 a, float_status *status) floatx80 floatx80_logn(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig, fSig; @@ -673,7 +590,7 @@ floatx80 floatx80_logn(floatx80 a, float_status *status) floatx80 floatx80_log10(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -730,7 +647,7 @@ floatx80 floatx80_log10(floatx80 a, float_status *status) floatx80 floatx80_log2(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -797,7 +714,7 @@ floatx80 floatx80_log2(floatx80 a, float_status *status) floatx80 floatx80_etox(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -805,7 +722,7 @@ floatx80 floatx80_etox(floatx80 a, float_status *status) int32_t compact, n, j, k, m, m1; floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale; - flag adjflag; + bool adjflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -981,7 +898,7 @@ floatx80 floatx80_etox(floatx80 a, float_status *status) floatx80 floatx80_twotox(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -1131,7 +1048,7 @@ floatx80 floatx80_twotox(floatx80 a, float_status *status) floatx80 floatx80_tentox(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -1286,7 +1203,7 @@ floatx80 floatx80_tentox(floatx80 a, float_status *status) floatx80 floatx80_tan(floatx80 a, float_status *status) { - flag aSign, xSign; + bool aSign, xSign; int32_t aExp, xExp; uint64_t aSig, xSig; @@ -1295,7 +1212,7 
@@ floatx80 floatx80_tan(floatx80 a, float_status *status) int32_t compact, l, n, j; floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2; float32 twoto63; - flag endflag; + bool endflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -1344,10 +1261,10 @@ floatx80 floatx80_tan(floatx80 a, float_status *status) xExp -= 0x3FFF; if (xExp <= 28) { l = 0; - endflag = 1; + endflag = true; } else { l = xExp - 27; - endflag = 0; + endflag = false; } invtwopi = packFloatx80(0, 0x3FFE - l, UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ @@ -1372,7 +1289,7 @@ floatx80 floatx80_tan(floatx80 a, float_status *status) fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ - if (endflag > 0) { + if (endflag) { n = floatx80_to_int32(fp2, status); goto tancont; } @@ -1496,7 +1413,7 @@ floatx80 floatx80_tan(floatx80 a, float_status *status) floatx80 floatx80_sin(floatx80 a, float_status *status) { - flag aSign, xSign; + bool aSign, xSign; int32_t aExp, xExp; uint64_t aSig, xSig; @@ -1505,7 +1422,7 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) int32_t compact, l, n, j; floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; float32 posneg1, twoto63; - flag endflag; + bool endflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -1554,10 +1471,10 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) xExp -= 0x3FFF; if (xExp <= 28) { l = 0; - endflag = 1; + endflag = true; } else { l = xExp - 27; - endflag = 0; + endflag = false; } invtwopi = packFloatx80(0, 0x3FFE - l, UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ @@ -1582,7 +1499,7 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ - if (endflag > 0) { + if (endflag) { n = floatx80_to_int32(fp2, status); goto sincont; } @@ -1735,7 +1652,7 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) floatx80 floatx80_cos(floatx80 a, float_status *status) { - flag aSign, xSign; + bool aSign, xSign; int32_t aExp, xExp; uint64_t aSig, xSig; @@ -1744,7 +1661,7 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) int32_t compact, l, n, j; floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; float32 posneg1, twoto63; - flag endflag; + bool endflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -1793,10 +1710,10 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) xExp -= 0x3FFF; if (xExp <= 28) { l = 0; - endflag = 1; + endflag = true; } else { l = xExp - 27; - endflag = 0; + endflag = false; } invtwopi = packFloatx80(0, 0x3FFE - l, UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ @@ -1821,7 +1738,7 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ - if (endflag > 0) { + if (endflag) { n = floatx80_to_int32(fp2, status); goto sincont; } @@ -1972,7 +1889,7 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) floatx80 floatx80_atan(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2169,7 +2086,7 @@ floatx80 floatx80_atan(floatx80 a, float_status *status) floatx80 floatx80_asin(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2234,7 +2151,7 @@ floatx80 floatx80_asin(floatx80 a, float_status *status) floatx80 floatx80_acos(floatx80 a, 
float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2303,7 +2220,7 @@ floatx80 floatx80_acos(floatx80 a, float_status *status) floatx80 floatx80_atanh(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2368,7 +2285,7 @@ floatx80 floatx80_atanh(floatx80 a, float_status *status) floatx80 floatx80_etoxm1(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2620,7 +2537,7 @@ floatx80 floatx80_etoxm1(floatx80 a, float_status *status) floatx80 floatx80_tanh(floatx80 a, float_status *status) { - flag aSign, vSign; + bool aSign, vSign; int32_t aExp, vExp; uint64_t aSig, vSig; @@ -2735,7 +2652,7 @@ floatx80 floatx80_tanh(floatx80 a, float_status *status) floatx80 floatx80_sinh(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; diff --git a/qemu/target/m68k/softfloat.h b/qemu/target/m68k/softfloat.h index 365ef6ac7a..4bb9567134 100644 --- a/qemu/target/m68k/softfloat.h +++ b/qemu/target/m68k/softfloat.h @@ -23,7 +23,6 @@ #define TARGET_M68K_SOFTFLOAT_H #include "fpu/softfloat.h" -floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status); floatx80 floatx80_getman(floatx80 a, float_status *status); floatx80 floatx80_getexp(floatx80 a, float_status *status); floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status); diff --git a/qemu/target/m68k/translate.c b/qemu/target/m68k/translate.c index 5d0fa7497a..fdb4ccb977 100644 --- a/qemu/target/m68k/translate.c +++ b/qemu/target/m68k/translate.c @@ -5077,6 +5077,20 @@ static void gen_op_fmove_fcr(CPUM68KState *env, DisasContext *s, gen_store_fcr(s, AREG(insn, 0), mask); } return; + case 7: /* Immediate */ + if (REG(insn, 0) == 4) { + if (is_write || + (mask != M68K_FPIAR && mask != M68K_FPSR && + mask != M68K_FPCR)) { + gen_exception(s, s->base.pc_next, EXCP_ILLEGAL); + return; + } + tmp = tcg_const_i32(tcg_ctx, read_im32(env, s)); + gen_store_fcr(s, tmp, mask); + tcg_temp_free(tcg_ctx, tmp); + return; + } + break; default: break; } @@ -5289,6 +5303,9 @@ DISAS_INSN(fpu) case 0x06: /* flognp1 */ gen_helper_flognp1(tcg_ctx, tcg_ctx->cpu_env, cpu_dest, cpu_src); break; + case 0x08: /* fetoxm1 */ + gen_helper_fetoxm1(tcg_ctx, tcg_ctx->cpu_env, cpu_dest, cpu_src); + break; case 0x09: /* ftanh */ gen_helper_ftanh(tcg_ctx, tcg_ctx->cpu_env, cpu_dest, cpu_src); break; diff --git a/qemu/target/mips/cp0_helper.c b/qemu/target/mips/cp0_helper.c index e3600c26d7..9665c0beb5 100644 --- a/qemu/target/mips/cp0_helper.c +++ b/qemu/target/mips/cp0_helper.c @@ -378,16 +378,9 @@ target_ulong helper_mftc0_entryhi(CPUMIPSState *env) target_ulong helper_mftc0_cause(CPUMIPSState *env) { int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC); - int32_t tccause; CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc); - if (other_tc == other->current_tc) { - tccause = other->CP0_Cause; - } else { - tccause = other->CP0_Cause; - } - - return tccause; + return other->CP0_Cause; } target_ulong helper_mftc0_status(CPUMIPSState *env) @@ -877,6 +870,7 @@ void helper_mtc0_memorymapid(CPUMIPSState *env, target_ulong arg1) void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask) { + struct uc_struct *uc = env->uc; uint64_t mask = arg1 >> (TARGET_PAGE_BITS + 1); if (!(env->insn_flags & ISA_MIPS32R6) || (arg1 == ~0) || (mask == 0x0000 || mask == 0x0003 || mask == 0x000F || @@ -1113,6 +1107,7 @@ void helper_mthc0_saar(CPUMIPSState *env, target_ulong arg1) void 
helper_mtc0_entryhi(CPUMIPSState *env, target_ulong arg1) { + struct uc_struct *uc = env->uc; target_ulong old, val, mask; mask = (TARGET_PAGE_MASK << 1) | env->CP0_EntryHi_ASID_mask; if (((env->CP0_Config4 >> CP0C4_IE) & 0x3) >= 2) { diff --git a/qemu/target/mips/cpu-param.h b/qemu/target/mips/cpu-param.h index f073f379fc..57caf5d588 100644 --- a/qemu/target/mips/cpu-param.h +++ b/qemu/target/mips/cpu-param.h @@ -19,7 +19,8 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 40 #define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif -#define TARGET_PAGE_BITS 12 +#define TARGET_PAGE_BITS_VARY +#define TARGET_PAGE_BITS_MIN 12 #define NB_MMU_MODES 4 #endif diff --git a/qemu/target/mips/cpu.h b/qemu/target/mips/cpu.h index 95f6bf5077..4ddd4321d3 100644 --- a/qemu/target/mips/cpu.h +++ b/qemu/target/mips/cpu.h @@ -940,7 +940,35 @@ struct CPUMIPSState { #define CP0C5_UFR 2 #define CP0C5_NFExists 0 int32_t CP0_Config6; + int32_t CP0_Config6_rw_bitmask; +#define CP0C6_BPPASS 31 +#define CP0C6_KPOS 24 +#define CP0C6_KE 23 +#define CP0C6_VTLBONLY 22 +#define CP0C6_LASX 21 +#define CP0C6_SSEN 20 +#define CP0C6_DISDRTIME 19 +#define CP0C6_PIXNUEN 18 +#define CP0C6_SCRAND 17 +#define CP0C6_LLEXCEN 16 +#define CP0C6_DISVC 15 +#define CP0C6_VCLRU 14 +#define CP0C6_DCLRU 13 +#define CP0C6_PIXUEN 12 +#define CP0C6_DISBLKLYEN 11 +#define CP0C6_UMEMUALEN 10 +#define CP0C6_SFBEN 8 +#define CP0C6_FLTINT 7 +#define CP0C6_VLTINT 6 +#define CP0C6_DISBTB 5 +#define CP0C6_STPREFCTL 2 +#define CP0C6_INSTPREF 1 +#define CP0C6_DATAPREF 0 int32_t CP0_Config7; + int64_t CP0_Config7_rw_bitmask; +#define CP0C7_NAPCGEN 2 +#define CP0C7_UNIMUEN 1 +#define CP0C7_VFPUCGEN 0 uint64_t CP0_LLAddr; uint64_t CP0_MAAR[MIPS_MAAR_MAX]; int32_t CP0_MAARI; diff --git a/qemu/target/mips/fpu_helper.c b/qemu/target/mips/fpu_helper.c index 34431468af..027d8c0fa9 100644 --- a/qemu/target/mips/fpu_helper.c +++ b/qemu/target/mips/fpu_helper.c @@ -28,7 +28,6 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/memop.h" -//#include "sysemu/kvm.h" #include "fpu/softfloat.h" @@ -188,43 +187,48 @@ void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t fs, uint32_t rt) } } -int ieee_ex_to_mips(int xcpt) +static inline int ieee_to_mips_xcpt(int ieee_xcpt) { - int ret = 0; - if (xcpt) { - if (xcpt & float_flag_invalid) { - ret |= FP_INVALID; - } - if (xcpt & float_flag_overflow) { - ret |= FP_OVERFLOW; - } - if (xcpt & float_flag_underflow) { - ret |= FP_UNDERFLOW; - } - if (xcpt & float_flag_divbyzero) { - ret |= FP_DIV0; - } - if (xcpt & float_flag_inexact) { - ret |= FP_INEXACT; - } + int mips_xcpt = 0; + + if (ieee_xcpt & float_flag_invalid) { + mips_xcpt |= FP_INVALID; + } + if (ieee_xcpt & float_flag_overflow) { + mips_xcpt |= FP_OVERFLOW; + } + if (ieee_xcpt & float_flag_underflow) { + mips_xcpt |= FP_UNDERFLOW; } - return ret; + if (ieee_xcpt & float_flag_divbyzero) { + mips_xcpt |= FP_DIV0; + } + if (ieee_xcpt & float_flag_inexact) { + mips_xcpt |= FP_INEXACT; + } + + return mips_xcpt; } static inline void update_fcr31(CPUMIPSState *env, uintptr_t pc) { - int tmp = ieee_ex_to_mips(get_float_exception_flags( - &env->active_fpu.fp_status)); + int ieee_exception_flags = get_float_exception_flags( + &env->active_fpu.fp_status); + int mips_exception_flags = 0; - SET_FP_CAUSE(env->active_fpu.fcr31, tmp); + if (ieee_exception_flags) { + mips_exception_flags = ieee_to_mips_xcpt(ieee_exception_flags); + } - if (tmp) { + SET_FP_CAUSE(env->active_fpu.fcr31, mips_exception_flags); + + if (mips_exception_flags) { set_float_exception_flags(0, 
&env->active_fpu.fp_status); - if (GET_FP_ENABLE(env->active_fpu.fcr31) & tmp) { + if (GET_FP_ENABLE(env->active_fpu.fcr31) & mips_exception_flags) { do_raise_exception(env, EXCP_FPE, pc); } else { - UPDATE_FP_FLAGS(env->active_fpu.fcr31, tmp); + UPDATE_FP_FLAGS(env->active_fpu.fcr31, mips_exception_flags); } } } @@ -1058,14 +1062,14 @@ uint32_t helper_float_recip1_s(CPUMIPSState *env, uint32_t fst0) uint64_t helper_float_recip1_ps(CPUMIPSState *env, uint64_t fdt0) { - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_div(float32_one, fdt0 & 0XFFFFFFFF, - &env->active_fpu.fp_status); + fstl2 = float32_div(float32_one, fdt0 & 0XFFFFFFFF, + &env->active_fpu.fp_status); fsth2 = float32_div(float32_one, fdt0 >> 32, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_rsqrt1_d(CPUMIPSState *env, uint64_t fdt0) @@ -1090,31 +1094,34 @@ uint32_t helper_float_rsqrt1_s(CPUMIPSState *env, uint32_t fst0) uint64_t helper_float_rsqrt1_ps(CPUMIPSState *env, uint64_t fdt0) { - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_sqrt(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status); + fstl2 = float32_sqrt(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status); fsth2 = float32_sqrt(fdt0 >> 32, &env->active_fpu.fp_status); - fst2 = float32_div(float32_one, fst2, &env->active_fpu.fp_status); + fstl2 = float32_div(float32_one, fstl2, &env->active_fpu.fp_status); fsth2 = float32_div(float32_one, fsth2, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } -#define FLOAT_RINT(name, bits) \ -uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \ - uint ## bits ## _t fs) \ -{ \ - uint ## bits ## _t fdret; \ - \ - fdret = float ## bits ## _round_to_int(fs, &env->active_fpu.fp_status); \ - update_fcr31(env, GETPC()); \ - return fdret; \ +uint64_t helper_float_rint_d(CPUMIPSState *env, uint64_t fs) +{ + uint64_t fdret; + + fdret = float64_round_to_int(fs, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return fdret; } -FLOAT_RINT(rint_s, 32) -FLOAT_RINT(rint_d, 64) -#undef FLOAT_RINT +uint32_t helper_float_rint_s(CPUMIPSState *env, uint32_t fs) +{ + uint32_t fdret; + + fdret = float32_round_to_int(fs, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return fdret; +} #define FLOAT_CLASS_SIGNALING_NAN 0x001 #define FLOAT_CLASS_QUIET_NAN 0x002 @@ -1127,91 +1134,220 @@ FLOAT_RINT(rint_d, 64) #define FLOAT_CLASS_POSITIVE_SUBNORMAL 0x100 #define FLOAT_CLASS_POSITIVE_ZERO 0x200 -#define FLOAT_CLASS(name, bits) \ -uint ## bits ## _t float_ ## name(uint ## bits ## _t arg, \ - float_status *status) \ -{ \ - if (float ## bits ## _is_signaling_nan(arg, status)) { \ - return FLOAT_CLASS_SIGNALING_NAN; \ - } else if (float ## bits ## _is_quiet_nan(arg, status)) { \ - return FLOAT_CLASS_QUIET_NAN; \ - } else if (float ## bits ## _is_neg(arg)) { \ - if (float ## bits ## _is_infinity(arg)) { \ - return FLOAT_CLASS_NEGATIVE_INFINITY; \ - } else if (float ## bits ## _is_zero(arg)) { \ - return FLOAT_CLASS_NEGATIVE_ZERO; \ - } else if (float ## bits ## _is_zero_or_denormal(arg)) { \ - return FLOAT_CLASS_NEGATIVE_SUBNORMAL; \ - } else { \ - return FLOAT_CLASS_NEGATIVE_NORMAL; \ - } \ - } else { \ - if (float ## bits ## _is_infinity(arg)) { \ - return FLOAT_CLASS_POSITIVE_INFINITY; \ - } else if (float ## bits ## _is_zero(arg)) { \ - return FLOAT_CLASS_POSITIVE_ZERO; \ - } else if (float ## 
bits ## _is_zero_or_denormal(arg)) { \ - return FLOAT_CLASS_POSITIVE_SUBNORMAL; \ - } else { \ - return FLOAT_CLASS_POSITIVE_NORMAL; \ - } \ - } \ -} \ - \ -uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \ - uint ## bits ## _t arg) \ -{ \ - return float_ ## name(arg, &env->active_fpu.fp_status); \ -} - -FLOAT_CLASS(class_s, 32) -FLOAT_CLASS(class_d, 64) -#undef FLOAT_CLASS +uint64_t float_class_d(uint64_t arg, float_status *status) +{ + if (float64_is_signaling_nan(arg, status)) { + return FLOAT_CLASS_SIGNALING_NAN; + } else if (float64_is_quiet_nan(arg, status)) { + return FLOAT_CLASS_QUIET_NAN; + } else if (float64_is_neg(arg)) { + if (float64_is_infinity(arg)) { + return FLOAT_CLASS_NEGATIVE_INFINITY; + } else if (float64_is_zero(arg)) { + return FLOAT_CLASS_NEGATIVE_ZERO; + } else if (float64_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_NEGATIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_NEGATIVE_NORMAL; + } + } else { + if (float64_is_infinity(arg)) { + return FLOAT_CLASS_POSITIVE_INFINITY; + } else if (float64_is_zero(arg)) { + return FLOAT_CLASS_POSITIVE_ZERO; + } else if (float64_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_POSITIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_POSITIVE_NORMAL; + } + } +} + +uint64_t helper_float_class_d(CPUMIPSState *env, uint64_t arg) +{ + return float_class_d(arg, &env->active_fpu.fp_status); +} + +uint32_t float_class_s(uint32_t arg, float_status *status) +{ + if (float32_is_signaling_nan(arg, status)) { + return FLOAT_CLASS_SIGNALING_NAN; + } else if (float32_is_quiet_nan(arg, status)) { + return FLOAT_CLASS_QUIET_NAN; + } else if (float32_is_neg(arg)) { + if (float32_is_infinity(arg)) { + return FLOAT_CLASS_NEGATIVE_INFINITY; + } else if (float32_is_zero(arg)) { + return FLOAT_CLASS_NEGATIVE_ZERO; + } else if (float32_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_NEGATIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_NEGATIVE_NORMAL; + } + } else { + if (float32_is_infinity(arg)) { + return FLOAT_CLASS_POSITIVE_INFINITY; + } else if (float32_is_zero(arg)) { + return FLOAT_CLASS_POSITIVE_ZERO; + } else if (float32_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_POSITIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_POSITIVE_NORMAL; + } + } +} + +uint32_t helper_float_class_s(CPUMIPSState *env, uint32_t arg) +{ + return float_class_s(arg, &env->active_fpu.fp_status); +} /* binary operations */ -#define FLOAT_BINOP(name) \ -uint64_t helper_float_ ## name ## _d(CPUMIPSState *env, \ - uint64_t fdt0, uint64_t fdt1) \ -{ \ - uint64_t dt2; \ - \ - dt2 = float64_ ## name(fdt0, fdt1, &env->active_fpu.fp_status);\ - update_fcr31(env, GETPC()); \ - return dt2; \ -} \ - \ -uint32_t helper_float_ ## name ## _s(CPUMIPSState *env, \ - uint32_t fst0, uint32_t fst1) \ -{ \ - uint32_t wt2; \ - \ - wt2 = float32_ ## name(fst0, fst1, &env->active_fpu.fp_status);\ - update_fcr31(env, GETPC()); \ - return wt2; \ -} \ - \ -uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env, \ - uint64_t fdt0, \ - uint64_t fdt1) \ -{ \ - uint32_t fst0 = fdt0 & 0XFFFFFFFF; \ - uint32_t fsth0 = fdt0 >> 32; \ - uint32_t fst1 = fdt1 & 0XFFFFFFFF; \ - uint32_t fsth1 = fdt1 >> 32; \ - uint32_t wt2; \ - uint32_t wth2; \ - \ - wt2 = float32_ ## name(fst0, fst1, &env->active_fpu.fp_status); \ - wth2 = float32_ ## name(fsth0, fsth1, &env->active_fpu.fp_status); \ - update_fcr31(env, GETPC()); \ - return ((uint64_t)wth2 << 32) | wt2; \ -} - -FLOAT_BINOP(add) -FLOAT_BINOP(sub) -FLOAT_BINOP(mul) -FLOAT_BINOP(div) -#undef FLOAT_BINOP + +uint64_t helper_float_add_d(CPUMIPSState 
*env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_add(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_add_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_add(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_add_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_add(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_add(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + +uint64_t helper_float_sub_d(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_sub(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_sub_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_sub(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_sub_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_sub(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_sub(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + +uint64_t helper_float_mul_d(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_mul(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_mul_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_mul_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + +uint64_t helper_float_div_d(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_div(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_div_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_div(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_div_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_div(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_div(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + /* MIPS specific binary operations */ uint64_t 
helper_float_recip2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) @@ -1234,19 +1370,19 @@ uint32_t helper_float_recip2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2) uint64_t helper_float_recip2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst2 = fdt2 & 0XFFFFFFFF; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; uint32_t fsth2 = fdt2 >> 32; - fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status); + fstl2 = float32_mul(fstl0, fstl2, &env->active_fpu.fp_status); fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status); - fst2 = float32_chs(float32_sub(fst2, float32_one, + fstl2 = float32_chs(float32_sub(fstl2, float32_one, &env->active_fpu.fp_status)); fsth2 = float32_chs(float32_sub(fsth2, float32_one, &env->active_fpu.fp_status)); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_rsqrt2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) @@ -1271,51 +1407,51 @@ uint32_t helper_float_rsqrt2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2) uint64_t helper_float_rsqrt2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst2 = fdt2 & 0XFFFFFFFF; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; uint32_t fsth2 = fdt2 >> 32; - fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status); + fstl2 = float32_mul(fstl0, fstl2, &env->active_fpu.fp_status); fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status); - fst2 = float32_sub(fst2, float32_one, &env->active_fpu.fp_status); + fstl2 = float32_sub(fstl2, float32_one, &env->active_fpu.fp_status); fsth2 = float32_sub(fsth2, float32_one, &env->active_fpu.fp_status); - fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, + fstl2 = float32_chs(float32_div(fstl2, FLOAT_TWO32, &env->active_fpu.fp_status)); fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, &env->active_fpu.fp_status)); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_addr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst1 = fdt1 & 0XFFFFFFFF; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; uint32_t fsth1 = fdt1 >> 32; - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_add(fst0, fsth0, &env->active_fpu.fp_status); - fsth2 = float32_add(fst1, fsth1, &env->active_fpu.fp_status); + fstl2 = float32_add(fstl0, fsth0, &env->active_fpu.fp_status); + fsth2 = float32_add(fstl1, fsth1, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_mulr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst1 = fdt1 & 0XFFFFFFFF; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; uint32_t fsth1 = fdt1 >> 32; - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_mul(fst0, fsth0, &env->active_fpu.fp_status); - fsth2 = float32_mul(fst1, fsth1, &env->active_fpu.fp_status); + fstl2 = float32_mul(fstl0, fsth0, &env->active_fpu.fp_status); + fsth2 = float32_mul(fstl1, fsth1, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 
<< 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } #define FLOAT_MINMAX(name, bits, minmaxfunc) \ @@ -1343,60 +1479,171 @@ FLOAT_MINMAX(mina_d, 64, minnummag) #undef FLOAT_MINMAX /* ternary operations */ -#define UNFUSED_FMA(prefix, a, b, c, flags) \ -{ \ - a = prefix##_mul(a, b, &env->active_fpu.fp_status); \ - if ((flags) & float_muladd_negate_c) { \ - a = prefix##_sub(a, c, &env->active_fpu.fp_status); \ - } else { \ - a = prefix##_add(a, c, &env->active_fpu.fp_status); \ - } \ - if ((flags) & float_muladd_negate_result) { \ - a = prefix##_chs(a); \ - } \ -} - -/* FMA based operations */ -#define FLOAT_FMA(name, type) \ -uint64_t helper_float_ ## name ## _d(CPUMIPSState *env, \ - uint64_t fdt0, uint64_t fdt1, \ - uint64_t fdt2) \ -{ \ - UNFUSED_FMA(float64, fdt0, fdt1, fdt2, type); \ - update_fcr31(env, GETPC()); \ - return fdt0; \ -} \ - \ -uint32_t helper_float_ ## name ## _s(CPUMIPSState *env, \ - uint32_t fst0, uint32_t fst1, \ - uint32_t fst2) \ -{ \ - UNFUSED_FMA(float32, fst0, fst1, fst2, type); \ - update_fcr31(env, GETPC()); \ - return fst0; \ -} \ - \ -uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env, \ - uint64_t fdt0, uint64_t fdt1, \ - uint64_t fdt2) \ -{ \ - uint32_t fst0 = fdt0 & 0XFFFFFFFF; \ - uint32_t fsth0 = fdt0 >> 32; \ - uint32_t fst1 = fdt1 & 0XFFFFFFFF; \ - uint32_t fsth1 = fdt1 >> 32; \ - uint32_t fst2 = fdt2 & 0XFFFFFFFF; \ - uint32_t fsth2 = fdt2 >> 32; \ - \ - UNFUSED_FMA(float32, fst0, fst1, fst2, type); \ - UNFUSED_FMA(float32, fsth0, fsth1, fsth2, type); \ - update_fcr31(env, GETPC()); \ - return ((uint64_t)fsth0 << 32) | fst0; \ -} -FLOAT_FMA(madd, 0) -FLOAT_FMA(msub, float_muladd_negate_c) -FLOAT_FMA(nmadd, float_muladd_negate_result) -FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c) -#undef FLOAT_FMA + +uint64_t helper_float_madd_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_add(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_madd_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_add(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint64_t helper_float_madd_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_add(fstl0, fstl2, &env->active_fpu.fp_status); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_add(fsth0, fsth2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + +uint64_t helper_float_msub_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_sub(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_msub_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_sub(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + 
+uint64_t helper_float_msub_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_sub(fstl0, fstl2, &env->active_fpu.fp_status); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_sub(fsth0, fsth2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + +uint64_t helper_float_nmadd_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_add(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float64_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_nmadd_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_add(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float32_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint64_t helper_float_nmadd_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_add(fstl0, fstl2, &env->active_fpu.fp_status); + fstl0 = float32_chs(fstl0); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_add(fsth0, fsth2, &env->active_fpu.fp_status); + fsth0 = float32_chs(fsth0); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + +uint64_t helper_float_nmsub_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_sub(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float64_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_nmsub_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_sub(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float32_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint64_t helper_float_nmsub_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_sub(fstl0, fstl2, &env->active_fpu.fp_status); + fstl0 = float32_chs(fstl0); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_sub(fsth0, fsth2, &env->active_fpu.fp_status); + fsth0 = float32_chs(fsth0); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + #define FLOAT_FMADDSUB(name, bits, muladd_arg) \ uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \ diff --git a/qemu/target/mips/helper.c b/qemu/target/mips/helper.c index 3c2ba8cec0..f407873180 100644 --- a/qemu/target/mips/helper.c +++ 
b/qemu/target/mips/helper.c @@ -68,6 +68,7 @@ int fixed_mmu_map_address(CPUMIPSState *env, hwaddr *physical, int *prot, int r4k_map_address(CPUMIPSState *env, hwaddr *physical, int *prot, target_ulong address, int rw, int access_type) { + struct uc_struct *uc = env->uc; uint16_t ASID = env->CP0_EntryHi & env->CP0_EntryHi_ASID_mask; uint32_t MMID = env->CP0_MemoryMapID; bool mi = !!((env->CP0_Config5 >> CP0C5_MI) & 1); @@ -461,6 +462,7 @@ void cpu_mips_store_cause(CPUMIPSState *env, target_ulong val) static void raise_mmu_exception(CPUMIPSState *env, target_ulong address, int rw, int tlb_error) { + struct uc_struct *uc = env->uc; CPUState *cs = env_cpu(env); int exception = 0, error_code = 0; @@ -903,6 +905,7 @@ bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size, { MIPSCPU *cpu = MIPS_CPU(cs); CPUMIPSState *env = &cpu->env; + struct uc_struct *uc = env->uc; hwaddr physical; int prot; int mips_access_type; @@ -1424,6 +1427,7 @@ bool mips_cpu_exec_interrupt(CPUState *cs, int interrupt_request) void r4k_invalidate_tlb(CPUMIPSState *env, int idx, int use_extra) { + struct uc_struct *uc = env->uc; CPUState *cs = env_cpu(env); r4k_tlb_t *tlb; target_ulong addr; diff --git a/qemu/target/mips/helper.h b/qemu/target/mips/helper.h index 221e78257b..012f867e59 100644 --- a/qemu/target/mips/helper.h +++ b/qemu/target/mips/helper.h @@ -945,6 +945,21 @@ DEF_HELPER_4(msa_mod_s_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_mod_s_w, void, env, i32, i32, i32) DEF_HELPER_4(msa_mod_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_msubv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_msubv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_msubv_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_msubv_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_mulv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_mulv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_mulv_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_mulv_d, void, env, i32, i32, i32) + DEF_HELPER_4(msa_asub_s_b, void, env, i32, i32, i32) DEF_HELPER_4(msa_asub_s_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_asub_s_w, void, env, i32, i32, i32) @@ -963,6 +978,31 @@ DEF_HELPER_4(msa_hsub_u_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_hsub_u_w, void, env, i32, i32, i32) DEF_HELPER_4(msa_hsub_u_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subs_u_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_u_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subsus_u_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsus_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsus_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsus_u_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subsuu_s_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsuu_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsuu_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsuu_s_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subv_w, void, env, i32, i32, i32) 
+DEF_HELPER_4(msa_subv_d, void, env, i32, i32, i32) + DEF_HELPER_4(msa_ilvev_b, void, env, i32, i32, i32) DEF_HELPER_4(msa_ilvev_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_ilvev_w, void, env, i32, i32, i32) @@ -1058,20 +1098,25 @@ DEF_HELPER_5(msa_srlri_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_binsl_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_binsr_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subs_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subs_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subsus_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subsuu_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_mulv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_maddv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_msubv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dotp_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dotp_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpadd_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpadd_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpsub_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpsub_u_df, void, env, i32, i32, i32, i32) + +DEF_HELPER_4(msa_dotp_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_u_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_u_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_u_d, void, env, i32, i32, i32) DEF_HELPER_5(msa_sld_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_splat_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_vshf_df, void, env, i32, i32, i32, i32) diff --git a/qemu/target/mips/internal.h b/qemu/target/mips/internal.h index 6978801d9e..2d6032ceff 100644 --- a/qemu/target/mips/internal.h +++ b/qemu/target/mips/internal.h @@ -39,7 +39,9 @@ struct mips_def_t { int32_t CP0_Config5; int32_t CP0_Config5_rw_bitmask; int32_t CP0_Config6; + int32_t CP0_Config6_rw_bitmask; int32_t CP0_Config7; + int32_t CP0_Config7_rw_bitmask; target_ulong CP0_LLAddr_rw_bitmask; int CP0_LLAddr_shift; int32_t SYNCI_Step; @@ -217,7 +219,6 @@ uint32_t float_class_s(uint32_t arg, float_status *fst); uint64_t float_class_d(uint64_t arg, float_status *fst); extern unsigned int ieee_rm[]; -int ieee_ex_to_mips(int xcpt); void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask); static inline void restore_rounding_mode(CPUMIPSState *env) diff --git a/qemu/target/mips/mips-defs.h b/qemu/target/mips/mips-defs.h index a831bb4384..ed6a7a9e54 100644 --- a/qemu/target/mips/mips-defs.h +++ b/qemu/target/mips/mips-defs.h @@ -15,7 +15,7 @@ * ------------------------------------------------ */ /* - * bits 0-31: MIPS base instruction sets + * bits 0-23: MIPS base instruction sets */ #define 
ISA_MIPS1 0x0000000000000001ULL #define ISA_MIPS2 0x0000000000000002ULL @@ -34,30 +34,37 @@ #define ISA_MIPS64R6 0x0000000000004000ULL #define ISA_NANOMIPS32 0x0000000000008000ULL /* - * bits 32-47: MIPS ASEs + * bits 24-39: MIPS ASEs */ -#define ASE_MIPS16 0x0000000100000000ULL -#define ASE_MIPS3D 0x0000000200000000ULL -#define ASE_MDMX 0x0000000400000000ULL -#define ASE_DSP 0x0000000800000000ULL -#define ASE_DSP_R2 0x0000001000000000ULL -#define ASE_DSP_R3 0x0000002000000000ULL -#define ASE_MT 0x0000004000000000ULL -#define ASE_SMARTMIPS 0x0000008000000000ULL -#define ASE_MICROMIPS 0x0000010000000000ULL -#define ASE_MSA 0x0000020000000000ULL +#define ASE_MIPS16 0x0000000001000000ULL +#define ASE_MIPS3D 0x0000000002000000ULL +#define ASE_MDMX 0x0000000004000000ULL +#define ASE_DSP 0x0000000008000000ULL +#define ASE_DSP_R2 0x0000000010000000ULL +#define ASE_DSP_R3 0x0000000020000000ULL +#define ASE_MT 0x0000000040000000ULL +#define ASE_SMARTMIPS 0x0000000080000000ULL +#define ASE_MICROMIPS 0x0000000100000000ULL +#define ASE_MSA 0x0000000200000000ULL /* - * bits 48-55: vendor-specific base instruction sets + * bits 40-51: vendor-specific base instruction sets */ -#define INSN_LOONGSON2E 0x0001000000000000ULL -#define INSN_LOONGSON2F 0x0002000000000000ULL -#define INSN_VR54XX 0x0004000000000000ULL -#define INSN_R5900 0x0008000000000000ULL +#define INSN_VR54XX 0x0000010000000000ULL +#define INSN_R5900 0x0000020000000000ULL +#define INSN_LOONGSON2E 0x0000040000000000ULL +#define INSN_LOONGSON2F 0x0000080000000000ULL +#define INSN_LOONGSON3A 0x0000100000000000ULL /* - * bits 56-63: vendor-specific ASEs + * bits 52-63: vendor-specific ASEs */ -#define ASE_MMI 0x0100000000000000ULL -#define ASE_MXU 0x0200000000000000ULL +/* MultiMedia Instructions defined by R5900 */ +#define ASE_MMI 0x0010000000000000ULL +/* MIPS eXtension/enhanced Unit defined by Ingenic */ +#define ASE_MXU 0x0020000000000000ULL +/* Loongson MultiMedia Instructions */ +#define ASE_LMMI 0x0040000000000000ULL +/* Loongson EXTensions */ +#define ASE_LEXT 0x0080000000000000ULL /* MIPS CPU defines. */ #define CPU_MIPS1 (ISA_MIPS1) @@ -67,7 +74,7 @@ #define CPU_VR54XX (CPU_MIPS4 | INSN_VR54XX) #define CPU_R5900 (CPU_MIPS3 | INSN_R5900) #define CPU_LOONGSON2E (CPU_MIPS3 | INSN_LOONGSON2E) -#define CPU_LOONGSON2F (CPU_MIPS3 | INSN_LOONGSON2F) +#define CPU_LOONGSON2F (CPU_MIPS3 | INSN_LOONGSON2F | ASE_LMMI) #define CPU_MIPS5 (CPU_MIPS4 | ISA_MIPS5) @@ -94,6 +101,8 @@ /* Wave Computing: "nanoMIPS" */ #define CPU_NANOMIPS32 (CPU_MIPS32R6 | ISA_NANOMIPS32) +#define CPU_LOONGSON3A (CPU_MIPS64R2 | INSN_LOONGSON3A | ASE_LMMI | ASE_LEXT) + /* * Strictly follow the architecture standard: * - Disallow "special" instruction handling for PMON/SPIM. 
diff --git a/qemu/target/mips/msa_helper.c b/qemu/target/mips/msa_helper.c index c3501927ce..e83c899a93 100644 --- a/qemu/target/mips/msa_helper.c +++ b/qemu/target/mips/msa_helper.c @@ -2232,7 +2232,339 @@ void helper_msa_div_u_d(CPUMIPSState *env, * +---------------+----------------------------------------------------------+ */ -/* TODO: insert Int Dot Product group helpers here */ +#define SIGNED_EXTRACT(e, o, a, df) \ + do { \ + e = SIGNED_EVEN(a, df); \ + o = SIGNED_ODD(a, df); \ + } while (0) + +#define UNSIGNED_EXTRACT(e, o, a, df) \ + do { \ + e = UNSIGNED_EVEN(a, df); \ + o = UNSIGNED_ODD(a, df); \ + } while (0) + + +static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dotp_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dotp_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dotp_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dotp_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dotp_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dotp_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dotp_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dotp_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dotp_s_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_dotp_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dotp_s_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dotp_s_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dotp_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dotp_s_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_dotp_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dotp_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dotp_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dotp_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dotp_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dotp_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dotp_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dotp_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dotp_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dotp_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dotp_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dotp_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void 
helper_msa_dotp_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dotp_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dotp_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dotp_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dotp_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_dotp_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dotp_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dotp_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dpadd_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpadd_s_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dpadd_s_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpadd_s_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpadd_s_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpadd_s_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpadd_s_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpadd_s_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpadd_s_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpadd_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpadd_s_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpadd_s_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpadd_s_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpadd_s_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpadd_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dpadd_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpadd_u_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = 
msa_dpadd_u_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpadd_u_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpadd_u_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpadd_u_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpadd_u_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpadd_u_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpadd_u_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpadd_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpadd_u_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpadd_u_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpadd_u_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpadd_u_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpadd_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); +} + +void helper_msa_dpsub_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpsub_s_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dpsub_s_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpsub_s_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpsub_s_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpsub_s_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpsub_s_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpsub_s_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpsub_s_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpsub_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpsub_s_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpsub_s_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpsub_s_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpsub_s_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpsub_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ 
+ int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); +} + +void helper_msa_dpsub_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpsub_u_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dpsub_u_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpsub_u_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpsub_u_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpsub_u_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpsub_u_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpsub_u_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpsub_u_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpsub_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpsub_u_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpsub_u_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpsub_u_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpsub_u_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpsub_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} /* @@ -2891,36 +3223,250 @@ void helper_msa_mod_u_d(CPUMIPSState *env, * +---------------+----------------------------------------------------------+ */ -/* TODO: insert Int Multiply group helpers here */ +static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + return dest + arg1 * arg2; +} +void helper_msa_maddv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); -/* - * Int Subtract - * ------------ - * - * +---------------+----------------------------------------------------------+ - * | ASUB_S.B | Vector Absolute Values of Signed Subtract (byte) | - * | ASUB_S.H | Vector Absolute Values of Signed Subtract (halfword) | - * | ASUB_S.W | Vector Absolute Values of Signed Subtract (word) | - * | ASUB_S.D | Vector Absolute Values of Signed Subtract (doubleword) | - * | ASUB_U.B | Vector Absolute Values of Unsigned Subtract (byte) | - * | ASUB_U.H | Vector Absolute Values of Unsigned Subtract (halfword) | - * | ASUB_U.W | Vector Absolute Values of Unsigned Subtract (word) | - * | ASUB_U.D | Vector Absolute Values of Unsigned Subtract (doubleword) | - * | HSUB_S.H | Vector Signed Horizontal Subtract (halfword) | - * | HSUB_S.W | Vector Signed Horizontal Subtract (word) | - * | HSUB_S.D | Vector Signed Horizontal Subtract (doubleword) | - * | HSUB_U.H | Vector Unigned Horizontal Subtract (halfword) | - * | HSUB_U.W | Vector Unigned Horizontal Subtract (word) | - * | HSUB_U.D | Vector 
Unigned Horizontal Subtract (doubleword) | - * | SUBS_S.B | Vector Signed Saturated Subtract (of Signed) (byte) | - * | SUBS_S.H | Vector Signed Saturated Subtract (of Signed) (halfword) | - * | SUBS_S.W | Vector Signed Saturated Subtract (of Signed) (word) | - * | SUBS_S.D | Vector Signed Saturated Subtract (of Signed) (doubleword)| - * | SUBS_U.B | Vector Unsigned Saturated Subtract (of Uns.) (byte) | - * | SUBS_U.H | Vector Unsigned Saturated Subtract (of Uns.) (halfword) | - * | SUBS_U.W | Vector Unsigned Saturated Subtract (of Uns.) (word) | - * | SUBS_U.D | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)| + pwd->b[0] = msa_maddv_df(DF_BYTE, pwd->b[0], pws->b[0], pwt->b[0]); + pwd->b[1] = msa_maddv_df(DF_BYTE, pwd->b[1], pws->b[1], pwt->b[1]); + pwd->b[2] = msa_maddv_df(DF_BYTE, pwd->b[2], pws->b[2], pwt->b[2]); + pwd->b[3] = msa_maddv_df(DF_BYTE, pwd->b[3], pws->b[3], pwt->b[3]); + pwd->b[4] = msa_maddv_df(DF_BYTE, pwd->b[4], pws->b[4], pwt->b[4]); + pwd->b[5] = msa_maddv_df(DF_BYTE, pwd->b[5], pws->b[5], pwt->b[5]); + pwd->b[6] = msa_maddv_df(DF_BYTE, pwd->b[6], pws->b[6], pwt->b[6]); + pwd->b[7] = msa_maddv_df(DF_BYTE, pwd->b[7], pws->b[7], pwt->b[7]); + pwd->b[8] = msa_maddv_df(DF_BYTE, pwd->b[8], pws->b[8], pwt->b[8]); + pwd->b[9] = msa_maddv_df(DF_BYTE, pwd->b[9], pws->b[9], pwt->b[9]); + pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]); + pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]); + pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]); + pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]); + pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]); + pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]); +} + +void helper_msa_maddv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_maddv_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_maddv_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_maddv_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_maddv_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_maddv_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_maddv_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_maddv_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_maddv_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_maddv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_maddv_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_maddv_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_maddv_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_maddv_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_maddv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_maddv_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_maddv_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + +static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ +
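    /*
     * MSUBV: 'dest' carries the current wd element; the result is that value
     * minus the product of the corresponding ws and wt elements, the
     * subtractive counterpart of msa_maddv_df above.
     */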
return dest - arg1 * arg2; +} + +void helper_msa_msubv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_msubv_df(DF_BYTE, pwd->b[0], pws->b[0], pwt->b[0]); + pwd->b[1] = msa_msubv_df(DF_BYTE, pwd->b[1], pws->b[1], pwt->b[1]); + pwd->b[2] = msa_msubv_df(DF_BYTE, pwd->b[2], pws->b[2], pwt->b[2]); + pwd->b[3] = msa_msubv_df(DF_BYTE, pwd->b[3], pws->b[3], pwt->b[3]); + pwd->b[4] = msa_msubv_df(DF_BYTE, pwd->b[4], pws->b[4], pwt->b[4]); + pwd->b[5] = msa_msubv_df(DF_BYTE, pwd->b[5], pws->b[5], pwt->b[5]); + pwd->b[6] = msa_msubv_df(DF_BYTE, pwd->b[6], pws->b[6], pwt->b[6]); + pwd->b[7] = msa_msubv_df(DF_BYTE, pwd->b[7], pws->b[7], pwt->b[7]); + pwd->b[8] = msa_msubv_df(DF_BYTE, pwd->b[8], pws->b[8], pwt->b[8]); + pwd->b[9] = msa_msubv_df(DF_BYTE, pwd->b[9], pws->b[9], pwt->b[9]); + pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]); + pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]); + pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]); + pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]); + pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]); + pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]); +} + +void helper_msa_msubv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_msubv_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_msubv_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_msubv_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_msubv_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_msubv_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_msubv_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_msubv_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_msubv_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_msubv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_msubv_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_msubv_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_msubv_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_msubv_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_msubv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_msubv_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_msubv_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 * arg2; +} + +void helper_msa_mulv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_mulv_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_mulv_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] =
msa_mulv_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_mulv_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_mulv_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_mulv_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_mulv_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_mulv_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_mulv_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_mulv_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_mulv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_mulv_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_mulv_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_mulv_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_mulv_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_mulv_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_mulv_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_mulv_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_mulv_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_mulv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_mulv_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_mulv_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_mulv_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_mulv_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_mulv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_mulv_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_mulv_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +/* + * Int Subtract + * ------------ + * + * +---------------+----------------------------------------------------------+ + * | ASUB_S.B | Vector Absolute Values of Signed Subtract (byte) | + * | ASUB_S.H | Vector Absolute Values of Signed Subtract (halfword) | + * | ASUB_S.W | Vector Absolute Values of Signed Subtract (word) | + * | ASUB_S.D | Vector Absolute Values of Signed Subtract (doubleword) | + * | ASUB_U.B | Vector Absolute Values of Unsigned Subtract (byte) | + * | ASUB_U.H | Vector Absolute Values of Unsigned Subtract (halfword) | + * | ASUB_U.W | Vector Absolute Values of Unsigned Subtract (word) | + * | ASUB_U.D | Vector Absolute Values of Unsigned Subtract (doubleword) | + * | HSUB_S.H | Vector Signed Horizontal Subtract (halfword) | + * | HSUB_S.W | Vector Signed Horizontal Subtract (word) | + * | HSUB_S.D | Vector Signed Horizontal Subtract (doubleword) | + * | HSUB_U.H | Vector Unigned Horizontal Subtract (halfword) | + * | HSUB_U.W | Vector Unigned Horizontal Subtract (word) | + * | HSUB_U.D | Vector Unigned Horizontal Subtract (doubleword) | + * | SUBS_S.B | Vector Signed Saturated Subtract (of Signed) (byte) | + * | SUBS_S.H | Vector Signed Saturated Subtract (of Signed) (halfword) | + * 
| SUBS_S.W | Vector Signed Saturated Subtract (of Signed) (word) | + * | SUBS_S.D | Vector Signed Saturated Subtract (of Signed) (doubleword)| + * | SUBS_U.B | Vector Unsigned Saturated Subtract (of Uns.) (byte) | + * | SUBS_U.H | Vector Unsigned Saturated Subtract (of Uns.) (halfword) | + * | SUBS_U.W | Vector Unsigned Saturated Subtract (of Uns.) (word) | + * | SUBS_U.D | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)| * | SUBSUS_U.B | Vector Uns. Sat. Subtract (of S. from Uns.) (byte) | * | SUBSUS_U.H | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword) | * | SUBSUS_U.W | Vector Uns. Sat. Subtract (of S. from Uns.) (word) | @@ -3045,142 +3591,531 @@ void helper_msa_asub_u_b(CPUMIPSState *env, pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]); } -void helper_msa_asub_u_h(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_asub_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_asub_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_asub_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_asub_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_asub_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_asub_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_asub_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_asub_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_asub_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_asub_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_asub_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_asub_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_asub_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_asub_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_asub_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_asub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_asub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df); +} + +void helper_msa_hsub_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_hsub_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_hsub_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_hsub_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_hsub_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_hsub_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_hsub_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_hsub_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_hsub_s_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_hsub_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_hsub_s_df(DF_WORD, pws->w[0], pwt->w[0]); + 
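    /*
     * HSUB_S.W: each destination word is the sign-extended odd (high)
     * halfword of the ws element minus the sign-extended even (low)
     * halfword of the corresponding wt element (see msa_hsub_s_df above).
     */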
pwd->w[1] = msa_hsub_s_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_hsub_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_hsub_s_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_hsub_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_hsub_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_hsub_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df); +} + +void helper_msa_hsub_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_hsub_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_hsub_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_hsub_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_hsub_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_hsub_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_hsub_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_hsub_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_hsub_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_hsub_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_hsub_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_hsub_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_hsub_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_hsub_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_hsub_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_hsub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_hsub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + int64_t max_int = DF_MAX_INT(df); + int64_t min_int = DF_MIN_INT(df); + if (arg2 > 0) { + return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int; + } else { + return (arg1 < max_int + arg2) ? 
arg1 - arg2 : max_int; + } +} + +void helper_msa_subs_s_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_subs_s_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subs_s_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subs_s_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subs_s_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subs_s_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subs_s_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subs_s_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subs_s_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subs_s_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subs_s_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_subs_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_subs_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subs_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subs_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subs_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subs_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subs_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subs_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subs_s_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_subs_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_subs_s_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subs_s_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subs_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subs_s_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_subs_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_subs_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subs_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + return (u_arg1 > u_arg2) ? 
u_arg1 - u_arg2 : 0; +} + +void helper_msa_subs_u_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_subs_u_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subs_u_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subs_u_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subs_u_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subs_u_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subs_u_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subs_u_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subs_u_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subs_u_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subs_u_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_subs_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_subs_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subs_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subs_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subs_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subs_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subs_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subs_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subs_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_subs_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_subs_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subs_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subs_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subs_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_subs_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_subs_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subs_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t max_uint = DF_MAX_UINT(df); + if (arg2 >= 0) { + uint64_t u_arg2 = (uint64_t)arg2; + return (u_arg1 > u_arg2) ? + (int64_t)(u_arg1 - u_arg2) : + 0; + } else { + uint64_t u_arg2 = (uint64_t)(-arg2); + return (u_arg1 < max_uint - u_arg2) ? 
+ (int64_t)(u_arg1 + u_arg2) : + (int64_t)max_uint; + } +} + +void helper_msa_subsus_u_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_subsus_u_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subsus_u_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subsus_u_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subsus_u_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subsus_u_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subsus_u_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subsus_u_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subsus_u_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subsus_u_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subsus_u_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_subsus_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_subsus_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subsus_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subsus_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subsus_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subsus_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subsus_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subsus_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subsus_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_subsus_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_subsus_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subsus_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subsus_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subsus_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_subsus_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_subsus_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subsus_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + int64_t max_int = DF_MAX_INT(df); + int64_t min_int = DF_MIN_INT(df); + if (u_arg1 > u_arg2) { + return u_arg1 - u_arg2 < (uint64_t)max_int ? + (int64_t)(u_arg1 - u_arg2) : + max_int; + } else { + return u_arg2 - u_arg1 < (uint64_t)(-min_int) ? 
+ (int64_t)(u_arg1 - u_arg2) : + min_int; + } +} + +void helper_msa_subsuu_s_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->h[0] = msa_asub_u_df(DF_HALF, pws->h[0], pwt->h[0]); - pwd->h[1] = msa_asub_u_df(DF_HALF, pws->h[1], pwt->h[1]); - pwd->h[2] = msa_asub_u_df(DF_HALF, pws->h[2], pwt->h[2]); - pwd->h[3] = msa_asub_u_df(DF_HALF, pws->h[3], pwt->h[3]); - pwd->h[4] = msa_asub_u_df(DF_HALF, pws->h[4], pwt->h[4]); - pwd->h[5] = msa_asub_u_df(DF_HALF, pws->h[5], pwt->h[5]); - pwd->h[6] = msa_asub_u_df(DF_HALF, pws->h[6], pwt->h[6]); - pwd->h[7] = msa_asub_u_df(DF_HALF, pws->h[7], pwt->h[7]); + pwd->b[0] = msa_subsuu_s_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subsuu_s_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subsuu_s_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subsuu_s_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subsuu_s_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subsuu_s_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subsuu_s_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subsuu_s_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subsuu_s_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subsuu_s_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]); } -void helper_msa_asub_u_w(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subsuu_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->w[0] = msa_asub_u_df(DF_WORD, pws->w[0], pwt->w[0]); - pwd->w[1] = msa_asub_u_df(DF_WORD, pws->w[1], pwt->w[1]); - pwd->w[2] = msa_asub_u_df(DF_WORD, pws->w[2], pwt->w[2]); - pwd->w[3] = msa_asub_u_df(DF_WORD, pws->w[3], pwt->w[3]); + pwd->h[0] = msa_subsuu_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subsuu_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subsuu_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subsuu_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subsuu_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subsuu_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subsuu_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subsuu_s_df(DF_HALF, pws->h[7], pwt->h[7]); } -void helper_msa_asub_u_d(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subsuu_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->d[0] = msa_asub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); - pwd->d[1] = msa_asub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); -} - - -/* TODO: insert the rest of Int Subtract group helpers here */ - - -static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df); + pwd->w[0] = msa_subsuu_s_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subsuu_s_df(DF_WORD, 
pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subsuu_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subsuu_s_df(DF_WORD, pws->w[3], pwt->w[3]); } -void helper_msa_hsub_s_h(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subsuu_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->h[0] = msa_hsub_s_df(DF_HALF, pws->h[0], pwt->h[0]); - pwd->h[1] = msa_hsub_s_df(DF_HALF, pws->h[1], pwt->h[1]); - pwd->h[2] = msa_hsub_s_df(DF_HALF, pws->h[2], pwt->h[2]); - pwd->h[3] = msa_hsub_s_df(DF_HALF, pws->h[3], pwt->h[3]); - pwd->h[4] = msa_hsub_s_df(DF_HALF, pws->h[4], pwt->h[4]); - pwd->h[5] = msa_hsub_s_df(DF_HALF, pws->h[5], pwt->h[5]); - pwd->h[6] = msa_hsub_s_df(DF_HALF, pws->h[6], pwt->h[6]); - pwd->h[7] = msa_hsub_s_df(DF_HALF, pws->h[7], pwt->h[7]); + pwd->d[0] = msa_subsuu_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subsuu_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); } -void helper_msa_hsub_s_w(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) -{ - wr_t *pwd = &(env->active_fpu.fpr[wd].wr); - wr_t *pws = &(env->active_fpu.fpr[ws].wr); - wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->w[0] = msa_hsub_s_df(DF_WORD, pws->w[0], pwt->w[0]); - pwd->w[1] = msa_hsub_s_df(DF_WORD, pws->w[1], pwt->w[1]); - pwd->w[2] = msa_hsub_s_df(DF_WORD, pws->w[2], pwt->w[2]); - pwd->w[3] = msa_hsub_s_df(DF_WORD, pws->w[3], pwt->w[3]); +static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 - arg2; } -void helper_msa_hsub_s_d(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->d[0] = msa_hsub_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); - pwd->d[1] = msa_hsub_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); -} - - -static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df); + pwd->b[0] = msa_subv_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subv_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subv_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subv_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subv_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subv_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subv_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subv_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subv_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subv_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]); } -void helper_msa_hsub_u_h(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->h[0] = msa_hsub_u_df(DF_HALF, pws->h[0], pwt->h[0]); - pwd->h[1] = msa_hsub_u_df(DF_HALF, 
pws->h[1], pwt->h[1]); - pwd->h[2] = msa_hsub_u_df(DF_HALF, pws->h[2], pwt->h[2]); - pwd->h[3] = msa_hsub_u_df(DF_HALF, pws->h[3], pwt->h[3]); - pwd->h[4] = msa_hsub_u_df(DF_HALF, pws->h[4], pwt->h[4]); - pwd->h[5] = msa_hsub_u_df(DF_HALF, pws->h[5], pwt->h[5]); - pwd->h[6] = msa_hsub_u_df(DF_HALF, pws->h[6], pwt->h[6]); - pwd->h[7] = msa_hsub_u_df(DF_HALF, pws->h[7], pwt->h[7]); + pwd->h[0] = msa_subv_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subv_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subv_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subv_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subv_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subv_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subv_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subv_df(DF_HALF, pws->h[7], pwt->h[7]); } -void helper_msa_hsub_u_w(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->w[0] = msa_hsub_u_df(DF_WORD, pws->w[0], pwt->w[0]); - pwd->w[1] = msa_hsub_u_df(DF_WORD, pws->w[1], pwt->w[1]); - pwd->w[2] = msa_hsub_u_df(DF_WORD, pws->w[2], pwt->w[2]); - pwd->w[3] = msa_hsub_u_df(DF_WORD, pws->w[3], pwt->w[3]); + pwd->w[0] = msa_subv_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subv_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subv_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subv_df(DF_WORD, pws->w[3], pwt->w[3]); } -void helper_msa_hsub_u_d(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->d[0] = msa_hsub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); - pwd->d[1] = msa_hsub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); + pwd->d[0] = msa_subv_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subv_df(DF_DOUBLE, pws->d[1], pwt->d[1]); } @@ -4408,11 +5343,6 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd, msa_move_v(pwd, pwx); } -static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return arg1 - arg2; -} - #define MSA_BINOP_IMM_DF(helper, func) \ void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \ uint32_t wd, uint32_t ws, int32_t u5) \ @@ -4594,97 +5524,6 @@ MSA_TEROP_IMMU_DF(binsli, binsl) MSA_TEROP_IMMU_DF(binsri, binsr) #undef MSA_TEROP_IMMU_DF -static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - int64_t max_int = DF_MAX_INT(df); - int64_t min_int = DF_MIN_INT(df); - if (arg2 > 0) { - return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int; - } else { - return (arg1 < max_int + arg2) ? arg1 - arg2 : max_int; - } -} - -static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - uint64_t u_arg1 = UNSIGNED(arg1, df); - uint64_t u_arg2 = UNSIGNED(arg2, df); - return (u_arg1 > u_arg2) ? u_arg1 - u_arg2 : 0; -} - -static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - uint64_t u_arg1 = UNSIGNED(arg1, df); - uint64_t max_uint = DF_MAX_UINT(df); - if (arg2 >= 0) { - uint64_t u_arg2 = (uint64_t)arg2; - return (u_arg1 > u_arg2) ? - (int64_t)(u_arg1 - u_arg2) : - 0; - } else { - uint64_t u_arg2 = (uint64_t)(-arg2); - return (u_arg1 < max_uint - u_arg2) ? 
- (int64_t)(u_arg1 + u_arg2) : - (int64_t)max_uint; - } -} - -static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - uint64_t u_arg1 = UNSIGNED(arg1, df); - uint64_t u_arg2 = UNSIGNED(arg2, df); - int64_t max_int = DF_MAX_INT(df); - int64_t min_int = DF_MIN_INT(df); - if (u_arg1 > u_arg2) { - return u_arg1 - u_arg2 < (uint64_t)max_int ? - (int64_t)(u_arg1 - u_arg2) : - max_int; - } else { - return u_arg2 - u_arg1 < (uint64_t)(-min_int) ? - (int64_t)(u_arg1 - u_arg2) : - min_int; - } -} - -static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return arg1 * arg2; -} - -#define SIGNED_EXTRACT(e, o, a, df) \ - do { \ - e = SIGNED_EVEN(a, df); \ - o = SIGNED_ODD(a, df); \ - } while (0) - -#define UNSIGNED_EXTRACT(e, o, a, df) \ - do { \ - e = UNSIGNED_EVEN(a, df); \ - o = UNSIGNED_ODD(a, df); \ - } while (0) - -static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - -static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - #define CONCATENATE_AND_SLIDE(s, k) \ do { \ for (i = 0; i < s; i++) { \ @@ -4802,15 +5641,6 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, \ } \ } -MSA_BINOP_DF(subv) -MSA_BINOP_DF(subs_s) -MSA_BINOP_DF(subs_u) -MSA_BINOP_DF(subsus_u) -MSA_BINOP_DF(subsuu_s) -MSA_BINOP_DF(mulv) -MSA_BINOP_DF(dotp_s) -MSA_BINOP_DF(dotp_u) - MSA_BINOP_DF(mul_q) MSA_BINOP_DF(mulr_q) #undef MSA_BINOP_DF @@ -4824,66 +5654,6 @@ void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd, msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]); } -static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - return dest + arg1 * arg2; -} - -static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - return dest - arg1 * arg2; -} - -static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - -static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - -static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); -} - -static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t 
odd_arg2; - UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); -} - static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1, int64_t arg2) { @@ -5010,12 +5780,6 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \ } \ } -MSA_TEROP_DF(maddv) -MSA_TEROP_DF(msubv) -MSA_TEROP_DF(dpadd_s) -MSA_TEROP_DF(dpadd_u) -MSA_TEROP_DF(dpsub_s) -MSA_TEROP_DF(dpsub_u) MSA_TEROP_DF(binsl) MSA_TEROP_DF(binsr) MSA_TEROP_DF(madd_q) @@ -5427,54 +6191,80 @@ static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr) #define CLEAR_IS_INEXACT 2 #define RECIPROCAL_INEXACT 4 -static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) +static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt) { - int ieee_ex; + int mips_xcpt = 0; + + if (ieee_xcpt & float_flag_invalid) { + mips_xcpt |= FP_INVALID; + } + if (ieee_xcpt & float_flag_overflow) { + mips_xcpt |= FP_OVERFLOW; + } + if (ieee_xcpt & float_flag_underflow) { + mips_xcpt |= FP_UNDERFLOW; + } + if (ieee_xcpt & float_flag_divbyzero) { + mips_xcpt |= FP_DIV0; + } + if (ieee_xcpt & float_flag_inexact) { + mips_xcpt |= FP_INEXACT; + } + + return mips_xcpt; +} - int c; +static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) +{ + int ieee_exception_flags; + int mips_exception_flags = 0; int cause; int enable; - ieee_ex = get_float_exception_flags(&env->active_tc.msa_fp_status); + ieee_exception_flags = get_float_exception_flags( + &env->active_tc.msa_fp_status); /* QEMU softfloat does not signal all underflow cases */ if (denormal) { - ieee_ex |= float_flag_underflow; + ieee_exception_flags |= float_flag_underflow; + } + if (ieee_exception_flags) { + mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags); } - - c = ieee_ex_to_mips(ieee_ex); enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED; /* Set Inexact (I) when flushing inputs to zero */ - if ((ieee_ex & float_flag_input_denormal) && + if ((ieee_exception_flags & float_flag_input_denormal) && (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) { if (action & CLEAR_IS_INEXACT) { - c &= ~FP_INEXACT; + mips_exception_flags &= ~FP_INEXACT; } else { - c |= FP_INEXACT; + mips_exception_flags |= FP_INEXACT; } } /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */ - if ((ieee_ex & float_flag_output_denormal) && + if ((ieee_exception_flags & float_flag_output_denormal) && (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) { - c |= FP_INEXACT; + mips_exception_flags |= FP_INEXACT; if (action & CLEAR_FS_UNDERFLOW) { - c &= ~FP_UNDERFLOW; + mips_exception_flags &= ~FP_UNDERFLOW; } else { - c |= FP_UNDERFLOW; + mips_exception_flags |= FP_UNDERFLOW; } } /* Set Inexact (I) when Overflow (O) is not enabled */ - if ((c & FP_OVERFLOW) != 0 && (enable & FP_OVERFLOW) == 0) { - c |= FP_INEXACT; + if ((mips_exception_flags & FP_OVERFLOW) != 0 && + (enable & FP_OVERFLOW) == 0) { + mips_exception_flags |= FP_INEXACT; } /* Clear Exact Underflow when Underflow (U) is not enabled */ - if ((c & FP_UNDERFLOW) != 0 && (enable & FP_UNDERFLOW) == 0 && - (c & FP_INEXACT) == 0) { - c &= ~FP_UNDERFLOW; + if ((mips_exception_flags & FP_UNDERFLOW) != 0 && + (enable & FP_UNDERFLOW) == 0 && + (mips_exception_flags & FP_INEXACT) == 0) { + mips_exception_flags &= ~FP_UNDERFLOW; } /* @@ -5482,11 +6272,11 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) * divide by zero */ if 
((action & RECIPROCAL_INEXACT) && - (c & (FP_INVALID | FP_DIV0)) == 0) { - c = FP_INEXACT; + (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) { + mips_exception_flags = FP_INEXACT; } - cause = c & enable; /* all current enabled exceptions */ + cause = mips_exception_flags & enable; /* all current enabled exceptions */ if (cause == 0) { /* @@ -5494,7 +6284,7 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) * with all current exceptions */ SET_FP_CAUSE(env->active_tc.msacsr, - (GET_FP_CAUSE(env->active_tc.msacsr) | c)); + (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags)); } else { /* Current exceptions are enabled */ if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) { @@ -5503,11 +6293,11 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) * with all enabled exceptions */ SET_FP_CAUSE(env->active_tc.msacsr, - (GET_FP_CAUSE(env->active_tc.msacsr) | c)); + (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags)); } } - return c; + return mips_exception_flags; } static inline int get_enabled_exceptions(const CPUMIPSState *env, int c) @@ -5516,7 +6306,7 @@ static inline int get_enabled_exceptions(const CPUMIPSState *env, int c) return c & enable; } -static inline float16 float16_from_float32(int32_t a, flag ieee, +static inline float16 float16_from_float32(int32_t a, bool ieee, float_status *status) { float16 f_val; @@ -5535,7 +6325,7 @@ static inline float32 float32_from_float64(int64_t a, float_status *status) return a < 0 ? (f_val | (1 << 31)) : f_val; } -static inline float32 float32_from_float16(int16_t a, flag ieee, +static inline float32 float32_from_float16(int16_t a, bool ieee, float_status *status) { float32 f_val; @@ -6572,7 +7362,7 @@ void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd, * IEEE and "ARM" format. The latter gains extra exponent * range by omitting the NaN/Inf encodings. */ - flag ieee = 1; + bool ieee = true; MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16); MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16); @@ -7186,7 +7976,7 @@ void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd, * IEEE and "ARM" format. The latter gains extra exponent * range by omitting the NaN/Inf encodings. */ - flag ieee = 1; + bool ieee = true; MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32); } @@ -7222,7 +8012,7 @@ void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd, * IEEE and "ARM" format. The latter gains extra exponent * range by omitting the NaN/Inf encodings. 
*/ - flag ieee = 1; + bool ieee = true; MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32); } diff --git a/qemu/target/mips/op_helper.c b/qemu/target/mips/op_helper.c index 9802b9cebd..f8119b999e 100644 --- a/qemu/target/mips/op_helper.c +++ b/qemu/target/mips/op_helper.c @@ -618,6 +618,7 @@ static inline uint64_t get_tlb_pfn_from_entrylo(uint64_t entrylo) static void r4k_fill_tlb(CPUMIPSState *env, int idx) { + struct uc_struct *uc = env->uc; r4k_tlb_t *tlb; uint64_t mask = env->CP0_PageMask >> (TARGET_PAGE_BITS + 1); @@ -682,6 +683,7 @@ void r4k_helper_tlbinvf(CPUMIPSState *env) void r4k_helper_tlbwi(CPUMIPSState *env) { + struct uc_struct *uc = env->uc; bool mi = !!((env->CP0_Config5 >> CP0C5_MI) & 1); target_ulong VPN; uint16_t ASID = env->CP0_EntryHi & env->CP0_EntryHi_ASID_mask; @@ -738,6 +740,7 @@ void r4k_helper_tlbwr(CPUMIPSState *env) void r4k_helper_tlbp(CPUMIPSState *env) { + struct uc_struct *uc = env->uc; bool mi = !!((env->CP0_Config5 >> CP0C5_MI) & 1); r4k_tlb_t *tlb; target_ulong mask; @@ -1241,6 +1244,7 @@ static inline void ensure_writable_pages(CPUMIPSState *env, int mmu_idx, uintptr_t retaddr) { + struct uc_struct *uc = env->uc; /* FIXME: Probe the actual accesses (pass and use a size) */ if (unlikely(MSA_PAGESPAN(addr))) { /* first page */ diff --git a/qemu/target/mips/translate.c b/qemu/target/mips/translate.c index 3fab57b251..b8c82c82d8 100644 --- a/qemu/target/mips/translate.c +++ b/qemu/target/mips/translate.c @@ -1040,7 +1040,7 @@ enum { OPC_BC2NEZ = (0x0D << 21) | OPC_CP2, }; -#define MASK_LMI(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F)) +#define MASK_LMMI(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F)) enum { OPC_PADDSH = (24 << 21) | (0x00) | OPC_CP2, @@ -3384,7 +3384,8 @@ static void gen_ld(DisasContext *ctx, uint32_t opc, TCGv t0, t1, t2; int mem_idx = ctx->mem_idx; - if (rt == 0 && ctx->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F)) { + if (rt == 0 && ctx->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F | + INSN_LOONGSON3A)) { /* * Loongson CPU uses a load to zero register for prefetch. * We emulate it as a NOP. 
On other CPU we must perform the @@ -5520,7 +5521,7 @@ static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt) TCGv_i64 t0, t1; TCGCond cond; - opc = MASK_LMI(ctx->opcode); + opc = MASK_LMMI(ctx->opcode); switch (opc) { case OPC_ADD_CP2: case OPC_SUB_CP2: @@ -5995,6 +5996,7 @@ static void gen_trap(DisasContext *ctx, uint32_t opc, static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) { + struct uc_struct *uc = ctx->uc; if (unlikely(ctx->base.singlestep_enabled)) { return false; } @@ -27207,7 +27209,7 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_MULTU_G_2F: case OPC_MOD_G_2F: case OPC_MODU_G_2F: - check_insn(ctx, INSN_LOONGSON2F); + check_insn(ctx, INSN_LOONGSON2F | ASE_LEXT); gen_loongson_integer(ctx, op1, rd, rs, rt); break; case OPC_CLO: @@ -27240,7 +27242,7 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_DDIVU_G_2F: case OPC_DMOD_G_2F: case OPC_DMODU_G_2F: - check_insn(ctx, INSN_LOONGSON2F); + check_insn(ctx, INSN_LOONGSON2F | ASE_LEXT); gen_loongson_integer(ctx, op1, rd, rs, rt); break; #endif @@ -29097,6 +29099,38 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) break; } break; + case OPC_MADDV_df: + switch (df) { + case DF_BYTE: + gen_helper_msa_maddv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_maddv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_maddv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_maddv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } + break; + case OPC_MSUBV_df: + switch (df) { + case DF_BYTE: + gen_helper_msa_msubv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_msubv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_msubv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_msubv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } + break; case OPC_ASUB_S_df: switch (df) { case DF_BYTE: @@ -29306,10 +29340,36 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_SUBS_S_df: - gen_helper_msa_subs_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subs_s_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subs_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subs_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subs_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_MULV_df: - gen_helper_msa_mulv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_mulv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_mulv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_mulv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_mulv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SLD_df: gen_helper_msa_sld_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); @@ -29318,25 +29378,71 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) gen_helper_msa_vshf_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); break; case OPC_SUBV_df: - gen_helper_msa_subv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, 
twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SUBS_U_df: - gen_helper_msa_subs_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); - break; - case OPC_MADDV_df: - gen_helper_msa_maddv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subs_u_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subs_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subs_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subs_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SPLAT_df: gen_helper_msa_splat_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); break; case OPC_SUBSUS_U_df: - gen_helper_msa_subsus_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); - break; - case OPC_MSUBV_df: - gen_helper_msa_msubv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subsus_u_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subsus_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subsus_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subsus_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SUBSUU_S_df: - gen_helper_msa_subsuu_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subsuu_s_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subsuu_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subsuu_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subsuu_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DOTP_S_df: @@ -29407,22 +29513,82 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_DOTP_S_df: - gen_helper_msa_dotp_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dotp_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dotp_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dotp_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DOTP_U_df: - gen_helper_msa_dotp_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dotp_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dotp_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dotp_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DPADD_S_df: - gen_helper_msa_dpadd_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpadd_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpadd_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpadd_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + 
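            /*
             * DOTP/DPADD/DPSUB are defined only for the halfword, word and
             * doubleword data formats, so these per-width switches have no
             * DF_BYTE case.
             */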
break; + } break; case OPC_DPADD_U_df: - gen_helper_msa_dpadd_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpadd_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpadd_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpadd_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DPSUB_S_df: - gen_helper_msa_dpsub_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpsub_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpsub_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpsub_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DPSUB_U_df: - gen_helper_msa_dpsub_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpsub_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpsub_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpsub_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; } break; @@ -30683,7 +30849,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_CP2: - check_insn(ctx, INSN_LOONGSON2F); + check_insn(ctx, ASE_LMMI); /* Note that these instructions use different fields. */ gen_loongson_multimedia(ctx, sa, rd, rt); break; @@ -30849,7 +31015,8 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) CPUMIPSState *env = cs->env_ptr; // unicorn setup - ctx->uc = cs->uc; + struct uc_struct *uc = cs->uc; + ctx->uc = uc; ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK; ctx->saved_pc = -1; @@ -31238,7 +31405,9 @@ void cpu_state_reset(CPUMIPSState *env) env->CP0_Config5 = env->cpu_model->CP0_Config5; env->CP0_Config5_rw_bitmask = env->cpu_model->CP0_Config5_rw_bitmask; env->CP0_Config6 = env->cpu_model->CP0_Config6; + env->CP0_Config6_rw_bitmask = env->cpu_model->CP0_Config6_rw_bitmask; env->CP0_Config7 = env->cpu_model->CP0_Config7; + env->CP0_Config7_rw_bitmask = env->cpu_model->CP0_Config7_rw_bitmask; env->CP0_LLAddr_rw_bitmask = env->cpu_model->CP0_LLAddr_rw_bitmask << env->cpu_model->CP0_LLAddr_shift; env->CP0_LLAddr_shift = env->cpu_model->CP0_LLAddr_shift; diff --git a/qemu/target/mips/translate_init.inc.c b/qemu/target/mips/translate_init.inc.c index 3e395c7e6a..02885b5c65 100644 --- a/qemu/target/mips/translate_init.inc.c +++ b/qemu/target/mips/translate_init.inc.c @@ -366,7 +366,7 @@ const mips_def_t mips_defs[] = }, { /* FIXME: - * Config3: CMGCR, PW, VZ, CTXTC, CDMM, TL + * Config3: VZ, CTXTC, CDMM, TL * Config4: MMUExtDef * Config5: MRP * FIR(FCR0): Has2008 @@ -380,10 +380,11 @@ const mips_def_t mips_defs[] = (2 << CP0C1_DS) | (4 << CP0C1_DL) | (3 << CP0C1_DA) | (1 << CP0C1_PC) | (1 << CP0C1_FP), .CP0_Config2 = MIPS_CONFIG2, - .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | (1 << CP0C3_MSAP) | + .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | + (1 << CP0C3_CMGCR) | (1 << CP0C3_MSAP) | (1 << CP0C3_BP) | (1 << CP0C3_BI) | (1 << CP0C3_SC) | - (1 << CP0C3_ULRI) | (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | - (1 << CP0C3_VInt), + (1 << CP0C3_PW) | (1 << CP0C3_ULRI) | (1 << CP0C3_RXI) | + (1 << CP0C3_LPA) | (1 << CP0C3_VInt), .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (2 << CP0C4_IE) | (0x1c << CP0C4_KScrExist), .CP0_Config4_rw_bitmask = 0, @@ -801,6 +802,92 @@ const 
mips_def_t mips_defs[] = .insn_flags = CPU_LOONGSON2F, .mmu_type = MMU_TYPE_R4000, }, + { + .name = "Loongson-3A1000", + .CP0_PRid = 0x6305, + /* 64KB I-cache and d-cache. 4 way with 32 bit cache line size. */ + .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) | + (MMU_TYPE_R4000 << CP0C0_MT), + .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) | + (3 << CP0C1_IS) | (4 << CP0C1_IL) | (3 << CP0C1_IA) | + (3 << CP0C1_DS) | (4 << CP0C1_DL) | (3 << CP0C1_DA) | + (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP), + .CP0_Config2 = MIPS_CONFIG2 | (7 << CP0C2_SS) | (4 << CP0C2_SL) | + (3 << CP0C2_SA), + .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_LPA), + .CP0_LLAddr_rw_bitmask = 0, + .SYNCI_Step = 32, + .CCRes = 2, + .CP0_Status_rw_bitmask = 0x74D8FFFF, + .CP0_PageGrain = (1 << CP0PG_ELPA), + .CP0_PageGrain_rw_bitmask = (1 << CP0PG_ELPA), + .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV) | (0x1 << FCR0_F64) | + (0x1 << FCR0_PS) | (0x1 << FCR0_L) | (0x1 << FCR0_W) | + (0x1 << FCR0_D) | (0x1 << FCR0_S), + .CP1_fcr31 = 0, + .CP1_fcr31_rw_bitmask = 0xFF83FFFF, + .SEGBITS = 42, + .PABITS = 48, + .insn_flags = CPU_LOONGSON3A, + .mmu_type = MMU_TYPE_R4000, + }, + { + .name = "Loongson-3A4000", + .CP0_PRid = 0x14C000, + /* 64KB I-cache and d-cache. 4 way with 32 bit cache line size. */ + .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) | + (MMU_TYPE_R4000 << CP0C0_MT), + .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) | + (2 << CP0C1_IS) | (5 << CP0C1_IL) | (3 << CP0C1_IA) | + (2 << CP0C1_DS) | (5 << CP0C1_DL) | (3 << CP0C1_DA) | + (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP), + .CP0_Config2 = MIPS_CONFIG2 | (5 << CP0C2_SS) | (5 << CP0C2_SL) | + (15 << CP0C2_SA), + .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | (1 << CP0C3_MSAP) | + (1 << CP0C3_BP) | (1 << CP0C3_BI) | (1 << CP0C3_ULRI) | + (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | (1 << CP0C3_VInt), + .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (2 << CP0C4_IE) | + (1 << CP0C4_AE) | (0x1c << CP0C4_KScrExist), + .CP0_Config4_rw_bitmask = 0, + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_CRCP) | (1 << CP0C5_NFExists), + .CP0_Config5_rw_bitmask = (1 << CP0C5_K) | (1 << CP0C5_CV) | + (1 << CP0C5_MSAEn) | (1 << CP0C5_UFE) | + (1 << CP0C5_FRE) | (1 << CP0C5_SBRI), + .CP0_Config6 = (1 << CP0C6_VCLRU) | (1 << CP0C6_DCLRU) | + (1 << CP0C6_SFBEN) | (1 << CP0C6_VLTINT) | + (1 << CP0C6_INSTPREF) | (1 << CP0C6_DATAPREF), + .CP0_Config6_rw_bitmask = (1 << CP0C6_BPPASS) | (0x3f << CP0C6_KPOS) | + (1 << CP0C6_KE) | (1 << CP0C6_VTLBONLY) | + (1 << CP0C6_LASX) | (1 << CP0C6_SSEN) | + (1 << CP0C6_DISDRTIME) | (1 << CP0C6_PIXNUEN) | + (1 << CP0C6_SCRAND) | (1 << CP0C6_LLEXCEN) | + (1 << CP0C6_DISVC) | (1 << CP0C6_VCLRU) | + (1 << CP0C6_DCLRU) | (1 << CP0C6_PIXUEN) | + (1 << CP0C6_DISBLKLYEN) | (1 << CP0C6_UMEMUALEN) | + (1 << CP0C6_SFBEN) | (1 << CP0C6_FLTINT) | + (1 << CP0C6_VLTINT) | (1 << CP0C6_DISBTB) | + (3 << CP0C6_STPREFCTL) | (1 << CP0C6_INSTPREF) | + (1 << CP0C6_DATAPREF), + .CP0_Config7 = 0, + .CP0_Config7_rw_bitmask = (1 << CP0C7_NAPCGEN) | (1 << CP0C7_UNIMUEN) | + (1 << CP0C7_VFPUCGEN), + .CP0_LLAddr_rw_bitmask = 1, + .SYNCI_Step = 16, + .CCRes = 2, + .CP0_Status_rw_bitmask = 0x7DDBFFFF, + .CP0_PageGrain = (1 << CP0PG_ELPA), + .CP0_PageGrain_rw_bitmask = (1U << CP0PG_RIE) | (1 << CP0PG_XIE) | + (1 << CP0PG_ELPA) | (1 << CP0PG_IEC), + .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV) | (0x1 << FCR0_F64) | + (0x1 << FCR0_PS) | (0x1 << FCR0_L) | (0x1 << FCR0_W) | + (0x1 << FCR0_D) | (0x1 
<< FCR0_S), + .CP1_fcr31 = 0, + .CP1_fcr31_rw_bitmask = 0xFF83FFFF, + .SEGBITS = 48, + .PABITS = 48, + .insn_flags = CPU_LOONGSON3A, + .mmu_type = MMU_TYPE_R4000, + }, { /* A generic CPU providing MIPS64 DSP R2 ASE features. FIXME: Eventually this should be replaced by a real CPU model. */ diff --git a/qemu/target/ppc/cpu.h b/qemu/target/ppc/cpu.h index 26ed16808c..f7b127c9a3 100644 --- a/qemu/target/ppc/cpu.h +++ b/qemu/target/ppc/cpu.h @@ -129,8 +129,9 @@ enum { POWERPC_EXCP_SDOOR_HV = 100, /* ISA 3.00 additions */ POWERPC_EXCP_HVIRT = 101, + POWERPC_EXCP_SYSCALL_VECTORED = 102, /* scv exception */ /* EOL */ - POWERPC_EXCP_NB = 102, + POWERPC_EXCP_NB = 103, /* QEMU exceptions: used internally during code translation */ POWERPC_EXCP_STOP = 0x200, /* stop translation */ POWERPC_EXCP_BRANCH = 0x201, /* branch instruction */ @@ -460,6 +461,9 @@ typedef struct ppc_v3_pate_t { #define DSISR_AMR 0x00200000 /* Unsupported Radix Tree Configuration */ #define DSISR_R_BADCONFIG 0x00080000 +#define DSISR_ATOMIC_RC 0x00040000 +/* Unable to translate address of (guest) pde or process/page table entry */ +#define DSISR_PRTABLE_FAULT 0x00020000 /* SRR1 error code fields */ @@ -469,9 +473,31 @@ typedef struct ppc_v3_pate_t { #define SRR1_PROTFAULT DSISR_PROTFAULT #define SRR1_IAMR DSISR_AMR +/* SRR1[42:45] wakeup fields for System Reset Interrupt */ + +#define SRR1_WAKEMASK 0x003c0000 /* reason for wakeup */ + +#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ +#define SRR1_WAKEHVI 0x00240000 /* Hypervisor Virt. Interrupt (P9) */ +#define SRR1_WAKEEE 0x00200000 /* External interrupt */ +#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ +#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell */ +#define SRR1_WAKERESET 0x00100000 /* System reset */ +#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell */ +#define SRR1_WAKESCOM 0x00080000 /* SCOM not in power-saving mode */ + +/* SRR1[46:47] power-saving exit mode */ + +#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask */ + +#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */ +#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */ +#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */ + /* Facility Status and Control (FSCR) bits */ #define FSCR_EBB (63 - 56) /* Event-Based Branch Facility */ #define FSCR_TAR (63 - 55) /* Target Address Register */ +#define FSCR_SCV (63 - 51) /* System call vectored */ /* Interrupt cause mask and position in FSCR. 
HFSCR has the same format */ #define FSCR_IC_MASK (0xFFULL) #define FSCR_IC_POS (63 - 7) @@ -481,6 +507,7 @@ typedef struct ppc_v3_pate_t { #define FSCR_IC_TM 5 #define FSCR_IC_EBB 7 #define FSCR_IC_TAR 8 +#define FSCR_IC_SCV 12 /* Exception state register bits definition */ #define ESR_PIL PPC_BIT(36) /* Illegal Instruction */ @@ -548,6 +575,8 @@ enum { POWERPC_FLAG_VSX = 0x00080000, /* Has Transaction Memory (ISA 2.07) */ POWERPC_FLAG_TM = 0x00100000, + /* Has SCV (ISA 3.00) */ + POWERPC_FLAG_SCV = 0x00200000, }; /*****************************************************************************/ @@ -1206,7 +1235,7 @@ void ppc_cpu_do_interrupt(CPUState *cpu); bool ppc_cpu_exec_interrupt(CPUState *cpu, int int_req); hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); -void ppc_cpu_do_system_reset(CPUState *cs, target_ulong vector); +void ppc_cpu_do_system_reset(CPUState *cs); void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong vector); #if 0 extern const VMStateDescription vmstate_ppc_cpu; diff --git a/qemu/target/ppc/dfp_helper.c b/qemu/target/ppc/dfp_helper.c index a025ed362e..a6a398cf5c 100644 --- a/qemu/target/ppc/dfp_helper.c +++ b/qemu/target/ppc/dfp_helper.c @@ -113,7 +113,7 @@ static void dfp_set_round_mode_from_immediate(uint8_t r, uint8_t rmc, case 3: /* use FPSCR rounding mode */ return; default: - assert(0); /* cannot get here */ + g_assert_not_reached(); /* cannot get here */ } } else { /* r == 1 */ switch (rmc & 3) { @@ -130,7 +130,7 @@ static void dfp_set_round_mode_from_immediate(uint8_t r, uint8_t rmc, rnd = DEC_ROUND_HALF_DOWN; break; default: - assert(0); /* cannot get here */ + g_assert_not_reached(); /* cannot get here */ } } decContextSetRounding(&dfp->context, rnd); diff --git a/qemu/target/ppc/excp_helper.c b/qemu/target/ppc/excp_helper.c index 298b7730a1..3cfdae9ab2 100644 --- a/qemu/target/ppc/excp_helper.c +++ b/qemu/target/ppc/excp_helper.c @@ -38,12 +38,27 @@ /* Exception processing */ static inline void dump_syscall(CPUPPCState *env) { - qemu_log_mask(CPU_LOG_INT, "syscall r0=%016" PRIx64 " r3=%016" PRIx64 + qemu_log_mask(CPU_LOG_INT, "syscall r0=%016" PRIx64 + " r3=%016" PRIx64 " r4=%016" PRIx64 " r5=%016" PRIx64 + " r6=%016" PRIx64 " r7=%016" PRIx64 " r8=%016" PRIx64 " r4=%016" PRIx64 " r5=%016" PRIx64 " r6=%016" PRIx64 " nip=" TARGET_FMT_lx "\n", ppc_dump_gpr(env, 0), ppc_dump_gpr(env, 3), ppc_dump_gpr(env, 4), ppc_dump_gpr(env, 5), - ppc_dump_gpr(env, 6), env->nip); + ppc_dump_gpr(env, 6), ppc_dump_gpr(env, 7), + ppc_dump_gpr(env, 8), env->nip); +} + +static inline void dump_syscall_vectored(CPUPPCState *env) +{ + qemu_log_mask(CPU_LOG_INT, "syscall r0=%016" PRIx64 + " r3=%016" PRIx64 " r4=%016" PRIx64 " r5=%016" PRIx64 + " r6=%016" PRIx64 " r7=%016" PRIx64 " r8=%016" PRIx64 + " nip=" TARGET_FMT_lx "\n", + ppc_dump_gpr(env, 0), ppc_dump_gpr(env, 3), + ppc_dump_gpr(env, 4), ppc_dump_gpr(env, 5), + ppc_dump_gpr(env, 6), ppc_dump_gpr(env, 7), + ppc_dump_gpr(env, 8), env->nip); } static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, @@ -53,7 +68,7 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, env->resume_as_sreset = false; /* Pretend to be returning from doze always as we don't lose state */ - *msr |= (0x1ull << (63 - 47)); + *msr |= SRR1_WS_NOLOSS; /* Machine checks are sent normally */ if (excp == POWERPC_EXCP_MCHECK) { @@ -61,25 +76,25 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, } switch (excp) { case POWERPC_EXCP_RESET: - *msr |= 0x4ull << (63 - 45); + 
*msr |= SRR1_WAKERESET; break; case POWERPC_EXCP_EXTERNAL: - *msr |= 0x8ull << (63 - 45); + *msr |= SRR1_WAKEEE; break; case POWERPC_EXCP_DECR: - *msr |= 0x6ull << (63 - 45); + *msr |= SRR1_WAKEDEC; break; case POWERPC_EXCP_SDOOR: - *msr |= 0x5ull << (63 - 45); + *msr |= SRR1_WAKEDBELL; break; case POWERPC_EXCP_SDOOR_HV: - *msr |= 0x3ull << (63 - 45); + *msr |= SRR1_WAKEHDBELL; break; case POWERPC_EXCP_HV_MAINT: - *msr |= 0xaull << (63 - 45); + *msr |= SRR1_WAKEHMI; break; case POWERPC_EXCP_HVIRT: - *msr |= 0x9ull << (63 - 45); + *msr |= SRR1_WAKEHVI; break; default: cpu_abort(cs, "Unsupported exception %d in Power Save mode\n", @@ -149,7 +164,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; target_ulong msr, new_msr, vector; - int srr0, srr1, asrr0, asrr1, lev, ail; + int srr0, srr1, asrr0, asrr1, lev = -1, ail; bool lpes0; qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx @@ -388,6 +403,13 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) new_msr |= (target_ulong)MSR_HVB; } break; + case POWERPC_EXCP_SYSCALL_VECTORED: /* scv exception */ + lev = env->error_code; + dump_syscall_vectored(env); + env->nip += 4; + new_msr |= env->msr & ((target_ulong)1 << MSR_EE); + new_msr |= env->msr & ((target_ulong)1 << MSR_RI); + break; case POWERPC_EXCP_FPU: /* Floating-point unavailable exception */ case POWERPC_EXCP_APU: /* Auxiliary processor unavailable */ case POWERPC_EXCP_DECR: /* Decrementer exception */ @@ -476,6 +498,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) case POWERPC_EXCP_HDECR: /* Hypervisor decrementer exception */ case POWERPC_EXCP_HDSI: /* Hypervisor data storage exception */ case POWERPC_EXCP_HISI: /* Hypervisor instruction storage exception */ + msr |= env->error_code; case POWERPC_EXCP_HDSEG: /* Hypervisor data segment exception */ case POWERPC_EXCP_HISEG: /* Hypervisor instruction segment exception */ case POWERPC_EXCP_SDOOR_HV: /* Hypervisor Doorbell interrupt */ @@ -690,12 +713,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) break; } - /* Save PC */ - env->spr[srr0] = env->nip; - - /* Save MSR */ - env->spr[srr1] = msr; - /* Sanity check */ if (!(env->msr_mask & MSR_HVB)) { if (new_msr & MSR_HVB) { @@ -708,14 +725,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } } - /* If any alternate SRR register are defined, duplicate saved values */ - if (asrr0 != -1) { - env->spr[asrr0] = env->spr[srr0]; - } - if (asrr1 != -1) { - env->spr[asrr1] = env->spr[srr1]; - } - /* * Sort out endianness of interrupt, this differs depending on the * CPU, the HV mode, etc... 
@@ -750,18 +759,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } #endif - /* Jump to handler */ - vector = env->excp_vectors[excp]; -#ifdef _MSC_VER - if (vector == (target_ulong)(0ULL - 1ULL)) { -#else - if (vector == (target_ulong)-1ULL) { -#endif - cpu_abort(cs, "Raised an exception without defined vector %d\n", - excp); - } - vector |= env->excp_prefix; - /* * AIL only works if there is no HV transition and we are running * with translations enabled @@ -770,10 +767,21 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) ((new_msr & MSR_HVB) && !(msr & MSR_HVB))) { ail = 0; } - /* Handle AIL */ - if (ail) { - new_msr |= (1 << MSR_IR) | (1 << MSR_DR); - vector |= ppc_excp_vector_offset(cs, ail); + + vector = env->excp_vectors[excp]; + if (vector == (target_ulong)-1ULL) { + cpu_abort(cs, "Raised an exception without defined vector %d\n", + excp); + } + + vector |= env->excp_prefix; + + /* If any alternate SRR register are defined, duplicate saved values */ + if (asrr0 != -1) { + env->spr[asrr0] = env->nip; + } + if (asrr1 != -1) { + env->spr[asrr1] = msr; } #if defined(TARGET_PPC64) @@ -793,6 +801,37 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } #endif + if (excp != POWERPC_EXCP_SYSCALL_VECTORED) { + /* Save PC */ + env->spr[srr0] = env->nip; + + /* Save MSR */ + env->spr[srr1] = msr; + + /* Handle AIL */ + if (ail) { + new_msr |= (1 << MSR_IR) | (1 << MSR_DR); + vector |= ppc_excp_vector_offset(cs, ail); + } + +#if defined(TARGET_PPC64) + } else { + /* scv AIL is a little different */ + if (ail) { + new_msr |= (1 << MSR_IR) | (1 << MSR_DR); + } + if (ail == AIL_C000_0000_0000_4000) { + vector |= 0xc000000000003000ull; + } else { + vector |= 0x0000000000017000ull; + } + vector += lev * 0x20; + + env->lr = env->nip; + env->ctr = msr; +#endif + } + powerpc_set_excp_state(cpu, vector, new_msr); } @@ -954,15 +993,12 @@ static void ppc_hw_interrupt(CPUPPCState *env) } } -void ppc_cpu_do_system_reset(CPUState *cs, target_ulong vector) +void ppc_cpu_do_system_reset(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_RESET); - if (vector != -1) { - env->nip = vector; - } } void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong vector) @@ -1135,6 +1171,11 @@ void helper_rfid(CPUPPCState *env) do_rfi(env, env->spr[SPR_SRR0], env->spr[SPR_SRR1]); } +void helper_rfscv(CPUPPCState *env) +{ + do_rfi(env, env->lr, env->ctr); +} + void helper_hrfid(CPUPPCState *env) { do_rfi(env, env->spr[SPR_HSRR0], env->spr[SPR_HSRR1]); diff --git a/qemu/target/ppc/helper.h b/qemu/target/ppc/helper.h index b1c4343908..77892dc80f 100644 --- a/qemu/target/ppc/helper.h +++ b/qemu/target/ppc/helper.h @@ -18,6 +18,7 @@ DEF_HELPER_1(rfmci, void, env) #if defined(TARGET_PPC64) DEF_HELPER_2(pminsn, void, env, i32) DEF_HELPER_1(rfid, void, env) +DEF_HELPER_1(rfscv, void, env) DEF_HELPER_1(hrfid, void, env) DEF_HELPER_2(store_lpcr, void, env, tl) DEF_HELPER_2(store_pcr, void, env, tl) @@ -215,10 +216,6 @@ DEF_HELPER_3(vsubuqm, void, avr, avr, avr) DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr) DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr) DEF_HELPER_3(vsubcuq, void, avr, avr, avr) -DEF_HELPER_3(vrlb, void, avr, avr, avr) -DEF_HELPER_3(vrlh, void, avr, avr, avr) -DEF_HELPER_3(vrlw, void, avr, avr, avr) -DEF_HELPER_3(vrld, void, avr, avr, avr) DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32) DEF_HELPER_3(vextractub, void, avr, avr, i32) 
DEF_HELPER_3(vextractuh, void, avr, avr, i32) diff --git a/qemu/target/ppc/int_helper.c b/qemu/target/ppc/int_helper.c index c6ead3e149..57ede62f78 100644 --- a/qemu/target/ppc/int_helper.c +++ b/qemu/target/ppc/int_helper.c @@ -763,7 +763,7 @@ VCMPNE(w, u32, uint32_t, 0) \ for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ uint32_t result; \ - int rel = float32_compare_quiet(a->f32[i], b->f32[i], \ + FloatRelation rel = float32_compare_quiet(a->f32[i], b->f32[i], \ &env->vec_status); \ if (rel == float_relation_unordered) { \ result = 0; \ @@ -796,14 +796,14 @@ static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, int all_in = 0; for (i = 0; i < ARRAY_SIZE(r->f32); i++) { - int le_rel = float32_compare_quiet(a->f32[i], b->f32[i], + FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], &env->vec_status); if (le_rel == float_relation_unordered) { r->u32[i] = 0xc0000000; all_in = 1; } else { float32 bneg = float32_chs(b->f32[i]); - int ge_rel = float32_compare_quiet(a->f32[i], bneg, + FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, &env->vec_status); int le = le_rel != float_relation_greater; int ge = ge_rel != float_relation_less; @@ -1340,23 +1340,6 @@ VRFI(p, float_round_up) VRFI(z, float_round_to_zero) #undef VRFI -#define VROTATE(suffix, element, mask) \ - void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int shift = b->element[i] & mask; \ - r->element[i] = (a->element[i] << shift) | \ - (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ - } \ - } -VROTATE(b, u8, 0x7) -VROTATE(h, u16, 0xF) -VROTATE(w, u32, 0x1F) -VROTATE(d, u64, 0x3F) -#undef VROTATE - void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) { int i; diff --git a/qemu/target/ppc/translate.c b/qemu/target/ppc/translate.c index 15c9fde4f3..8e100300c2 100644 --- a/qemu/target/ppc/translate.c +++ b/qemu/target/ppc/translate.c @@ -170,6 +170,7 @@ struct DisasContext { bool vsx_enabled; bool spe_enabled; bool tm_enabled; + bool scv_enabled; bool gtse; ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */ int singlestep_enabled; @@ -1946,6 +1947,7 @@ static void gen_rlwimi(DisasContext *ctx) tcg_gen_deposit_tl(tcg_ctx, t_ra, t_ra, t_rs, sh, me - mb + 1); } else { target_ulong mask; + bool mask_in_32b = true; TCGv t1; #if defined(TARGET_PPC64) @@ -1954,8 +1956,13 @@ static void gen_rlwimi(DisasContext *ctx) #endif mask = MASK(mb, me); +#if defined(TARGET_PPC64) + if (mask > 0xffffffffu) { + mask_in_32b = false; + } +#endif t1 = tcg_temp_new(tcg_ctx); - if (mask <= 0xffffffffu) { + if (mask_in_32b) { TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); tcg_gen_trunc_tl_i32(tcg_ctx, t0, t_rs); tcg_gen_rotli_i32(tcg_ctx, t0, t0, sh); @@ -1998,12 +2005,18 @@ static void gen_rlwinm(DisasContext *ctx) tcg_gen_extract_tl(tcg_ctx, t_ra, t_rs, rsh, len); } else { target_ulong mask; + bool mask_in_32b = true; #if defined(TARGET_PPC64) mb += 32; me += 32; #endif mask = MASK(mb, me); - if (mask <= 0xffffffffu) { +#if defined(TARGET_PPC64) + if (mask > 0xffffffffu) { + mask_in_32b = false; + } +#endif + if (mask_in_32b) { if (sh == 0) { tcg_gen_andi_tl(tcg_ctx, t_ra, t_rs, mask); } else { @@ -2039,6 +2052,7 @@ static void gen_rlwnm(DisasContext *ctx) uint32_t mb = MB(ctx->opcode); uint32_t me = ME(ctx->opcode); target_ulong mask; + bool mask_in_32b = true; #if defined(TARGET_PPC64) mb += 32; @@ -2046,7 +2060,12 @@ static void gen_rlwnm(DisasContext *ctx) #endif mask = MASK(mb, me); - if 
(mask <= 0xffffffffu) { +#if defined(TARGET_PPC64) + if (mask > 0xffffffffu) { + mask_in_32b = false; + } +#endif + if (mask_in_32b) { TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); TCGv_i32 t1 = tcg_temp_new_i32(tcg_ctx); tcg_gen_trunc_tl_i32(tcg_ctx, t0, t_rb); @@ -4112,6 +4131,18 @@ static void gen_rfid(DisasContext *ctx) gen_sync_exception(ctx); } +static void gen_rfscv(DisasContext *ctx) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + CHK_SV; + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(tcg_ctx); + } + gen_update_cfar(ctx, ctx->base.pc_next - 4); + gen_helper_rfscv(tcg_ctx, tcg_ctx->cpu_env); + gen_sync_exception(ctx); +} + static void gen_hrfid(DisasContext *ctx) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; @@ -4124,6 +4155,7 @@ static void gen_hrfid(DisasContext *ctx) /* sc */ #define POWERPC_SYSCALL POWERPC_EXCP_SYSCALL +#define POWERPC_SYSCALL_VECTORED POWERPC_EXCP_SYSCALL_VECTORED static void gen_sc(DisasContext *ctx) { uint32_t lev; @@ -4132,6 +4164,21 @@ static void gen_sc(DisasContext *ctx) gen_exception_err(ctx, POWERPC_SYSCALL, lev); } +#if defined(TARGET_PPC64) +static void gen_scv(DisasContext *ctx) +{ + uint32_t lev; + + if (unlikely(!ctx->scv_enabled)) { + gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_SCV); + return; + } + + lev = (ctx->opcode >> 5) & 0x7F; + gen_exception_err(ctx, POWERPC_SYSCALL_VECTORED, lev); +} +#endif + /*** Trap ***/ /* Check for unconditional traps (always or never) */ @@ -6988,6 +7035,10 @@ GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER), GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW), #if defined(TARGET_PPC64) GEN_HANDLER(rfid, 0x13, 0x12, 0x00, 0x03FF8001, PPC_64B), +/* Top bit of opc2 corresponds with low bit of LEV, so use two handlers */ +GEN_HANDLER_E(scv, 0x11, 0x10, 0xFF, 0x03FFF01E, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(scv, 0x11, 0x00, 0xFF, 0x03FFF01E, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(rfscv, 0x13, 0x12, 0x02, 0x03FF8001, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(stop, 0x13, 0x12, 0x0b, 0x03FFF801, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(doze, 0x13, 0x12, 0x0c, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), GEN_HANDLER_E(nap, 0x13, 0x12, 0x0d, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), @@ -6995,7 +7046,9 @@ GEN_HANDLER_E(sleep, 0x13, 0x12, 0x0e, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), GEN_HANDLER_E(rvwinkle, 0x13, 0x12, 0x0f, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), GEN_HANDLER(hrfid, 0x13, 0x12, 0x08, 0x03FF8001, PPC_64H), #endif -GEN_HANDLER(sc, 0x11, 0xFF, 0xFF, 0x03FFF01D, PPC_FLOW), +/* Top bit of opc2 corresponds with low bit of LEV, so use two handlers */ +GEN_HANDLER(sc, 0x11, 0x11, 0xFF, 0x03FFF01D, PPC_FLOW), +GEN_HANDLER(sc, 0x11, 0x01, 0xFF, 0x03FFF01D, PPC_FLOW), GEN_HANDLER(tw, 0x1F, 0x04, 0x00, 0x00000001, PPC_FLOW), GEN_HANDLER(twi, 0x03, 0xFF, 0xFF, 0x00000000, PPC_FLOW), #if defined(TARGET_PPC64) @@ -7541,6 +7594,12 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) } else { ctx->vsx_enabled = false; } + if ((env->flags & POWERPC_FLAG_SCV) + && (env->spr[SPR_FSCR] & (1ull << FSCR_SCV))) { + ctx->scv_enabled = true; + } else { + ctx->scv_enabled = false; + } #if defined(TARGET_PPC64) if ((env->flags & POWERPC_FLAG_TM) && msr_tm) { ctx->tm_enabled = !!msr_tm; diff --git a/qemu/target/ppc/translate/fp-impl.inc.c b/qemu/target/ppc/translate/fp-impl.inc.c index 58155f21eb..00a9d42dd8 100644 --- a/qemu/target/ppc/translate/fp-impl.inc.c +++ b/qemu/target/ppc/translate/fp-impl.inc.c @@ -33,170 +33,170 @@ static void gen_set_cr1_from_fpscr(DisasContext *ctx) #endif /*** 
Floating-Point arithmetic ***/ -#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - TCGv_i64 t3; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - t2 = tcg_temp_new_i64(tcg_ctx); \ - t3 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ - get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ - get_fpr(tcg_ctx, t2, rB(ctx->opcode)); \ - gen_helper_f##op(tcg_ctx, t3, tcg_ctx->cpu_env, t0, t1, t2); \ - if (isfloat) { \ - gen_helper_frsp(tcg_ctx, t3, tcg_ctx->cpu_env, t3); \ - } \ - set_fpr(tcg_ctx, rD(ctx->opcode), t3); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t3); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ - tcg_temp_free_i64(tcg_ctx, t2); \ - tcg_temp_free_i64(tcg_ctx, t3); \ -} +#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + TCGv_i64 t2; \ + TCGv_i64 t3; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + t2 = tcg_temp_new_i64(tcg_ctx); \ + t3 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ + get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ + get_fpr(tcg_ctx, t2, rB(ctx->opcode)); \ + gen_helper_f##op(tcg_ctx, t3, tcg_ctx->cpu_env, t0, t1, t2); \ + if (isfloat) { \ + gen_helper_frsp(tcg_ctx, t3, tcg_ctx->cpu_env, t3); \ + } \ + set_fpr(tcg_ctx, rD(ctx->opcode), t3); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t3); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + tcg_temp_free_i64(tcg_ctx, t2); \ + tcg_temp_free_i64(tcg_ctx, t3); \ + } -#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ -_GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type); \ -_GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type); - -#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - t2 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ - get_fpr(tcg_ctx, t1, rB(ctx->opcode)); \ - gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ - if (isfloat) { \ - gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ - } \ - set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t2); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ - tcg_temp_free_i64(tcg_ctx, t2); \ -} -#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AB(name, name, 0x3F, op2, 
inval, 0, set_fprf, type); \ -_GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); - -#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - t2 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ - get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ - gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ - if (isfloat) { \ - gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ - } \ - set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t2); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ - tcg_temp_free_i64(tcg_ctx, t2); \ -} -#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ -_GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); - -#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ - gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t1); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ -} +#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ + _GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type); \ + _GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type); + +#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + TCGv_i64 t2; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + t2 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ + get_fpr(tcg_ctx, t1, rB(ctx->opcode)); \ + gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ + if (isfloat) { \ + gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ + } \ + set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t2); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + tcg_temp_free_i64(tcg_ctx, t2); \ + } +#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ + _GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ + _GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); + +#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + 
TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + TCGv_i64 t2; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + t2 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ + get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ + gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ + if (isfloat) { \ + gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ + } \ + set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t2); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + tcg_temp_free_i64(tcg_ctx, t2); \ + } +#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ + _GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ + _GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); + +#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ + gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t1); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + } -#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ - gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t1); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ -} +#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ + gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t1); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + } /* fadd - fadds */ GEN_FLOAT_AB(add, 0x15, 0x000007C0, 1, PPC_FLOAT); @@ -217,7 +217,7 @@ GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE); /* frsqrtes */ static void gen_frsqrtes(DisasContext *ctx) { - 
TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -248,7 +248,7 @@ GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT); /* fsqrt */ static void gen_fsqrt(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -271,7 +271,7 @@ static void gen_fsqrt(DisasContext *ctx) static void gen_fsqrts(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -342,7 +342,7 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); static void gen_ftdiv(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -360,7 +360,7 @@ static void gen_ftdiv(DisasContext *ctx) static void gen_ftsqrt(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; if (unlikely(!ctx->fpu_enabled)) { gen_exception(ctx, POWERPC_EXCP_FPU); @@ -372,14 +372,12 @@ static void gen_ftsqrt(DisasContext *ctx) tcg_temp_free_i64(tcg_ctx, t0); } - - /*** Floating-Point compare ***/ /* fcmpo */ static void gen_fcmpo(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i32 crf; TCGv_i64 t0; TCGv_i64 t1; @@ -403,7 +401,7 @@ static void gen_fcmpo(DisasContext *ctx) /* fcmpu */ static void gen_fcmpu(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i32 crf; TCGv_i64 t0; TCGv_i64 t1; @@ -429,7 +427,7 @@ static void gen_fcmpu(DisasContext *ctx) /* XXX: beware that fabs never checks for NaNs nor update FPSCR */ static void gen_fabs(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -452,7 +450,7 @@ static void gen_fabs(DisasContext *ctx) /* XXX: beware that fmr never checks for NaNs nor update FPSCR */ static void gen_fmr(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; if (unlikely(!ctx->fpu_enabled)) { gen_exception(ctx, POWERPC_EXCP_FPU); @@ -471,7 +469,7 @@ static void gen_fmr(DisasContext *ctx) /* XXX: beware that fnabs never checks for NaNs nor update FPSCR */ static void gen_fnabs(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -494,7 +492,7 @@ static void gen_fnabs(DisasContext *ctx) /* XXX: beware that fneg never checks for NaNs nor update FPSCR */ static void gen_fneg(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -517,7 +515,7 @@ static void gen_fneg(DisasContext *ctx) /* XXX: beware that fcpsgn never checks for NaNs nor update FPSCR */ static void gen_fcpsgn(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; TCGv_i64 t2; @@ -542,7 +540,7 @@ static void gen_fcpsgn(DisasContext *ctx) static void gen_fmrgew(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 
b0; TCGv_i64 t0; TCGv_i64 t1; @@ -565,7 +563,7 @@ static void gen_fmrgew(DisasContext *ctx) static void gen_fmrgow(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; TCGv_i64 t2; @@ -607,8 +605,8 @@ static void gen_mcrfs(DisasContext *ctx) shift = 4 * nibble; tcg_gen_shri_tl(tcg_ctx, tmp, cpu_fpscr, shift); tcg_gen_trunc_tl_i32(tcg_ctx, cpu_crf[crfD(ctx->opcode)], tmp); - tcg_gen_andi_i32(tcg_ctx, cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], - 0xf); + tcg_gen_andi_i32(tcg_ctx, cpu_crf[crfD(ctx->opcode)], + cpu_crf[crfD(ctx->opcode)], 0xf); tcg_temp_free(tcg_ctx, tmp); tcg_gen_extu_tl_i64(tcg_ctx, tnew_fpscr, cpu_fpscr); /* Only the exception bits (including FX) should be cleared if read */ @@ -836,7 +834,8 @@ static void gen_mtfsf(DisasContext *ctx) } gen_reset_fpstatus(tcg_ctx); if (l) { - t0 = tcg_const_i32(tcg_ctx, (ctx->insns_flags2 & PPC2_ISA205) ? 0xffff : 0xff); + t0 = tcg_const_i32(tcg_ctx, + (ctx->insns_flags2 & PPC2_ISA205) ? 0xffff : 0xff); } else { t0 = tcg_const_i32(tcg_ctx, flm << (w * 8)); } @@ -887,101 +886,101 @@ static void gen_mtfsfi(DisasContext *ctx) } /*** Floating-point load ***/ -#define GEN_LDF(name, ldop, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDF(name, ldop, opc, type) \ + static void glue(gen_, name)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDUF(name, ldop, opc, type) \ -static void glue(gen_, name##u)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDUF(name, ldop, opc, type) \ + static void glue(gen_, name##u)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = 
tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDUXF(name, ldop, opc, type) \ -static void glue(gen_, name##ux)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDUXF(name, ldop, opc, type) \ + static void glue(gen_, name##ux)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDXF(name, ldop, opc2, opc3, type) \ -static void glue(gen_, name##x)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDXF(name, ldop, opc2, opc3, type) \ + static void glue(gen_, name##x)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDFS(name, ldop, op, type) \ -GEN_LDF(name, ldop, op | 0x20, type); \ -GEN_LDUF(name, ldop, op | 0x21, type); \ -GEN_LDUXF(name, ldop, op | 0x01, type); \ -GEN_LDXF(name, ldop, 0x17, op | 0x00, type) +#define GEN_LDFS(name, ldop, op, type) \ + GEN_LDF(name, ldop, op | 0x20, type); \ + GEN_LDUF(name, ldop, op | 0x21, type); \ + GEN_LDUXF(name, ldop, op | 0x01, type); \ + GEN_LDXF(name, ldop, 0x17, op | 0x00, type) static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr) { @@ -992,9 +991,9 @@ static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr) 
tcg_temp_free_i32(tcg_ctx, tmp); } - /* lfd lfdu lfdux lfdx */ +/* lfd lfdu lfdux lfdx */ GEN_LDFS(lfd, ld64_i64, 0x12, PPC_FLOAT); - /* lfs lfsu lfsux lfsx */ +/* lfs lfsu lfsux lfsx */ GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT); /* lfdepx (external PID lfdx) */ @@ -1132,101 +1131,101 @@ static void gen_lfiwzx(DisasContext *ctx) tcg_temp_free_i64(tcg_ctx, t0); } /*** Floating-point store ***/ -#define GEN_STF(name, stop, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STF(name, stop, opc, type) \ + static void glue(gen_, name)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STUF(name, stop, opc, type) \ -static void glue(gen_, name##u)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STUF(name, stop, opc, type) \ + static void glue(gen_, name##u)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STUXF(name, stop, opc, type) \ -static void glue(gen_, name##ux)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - 
gen_addr_reg_index(ctx, EA); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STUXF(name, stop, opc, type) \ + static void glue(gen_, name##ux)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STXF(name, stop, opc2, opc3, type) \ -static void glue(gen_, name##x)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_reg_index(ctx, EA); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STXF(name, stop, opc2, opc3, type) \ + static void glue(gen_, name##x)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STFS(name, stop, op, type) \ -GEN_STF(name, stop, op | 0x20, type); \ -GEN_STUF(name, stop, op | 0x21, type); \ -GEN_STUXF(name, stop, op | 0x01, type); \ -GEN_STXF(name, stop, 0x17, op | 0x00, type) +#define GEN_STFS(name, stop, op, type) \ + GEN_STF(name, stop, op | 0x20, type); \ + GEN_STUF(name, stop, op | 0x21, type); \ + GEN_STUXF(name, stop, op | 0x01, type); \ + GEN_STXF(name, stop, 0x17, op | 0x00, type) static void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 src, TCGv addr) { @@ -1338,8 +1337,7 @@ static inline void gen_qemu_st32fiw(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv t0 = tcg_temp_new(tcg_ctx); - tcg_gen_trunc_i64_tl(tcg_ctx, t0, arg1), - gen_qemu_st32(ctx, t0, arg2); + tcg_gen_trunc_i64_tl(tcg_ctx, t0, arg1), gen_qemu_st32(ctx, t0, arg2); tcg_temp_free(tcg_ctx, t0); } /* stfiwx */ diff --git a/qemu/target/ppc/translate/vmx-impl.inc.c b/qemu/target/ppc/translate/vmx-impl.inc.c index 9d4211dd6e..16df4ae63b 100644 --- a/qemu/target/ppc/translate/vmx-impl.inc.c +++ b/qemu/target/ppc/translate/vmx-impl.inc.c @@ -922,13 +922,13 @@ GEN_VXFORM3(vsubeuqm, 31, 0); GEN_VXFORM3(vsubecuq, 31, 0); GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ vsubecuq, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM(vrlb, 2, 0); -GEN_VXFORM(vrlh, 2, 1); -GEN_VXFORM(vrlw, 2, 2); +GEN_VXFORM_V(vrlb, MO_8, tcg_gen_gvec_rotlv, 2, 0); 
+GEN_VXFORM_V(vrlh, MO_16, tcg_gen_gvec_rotlv, 2, 1); +GEN_VXFORM_V(vrlw, MO_32, tcg_gen_gvec_rotlv, 2, 2); GEN_VXFORM(vrlwmi, 2, 2); GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \ vrlwmi, PPC_NONE, PPC2_ISA300) -GEN_VXFORM(vrld, 2, 3); +GEN_VXFORM_V(vrld, MO_64, tcg_gen_gvec_rotlv, 2, 3); GEN_VXFORM(vrldmi, 2, 3); GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \ vrldmi, PPC_NONE, PPC2_ISA300) @@ -1058,22 +1058,25 @@ GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \ GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \ vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207) -#define GEN_VXFORM_DUPI(name, tcg_op, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - int simm; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - simm = SIMM5(ctx->opcode); \ - tcg_op(tcg_ctx, avr_full_offset(rD(ctx->opcode)), 16, 16, simm);\ +static void gen_vsplti(DisasContext *ctx, int vece) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + int simm; + + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; } + simm = SIMM5(ctx->opcode); + tcg_gen_gvec_dup_imm(tcg_ctx, vece, avr_full_offset(rD(ctx->opcode)), 16, 16, simm); +} + +#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); } -GEN_VXFORM_DUPI(vspltisb, tcg_gen_gvec_dup8i, 6, 12); -GEN_VXFORM_DUPI(vspltish, tcg_gen_gvec_dup16i, 6, 13); -GEN_VXFORM_DUPI(vspltisw, tcg_gen_gvec_dup32i, 6, 14); +GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12); +GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13); +GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14); #define GEN_VXFORM_NOA(name, opc2, opc3) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -1598,7 +1601,7 @@ GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE, #undef GEN_VXRFORM_DUAL #undef GEN_VXRFORM1 #undef GEN_VXRFORM -#undef GEN_VXFORM_DUPI +#undef GEN_VXFORM_VSPLTI #undef GEN_VXFORM_NOA #undef GEN_VXFORM_UIMM #undef GEN_VAFORM_PAIRED diff --git a/qemu/target/ppc/translate/vsx-impl.inc.c b/qemu/target/ppc/translate/vsx-impl.inc.c index 679da14902..a0c3832842 100644 --- a/qemu/target/ppc/translate/vsx-impl.inc.c +++ b/qemu/target/ppc/translate/vsx-impl.inc.c @@ -1629,7 +1629,7 @@ static void gen_xxspltib(DisasContext *ctx) return; } } - tcg_gen_gvec_dup8i(tcg_ctx, vsr_full_offset(rt), 16, 16, uim8); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_8, vsr_full_offset(rt), 16, 16, uim8); } static void gen_xxsldwi(DisasContext *ctx) diff --git a/qemu/target/ppc/translate_init.inc.c b/qemu/target/ppc/translate_init.inc.c index d2fb1974ad..6ded89c817 100644 --- a/qemu/target/ppc/translate_init.inc.c +++ b/qemu/target/ppc/translate_init.inc.c @@ -3377,6 +3377,7 @@ static void init_excp_POWER9(CPUPPCState *env) init_excp_POWER8(env); env->excp_vectors[POWERPC_EXCP_HVIRT] = 0x00000EA0; + env->excp_vectors[POWERPC_EXCP_SYSCALL_VECTORED] = 0x00000000; } static void init_excp_POWER10(CPUPPCState *env) @@ -5145,7 +5146,7 @@ POWERPC_FAMILY(e5500)(CPUClass *oc, void *data) PPC_FLOAT_STFIWX | PPC_WAIT | PPC_MEM_TLBSYNC | PPC_TLBIVAX | PPC_MEM_SYNC | PPC_64B | PPC_POPCNTB | PPC_POPCNTWD; - pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | \ + pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | PPC2_FP_CVT_S64; pcc->msr_mask = (1ull << MSR_CM) | (1ull << MSR_GS) | @@ -5191,7 +5192,7 @@ POWERPC_FAMILY(e6500)(CPUClass *oc, void *data) PPC_FLOAT_STFIWX | PPC_WAIT | PPC_MEM_TLBSYNC | PPC_TLBIVAX | PPC_MEM_SYNC | PPC_64B | 
PPC_POPCNTB | PPC_POPCNTWD | PPC_ALTIVEC; - pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | \ + pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | PPC2_FP_CVT_S64 | PPC2_ATOMIC_ISA206; pcc->msr_mask = (1ull << MSR_CM) | (1ull << MSR_GS) | @@ -8845,7 +8846,7 @@ POWERPC_FAMILY(POWER9)(CPUClass *oc, void *data) pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE | POWERPC_FLAG_PMM | POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR | - POWERPC_FLAG_VSX | POWERPC_FLAG_TM; + POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV; pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; @@ -8898,11 +8899,6 @@ static void init_proc_POWER10(CPUPPCState *env) gen_spr_power8_rpr(env); gen_spr_power9_mmu(env); - /* POWER9 Specific registers */ - spr_register_kvm(env, SPR_TIDR, "TIDR", NULL, NULL, - spr_read_generic, spr_write_generic, - KVM_REG_PPC_TIDR, 0); - /* FIXME: Filter fields properly based on privilege level */ spr_register_kvm_hv(env, SPR_PSSCR, "PSSCR", NULL, NULL, NULL, NULL, spr_read_generic, spr_write_generic, @@ -9680,7 +9676,7 @@ static int gdb_get_float_reg(CPUPPCState *env, GByteArray *buf, int n) { uint8_t *mem_buf; if (n < 32) { - gdb_get_reg64(buf, *cpu_fpr_ptr(env, n)); + gdb_get_float64(buf, *cpu_fpr_ptr(env, n)); mem_buf = gdb_get_reg_ptr(buf, 8); ppc_maybe_bswap_register(env, mem_buf, 8); return 8; diff --git a/qemu/target/riscv/cpu.c b/qemu/target/riscv/cpu.c index 2313cfc6cc..9979df4979 100644 --- a/qemu/target/riscv/cpu.c +++ b/qemu/target/riscv/cpu.c @@ -30,22 +30,20 @@ // static const char riscv_exts[26] = "IEMAFDQCLBJTPVNSUHKORWXYZG"; -const char * const riscv_int_regnames[] = { - "x0/zero", "x1/ra", "x2/sp", "x3/gp", "x4/tp", "x5/t0", "x6/t1", - "x7/t2", "x8/s0", "x9/s1", "x10/a0", "x11/a1", "x12/a2", "x13/a3", - "x14/a4", "x15/a5", "x16/a6", "x17/a7", "x18/s2", "x19/s3", "x20/s4", - "x21/s5", "x22/s6", "x23/s7", "x24/s8", "x25/s9", "x26/s10", "x27/s11", - "x28/t3", "x29/t4", "x30/t5", "x31/t6" -}; - -const char * const riscv_fpr_regnames[] = { - "f0/ft0", "f1/ft1", "f2/ft2", "f3/ft3", "f4/ft4", "f5/ft5", - "f6/ft6", "f7/ft7", "f8/fs0", "f9/fs1", "f10/fa0", "f11/fa1", - "f12/fa2", "f13/fa3", "f14/fa4", "f15/fa5", "f16/fa6", "f17/fa7", - "f18/fs2", "f19/fs3", "f20/fs4", "f21/fs5", "f22/fs6", "f23/fs7", - "f24/fs8", "f25/fs9", "f26/fs10", "f27/fs11", "f28/ft8", "f29/ft9", - "f30/ft10", "f31/ft11" -}; +const char *const riscv_int_regnames[] = { + "x0/zero", "x1/ra", "x2/sp", "x3/gp", "x4/tp", "x5/t0", "x6/t1", + "x7/t2", "x8/s0", "x9/s1", "x10/a0", "x11/a1", "x12/a2", "x13/a3", + "x14/a4", "x15/a5", "x16/a6", "x17/a7", "x18/s2", "x19/s3", "x20/s4", + "x21/s5", "x22/s6", "x23/s7", "x24/s8", "x25/s9", "x26/s10", "x27/s11", + "x28/t3", "x29/t4", "x30/t5", "x31/t6"}; + +const char *const riscv_fpr_regnames[] = { + "f0/ft0", "f1/ft1", "f2/ft2", "f3/ft3", "f4/ft4", "f5/ft5", + "f6/ft6", "f7/ft7", "f8/fs0", "f9/fs1", "f10/fa0", "f11/fa1", + "f12/fa2", "f13/fa3", "f14/fa4", "f15/fa5", "f16/fa6", "f17/fa7", + "f18/fs2", "f19/fs3", "f20/fs4", "f21/fs5", "f22/fs6", "f23/fs7", + "f24/fs8", "f25/fs9", "f26/fs10", "f27/fs11", "f28/ft8", "f29/ft9", + "f30/ft10", "f31/ft11"}; static void set_misa(CPURISCVState *env, target_ulong misa) { @@ -57,6 +55,11 @@ static void set_priv_version(CPURISCVState *env, int priv_ver) env->priv_ver = priv_ver; } +static void set_vext_version(CPURISCVState *env, int vext_ver) +{ + env->vext_ver = vext_ver; +} + static void 
set_feature(CPURISCVState *env, int feature) { env->features |= (1ULL << feature); @@ -75,65 +78,48 @@ static void riscv_any_cpu_init(CPUState *obj) set_resetvec(env, DEFAULT_RSTVEC); } -#if defined(TARGET_RISCV32) -// rv32 -static void riscv_base32_cpu_init(CPUState *obj) +static void riscv_base_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, 0); + set_resetvec(env, DEFAULT_RSTVEC); } -// sifive-u34 -static void rv32gcsu_priv1_10_0_cpu_init(CPUState *obj) +static void rvxx_sifive_u_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV32 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); + set_misa(env, RVXLEN | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_MMU); - set_feature(env, RISCV_FEATURE_PMP); + set_resetvec(env, 0x1004); } -// sifive-e31 -static void rv32imacu_nommu_cpu_init(CPUState *obj) +static void rvxx_sifive_e_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV32 | RVI | RVM | RVA | RVC | RVU); + set_misa(env, RVXLEN | RVI | RVM | RVA | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_PMP); + set_resetvec(env, 0x1004); } -#elif defined(TARGET_RISCV64) -// rv64 -static void riscv_base64_cpu_init(CPUState *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - /* We set this in the realise function */ - set_misa(env, 0); -} +#if defined(TARGET_RISCV32) -// sifive-u54 -static void rv64gcsu_priv1_10_0_cpu_init(CPUState *obj) +static void rv32_ibex_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV64 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); + set_misa(env, RV32 | RVI | RVM | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_MMU); - set_feature(env, RISCV_FEATURE_PMP); + set_resetvec(env, 0x8090); } -// sifive-e51 -static void rv64imacu_nommu_cpu_init(CPUState *obj) +static void rv32_imafcu_nommu_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV64 | RVI | RVM | RVA | RVC | RVU); + set_misa(env, RV32 | RVI | RVM | RVA | RVF | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_PMP); } + #endif static void riscv_cpu_set_pc(CPUState *cs, vaddr value) @@ -192,6 +178,7 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) RISCVCPU *cpu = RISCV_CPU(dev); CPURISCVState *env = &cpu->env; int priv_version = PRIV_VERSION_1_11_0; + int vext_version = VEXT_VERSION_0_07_1; target_ulong target_misa = 0; cpu_exec_realizefn(cs); @@ -201,16 +188,15 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) priv_version = PRIV_VERSION_1_11_0; } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.10.0")) { priv_version = PRIV_VERSION_1_10_0; - } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.9.1")) { - priv_version = PRIV_VERSION_1_09_1; } else { - // error_setg(errp, "Unsupported privilege spec version '%s'", cpu->cfg.priv_spec); + // error_setg(errp, "Unsupported privilege spec version '%s'", + // cpu->cfg.priv_spec); return; } } set_priv_version(env, priv_version); - set_resetvec(env, DEFAULT_RSTVEC); + set_vext_version(env, vext_version); if (cpu->cfg.mmu) { set_feature(env, RISCV_FEATURE_MMU); @@ -224,7 +210,7 @@ 
static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) if (!env->misa) { /* Do some ISA extension error checking */ if (cpu->cfg.ext_i && cpu->cfg.ext_e) { - //error_setg(errp, "I and E extensions are incompatible"); + // error_setg(errp, "I and E extensions are incompatible"); return; } @@ -233,8 +219,9 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) return; } - if (cpu->cfg.ext_g && !(cpu->cfg.ext_i & cpu->cfg.ext_m & - cpu->cfg.ext_a & cpu->cfg.ext_f & cpu->cfg.ext_d)) { + if (cpu->cfg.ext_g && + !(cpu->cfg.ext_i & cpu->cfg.ext_m & cpu->cfg.ext_a & + cpu->cfg.ext_f & cpu->cfg.ext_d)) { // warn_report("Setting G will also set IMAFD"); cpu->cfg.ext_i = true; cpu->cfg.ext_m = true; @@ -274,6 +261,45 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) if (cpu->cfg.ext_h) { target_misa |= RVH; } + if (cpu->cfg.ext_v) { + target_misa |= RVV; + if (!is_power_of_2(cpu->cfg.vlen)) { + // error_setg(errp, + // "Vector extension VLEN must be power of 2"); + return; + } + if (cpu->cfg.vlen > RV_VLEN_MAX || cpu->cfg.vlen < 128) { + // error_setg(errp, + // "Vector extension implementation only supports VLEN " + // "in the range [128, %d]", RV_VLEN_MAX); + return; + } + if (!is_power_of_2(cpu->cfg.elen)) { + // error_setg(errp, + // "Vector extension ELEN must be power of 2"); + return; + } + if (cpu->cfg.elen > 64 || cpu->cfg.vlen < 8) { + // error_setg(errp, + // "Vector extension implementation only supports ELEN " + // "in the range [8, 64]"); + return; + } + if (cpu->cfg.vext_spec) { + if (!g_strcmp0(cpu->cfg.vext_spec, "v0.7.1")) { + vext_version = VEXT_VERSION_0_07_1; + } else { + // error_setg(errp, + // "Unsupported vector spec version '%s'", + // cpu->cfg.vext_spec); + return; + } + } else { + // qemu_log("vector verison is not specified, " + // "use the default value v0.7.1\n"); + } + set_vext_version(env, vext_version); + } set_misa(env, RVXLEN | target_misa); } @@ -316,16 +342,17 @@ typedef struct CPUModelInfo { } CPUModelInfo; static const CPUModelInfo cpu_models[] = { - {TYPE_RISCV_CPU_ANY, riscv_any_cpu_init}, -#ifdef TARGET_RISCV32 - {TYPE_RISCV_CPU_BASE32, riscv_base32_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_E31, rv32imacu_nommu_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_U34, rv32gcsu_priv1_10_0_cpu_init}, -#endif -#ifdef TARGET_RISCV64 - {TYPE_RISCV_CPU_BASE64, riscv_base64_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_E51, rv64imacu_nommu_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_U54, rv64gcsu_priv1_10_0_cpu_init}, + {TYPE_RISCV_CPU_ANY, riscv_any_cpu_init}, +#if defined(TARGET_RISCV32) + {TYPE_RISCV_CPU_BASE32, riscv_base_cpu_init}, + {TYPE_RISCV_CPU_IBEX, rv32_ibex_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_E31, rvxx_sifive_e_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_E34, rv32_imafcu_nommu_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_U34, rvxx_sifive_u_cpu_init}, +#elif defined(TARGET_RISCV64) + {TYPE_RISCV_CPU_BASE64, riscv_base_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_E51, rvxx_sifive_e_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_U54, rvxx_sifive_u_cpu_init}, #endif }; @@ -339,7 +366,7 @@ RISCVCPU *cpu_riscv_init(struct uc_struct *uc) if (cpu == NULL) { return NULL; } - memset((void*)cpu, 0, sizeof(*cpu)); + memset((void *)cpu, 0, sizeof(*cpu)); #ifdef TARGET_RISCV32 if (uc->cpu_model == INT_MAX) { diff --git a/qemu/target/riscv/cpu.h b/qemu/target/riscv/cpu.h index b94516eb7c..50c5d5fbf7 100644 --- a/qemu/target/riscv/cpu.h +++ b/qemu/target/riscv/cpu.h @@ -21,6 +21,7 @@ #define RISCV_CPU_H #include "hw/core/cpu.h" +#include "hw/registerfields.h" #include "exec/cpu-defs.h" #include 
"fpu/softfloat-types.h" @@ -35,7 +36,9 @@ typedef struct TCGContext TCGContext; #define TYPE_RISCV_CPU_ANY RISCV_CPU_TYPE_NAME("any") #define TYPE_RISCV_CPU_BASE32 RISCV_CPU_TYPE_NAME("rv32") #define TYPE_RISCV_CPU_BASE64 RISCV_CPU_TYPE_NAME("rv64") +#define TYPE_RISCV_CPU_IBEX RISCV_CPU_TYPE_NAME("lowrisc-ibex") #define TYPE_RISCV_CPU_SIFIVE_E31 RISCV_CPU_TYPE_NAME("sifive-e31") +#define TYPE_RISCV_CPU_SIFIVE_E34 RISCV_CPU_TYPE_NAME("sifive-e34") #define TYPE_RISCV_CPU_SIFIVE_E51 RISCV_CPU_TYPE_NAME("sifive-e51") #define TYPE_RISCV_CPU_SIFIVE_U34 RISCV_CPU_TYPE_NAME("sifive-u34") #define TYPE_RISCV_CPU_SIFIVE_U54 RISCV_CPU_TYPE_NAME("sifive-u54") @@ -57,6 +60,7 @@ typedef struct TCGContext TCGContext; #define RVA RV('A') #define RVF RV('F') #define RVD RV('D') +#define RVV RV('V') #define RVC RV('C') #define RVS RV('S') #define RVU RV('U') @@ -72,10 +76,11 @@ enum { RISCV_FEATURE_MISA }; -#define PRIV_VERSION_1_09_1 0x00010901 #define PRIV_VERSION_1_10_0 0x00011000 #define PRIV_VERSION_1_11_0 0x00011100 +#define VEXT_VERSION_0_07_1 0x00000701 + #define TRANSLATE_PMP_FAIL 2 #define TRANSLATE_FAIL 1 #define TRANSLATE_SUCCESS 0 @@ -87,9 +92,26 @@ typedef struct CPURISCVState CPURISCVState; #include "pmp.h" +#define RV_VLEN_MAX 256 + +FIELD(VTYPE, VLMUL, 0, 2) +FIELD(VTYPE, VSEW, 2, 3) +FIELD(VTYPE, VEDIV, 5, 2) +FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 9) +FIELD(VTYPE, VILL, sizeof(target_ulong) * 8 - 1, 1) + struct CPURISCVState { target_ulong gpr[32]; uint64_t fpr[32]; /* assume both F and D extensions */ + + /* vector coprocessor state. */ + uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16); + target_ulong vxrm; + target_ulong vxsat; + target_ulong vl; + target_ulong vstart; + target_ulong vtype; + target_ulong pc; target_ulong load_res; target_ulong load_val; @@ -100,6 +122,7 @@ struct CPURISCVState { target_ulong guest_phys_fault_addr; target_ulong priv_ver; + target_ulong vext_ver; target_ulong misa; target_ulong misa_mask; @@ -245,12 +268,16 @@ typedef struct RISCVCPU { bool ext_s; bool ext_u; bool ext_h; + bool ext_v; bool ext_counters; bool ext_ifencei; bool ext_icsr; char *priv_spec; char *user_spec; + char *vext_spec; + uint16_t vlen; + uint16_t elen; bool mmu; bool pmp; } cfg; @@ -323,15 +350,56 @@ void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong); #define TB_FLAGS_MMU_MASK 3 #define TB_FLAGS_MSTATUS_FS MSTATUS_FS +typedef CPURISCVState CPUArchState; +typedef RISCVCPU ArchCPU; +#include "exec/cpu-all.h" + +FIELD(TB_FLAGS, VL_EQ_VLMAX, 2, 1) +FIELD(TB_FLAGS, LMUL, 3, 2) +FIELD(TB_FLAGS, SEW, 5, 3) +FIELD(TB_FLAGS, VILL, 8, 1) + +/* + * A simplification for VLMAX + * = (1 << LMUL) * VLEN / (8 * (1 << SEW)) + * = (VLEN << LMUL) / (8 << SEW) + * = (VLEN << LMUL) >> (SEW + 3) + * = VLEN >> (SEW + 3 - LMUL) + */ +static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype) +{ + uint8_t sew, lmul; + + sew = FIELD_EX64(vtype, VTYPE, VSEW); + lmul = FIELD_EX64(vtype, VTYPE, VLMUL); + return cpu->cfg.vlen >> (sew + 3 - lmul); +} + static inline void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) + target_ulong *cs_base, uint32_t *pflags) { + uint32_t flags = 0; + *pc = env->pc; *cs_base = 0; - *flags = cpu_mmu_index(env, 0); + + if (riscv_has_ext(env, RVV)) { + uint32_t vlmax = vext_get_vlmax(env_archcpu(env), env->vtype); + bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl); + FIELD_DP32(flags, TB_FLAGS, VILL, FIELD_EX64(env->vtype, VTYPE, VILL), flags); + FIELD_DP32(flags, TB_FLAGS, SEW, 
FIELD_EX64(env->vtype, VTYPE, VSEW), flags); + FIELD_DP32(flags, TB_FLAGS, LMUL, FIELD_EX64(env->vtype, VTYPE, VLMUL), flags); + FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax, flags); + } else { + FIELD_DP32(flags, TB_FLAGS, VILL, 1, flags); + } + + flags |= cpu_mmu_index(env, 0); if (riscv_cpu_fp_enabled(env)) { - *flags |= env->mstatus & MSTATUS_FS; + flags |= env->mstatus & MSTATUS_FS; } + + *pflags = flags; } int riscv_csrrw(CPURISCVState *env, int csrno, target_ulong *ret_value, @@ -372,9 +440,4 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops); void riscv_cpu_register_gdb_regs_for_features(CPUState *cs); -typedef CPURISCVState CPUArchState; -typedef RISCVCPU ArchCPU; - -#include "exec/cpu-all.h" - #endif /* RISCV_CPU_H */ diff --git a/qemu/target/riscv/cpu_bits.h b/qemu/target/riscv/cpu_bits.h index ffa73864a9..48625ac2fd 100644 --- a/qemu/target/riscv/cpu_bits.h +++ b/qemu/target/riscv/cpu_bits.h @@ -29,6 +29,14 @@ #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) +/* Vector Fixed-Point round model */ +#define FSR_VXRM_SHIFT 9 +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) + +/* Vector Fixed-Point saturation flag */ +#define FSR_VXSAT_SHIFT 8 +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) + /* Control and Status Registers */ /* User Trap Setup */ @@ -48,6 +56,13 @@ #define CSR_FRM 0x002 #define CSR_FCSR 0x003 +/* User Vector CSRs */ +#define CSR_VSTART 0x008 +#define CSR_VXSAT 0x009 +#define CSR_VXRM 0x00a +#define CSR_VL 0xc20 +#define CSR_VTYPE 0xc21 + /* User Timers and Counters */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 diff --git a/qemu/target/riscv/cpu_helper.c b/qemu/target/riscv/cpu_helper.c index bb2c3d869f..bad05e3049 100644 --- a/qemu/target/riscv/cpu_helper.c +++ b/qemu/target/riscv/cpu_helper.c @@ -300,9 +300,6 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; int mode = mmu_idx; bool use_background = false; - hwaddr base; - int levels = 0, ptidxbits = 0, ptesize = 0, vm, sum, mxr, widened; - /* * Check if we should use the background registers for the two @@ -344,63 +341,45 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, *prot = 0; + hwaddr base; + int levels, ptidxbits, ptesize, vm, sum, mxr, widened; + if (first_stage == true) { mxr = get_field(env->mstatus, MSTATUS_MXR); } else { mxr = get_field(env->vsstatus, MSTATUS_MXR); } - if (env->priv_ver >= PRIV_VERSION_1_10_0) { - if (first_stage == true) { - if (use_background) { - base = (hwaddr)get_field(env->vsatp, SATP_PPN) << PGSHIFT; - vm = get_field(env->vsatp, SATP_MODE); - } else { - base = (hwaddr)get_field(env->satp, SATP_PPN) << PGSHIFT; - vm = get_field(env->satp, SATP_MODE); - } - widened = 0; + if (first_stage == true) { + if (use_background) { + base = (hwaddr)get_field(env->vsatp, SATP_PPN) << PGSHIFT; + vm = get_field(env->vsatp, SATP_MODE); } else { - base = (hwaddr)get_field(env->hgatp, HGATP_PPN) << PGSHIFT; - vm = get_field(env->hgatp, HGATP_MODE); - widened = 2; - } - sum = get_field(env->mstatus, MSTATUS_SUM); - switch (vm) { - case VM_1_10_SV32: - levels = 2; ptidxbits = 10; ptesize = 4; break; - case VM_1_10_SV39: - levels = 3; ptidxbits = 9; ptesize = 8; break; - case VM_1_10_SV48: - levels = 4; ptidxbits = 9; ptesize = 8; break; - case VM_1_10_SV57: - levels = 5; ptidxbits = 9; ptesize = 8; break; - case VM_1_10_MBARE: - *physical = addr; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - return TRANSLATE_SUCCESS; - default: - 
g_assert_not_reached(); + base = (hwaddr)get_field(env->satp, SATP_PPN) << PGSHIFT; + vm = get_field(env->satp, SATP_MODE); } - } else { widened = 0; - base = (hwaddr)(env->sptbr) << PGSHIFT; - sum = !get_field(env->mstatus, MSTATUS_PUM); - vm = get_field(env->mstatus, MSTATUS_VM); - switch (vm) { - case VM_1_09_SV32: - levels = 2; ptidxbits = 10; ptesize = 4; break; - case VM_1_09_SV39: - levels = 3; ptidxbits = 9; ptesize = 8; break; - case VM_1_09_SV48: - levels = 4; ptidxbits = 9; ptesize = 8; break; - case VM_1_09_MBARE: - *physical = addr; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - return TRANSLATE_SUCCESS; - default: - g_assert_not_reached(); - } + } else { + base = (hwaddr)get_field(env->hgatp, HGATP_PPN) << PGSHIFT; + vm = get_field(env->hgatp, HGATP_MODE); + widened = 2; + } + sum = get_field(env->mstatus, MSTATUS_SUM); + switch (vm) { + case VM_1_10_SV32: + levels = 2; ptidxbits = 10; ptesize = 4; break; + case VM_1_10_SV39: + levels = 3; ptidxbits = 9; ptesize = 8; break; + case VM_1_10_SV48: + levels = 4; ptidxbits = 9; ptesize = 8; break; + case VM_1_10_SV57: + levels = 5; ptidxbits = 9; ptesize = 8; break; + case VM_1_10_MBARE: + *physical = addr; + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return TRANSLATE_SUCCESS; + default: + g_assert_not_reached(); } CPUState *cs = env_cpu(env); @@ -438,11 +417,17 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, hwaddr pte_addr; if (two_stage && first_stage) { + int vbase_prot; hwaddr vbase; /* Do the second stage translation on the base PTE address. */ - get_physical_address(env, &vbase, prot, base, access_type, - mmu_idx, false, true); + int vbase_ret = get_physical_address(env, &vbase, &vbase_prot, + base, MMU_DATA_LOAD, + mmu_idx, false, true); + + if (vbase_ret != TRANSLATE_SUCCESS) { + return vbase_ret; + } pte_addr = vbase + idx * ptesize; } else { @@ -456,17 +441,9 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, } #if defined(TARGET_RISCV32) -#ifdef UNICORN_ARCH_POSTFIX target_ulong pte = glue(address_space_ldl, UNICORN_ARCH_POSTFIX)(cs->as->uc, cs->as, pte_addr, attrs, &res); -#else - target_ulong pte = address_space_ldl(cs->as->uc, cs->as, pte_addr, attrs, &res); -#endif #elif defined(TARGET_RISCV64) -#ifdef UNICORN_ARCH_POSTFIX target_ulong pte = glue(address_space_ldq, UNICORN_ARCH_POSTFIX)(cs->as->uc, cs->as, pte_addr, attrs, &res); -#else - target_ulong pte = address_space_ldq(cs->as->uc, cs->as, pte_addr, attrs, &res); -#endif #endif if (res != MEMTX_OK) { return TRANSLATE_FAIL; @@ -528,18 +505,14 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, &addr1, &l, false, MEMTXATTRS_UNSPECIFIED); if (memory_region_is_ram(mr)) { target_ulong *pte_pa = - qemu_map_ram_ptr(mr->uc, mr->ram_block, addr1); + qemu_map_ram_ptr(cs->as->uc, mr->ram_block, addr1); #if TCG_OVERSIZED_GUEST /* MTTCG is not enabled on oversized TCG guests so * page table updates do not need to be atomic */ *pte_pa = pte = updated_pte; #else target_ulong old_pte = -#ifdef _MSC_VER - atomic_cmpxchg((long *)pte_pa, pte, updated_pte); -#else atomic_cmpxchg(pte_pa, pte, updated_pte); -#endif if (old_pte != pte) { goto restart; } else { @@ -556,12 +529,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, /* for superpage mappings, make a fake leaf PTE for the TLB's benefit. 
*/ target_ulong vpn = addr >> PGSHIFT; - if (i == 0) { - *physical = (ppn | (vpn & ((1L << (ptshift + widened)) - 1))) << - PGSHIFT; - } else { - *physical = (ppn | (vpn & ((1L << ptshift) - 1))) << PGSHIFT; - } + *physical = (ppn | (vpn & ((1L << ptshift) - 1))) << PGSHIFT; /* set permissions on the TLB entry */ if ((pte & PTE_R) || ((pte & PTE_X) && mxr)) { @@ -590,7 +558,6 @@ static void raise_mmu_exception(CPURISCVState *env, target_ulong address, int page_fault_exceptions; if (first_stage) { page_fault_exceptions = - (env->priv_ver >= PRIV_VERSION_1_10_0) && get_field(env->satp, SATP_MODE) != VM_1_10_MBARE && !pmp_violation; } else { @@ -702,7 +669,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, CPURISCVState *env = &cpu->env; vaddr im_address; hwaddr pa = 0; - int prot; + int prot, prot2; bool pmp_violation = false; bool m_mode_two_stage = false; bool hs_mode_two_stage = false; @@ -752,13 +719,13 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, /* Second stage lookup */ im_address = pa; - ret = get_physical_address(env, &pa, &prot, im_address, + ret = get_physical_address(env, &pa, &prot2, im_address, access_type, mmu_idx, false, true); qemu_log_mask(CPU_LOG_MMU, "%s 2nd-stage address=%" VADDR_PRIx " ret %d physical " TARGET_FMT_plx " prot %d\n", - __func__, im_address, ret, pa, prot); + __func__, im_address, ret, pa, prot2); if (riscv_feature(env, RISCV_FEATURE_PMP) && (ret == TRANSLATE_SUCCESS) && @@ -916,8 +883,7 @@ void riscv_cpu_do_interrupt(CPUState *cs) } s = env->mstatus; - s = set_field(s, MSTATUS_SPIE, env->priv_ver >= PRIV_VERSION_1_10_0 ? - get_field(s, MSTATUS_SIE) : get_field(s, MSTATUS_UIE << env->priv)); + s = set_field(s, MSTATUS_SPIE, get_field(s, MSTATUS_SIE)); s = set_field(s, MSTATUS_SPP, env->priv); s = set_field(s, MSTATUS_SIE, 0); env->mstatus = s; @@ -954,8 +920,7 @@ void riscv_cpu_do_interrupt(CPUState *cs) } s = env->mstatus; - s = set_field(s, MSTATUS_MPIE, env->priv_ver >= PRIV_VERSION_1_10_0 ? 
- get_field(s, MSTATUS_MIE) : get_field(s, MSTATUS_UIE << env->priv)); + s = set_field(s, MSTATUS_MPIE, get_field(s, MSTATUS_MIE)); s = set_field(s, MSTATUS_MPP, env->priv); s = set_field(s, MSTATUS_MIE, 0); env->mstatus = s; diff --git a/qemu/target/riscv/csr.c b/qemu/target/riscv/csr.c index 785ef26dc4..bd746455d1 100644 --- a/qemu/target/riscv/csr.c +++ b/qemu/target/riscv/csr.c @@ -23,12 +23,21 @@ #include "exec/exec-all.h" static int fs(CPURISCVState *env, int csrno); +static int vs(CPURISCVState *env, int csrno); static int read_fflags(CPURISCVState *env, int csrno, target_ulong *val); static int write_fflags(CPURISCVState *env, int csrno, target_ulong val); static int read_frm(CPURISCVState *env, int csrno, target_ulong *val); static int write_frm(CPURISCVState *env, int csrno, target_ulong val); static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val); static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val); +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val); +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val); +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val); +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val); +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val); +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val); +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val); +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val); static int ctr(CPURISCVState *env, int csrno); static int read_instret(CPURISCVState *env, int csrno, target_ulong *val); static int read_time(CPURISCVState *env, int csrno, target_ulong *val); @@ -49,8 +58,6 @@ static int read_mtvec(CPURISCVState *env, int csrno, target_ulong *val); static int write_mtvec(CPURISCVState *env, int csrno, target_ulong val); static int read_mcounteren(CPURISCVState *env, int csrno, target_ulong *val); static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val); -static int read_mucounteren(CPURISCVState *env, int csrno, target_ulong *val); -static int write_mucounteren(CPURISCVState *env, int csrno, target_ulong val); static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val); static int write_mscounteren(CPURISCVState *env, int csrno, target_ulong val); static int read_mscratch(CPURISCVState *env, int csrno, target_ulong *val); @@ -154,6 +161,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_FRM] = { fs, read_frm, write_frm }, [CSR_FCSR] = { fs, read_fcsr, write_fcsr }, + /* Vector CSRs */ + [CSR_VSTART] = { vs, read_vstart, write_vstart }, + [CSR_VXSAT] = { vs, read_vxsat, write_vxsat }, + [CSR_VXRM] = { vs, read_vxrm, write_vxrm }, + [CSR_VL] = { vs, read_vl }, + [CSR_VTYPE] = { vs, read_vtype }, /* User Timers and Counters */ [CSR_CYCLE] = { ctr, read_instret }, [CSR_INSTRET] = { ctr, read_instret }, @@ -196,8 +209,6 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_MSTATUSH] = { any, read_mstatush, write_mstatush }, #endif - /* Legacy Counter Setup (priv v1.9.1) */ - [CSR_MUCOUNTEREN] = { any, read_mucounteren, write_mucounteren }, [CSR_MSCOUNTEREN] = { any, read_mscounteren, write_mscounteren }, /* Machine Trap Handling */ @@ -441,41 +452,34 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops) /* Predicates */ static int fs(CPURISCVState *env, int csrno) { + /* loose check condition for fcsr in vector extension */ + if ((csrno == CSR_FCSR) && (env->misa & 
RVV)) { + return 0; + } if (!env->debugger && !riscv_cpu_fp_enabled(env)) { return -1; } return 0; } +static int vs(CPURISCVState *env, int csrno) +{ + if (env->misa & RVV) { + return 0; + } + return -1; +} + static int ctr(CPURISCVState *env, int csrno) { CPUState *cs = env_cpu(env); RISCVCPU *cpu = RISCV_CPU(cs); - uint32_t ctr_en = ~0u; if (!cpu->cfg.ext_counters) { /* The Counters extensions is not enabled */ return -1; } - /* - * The counters are always enabled at run time on newer priv specs, as the - * CSR has changed from controlling that the counters can be read to - * controlling that the counters increment. - */ - if (env->priv_ver > PRIV_VERSION_1_09_1) { - return 0; - } - - if (env->priv < PRV_M) { - ctr_en &= env->mcounteren; - } - if (env->priv < PRV_S) { - ctr_en &= env->scounteren; - } - if (!(ctr_en & (1u << (csrno & 31)))) { - return -1; - } return 0; } @@ -554,6 +558,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val) } *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) | (env->frm << FSR_RD_SHIFT); + if (vs(env, csrno) >= 0) { + *val |= (env->vxrm << FSR_VXRM_SHIFT) + | (env->vxsat << FSR_VXSAT_SHIFT); + } return 0; } @@ -564,10 +572,62 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val) } env->mstatus |= MSTATUS_FS; env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; + if (vs(env, csrno) >= 0) { + env->vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; + env->vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; + } riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); return 0; } +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vtype; + return 0; +} + +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vl; + return 0; +} + +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vxrm; + return 0; +} + +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vxrm = val; + return 0; +} + +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vxsat; + return 0; +} + +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vxsat = val; + return 0; +} + +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vstart; + return 0; +} + +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vstart = val; + return 0; +} + /* User Timers and Counters */ static int read_instret(CPURISCVState *env, int csrno, target_ulong *val) { @@ -640,9 +700,6 @@ static const target_ulong delegable_excps = (1ULL << (RISCV_EXCP_INST_GUEST_PAGE_FAULT)) | (1ULL << (RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT)) | (1ULL << (RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT)); -static const target_ulong sstatus_v1_9_mask = SSTATUS_SIE | SSTATUS_SPIE | - SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS | - SSTATUS_SUM | SSTATUS_SD; static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE | SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS | SSTATUS_SUM | SSTATUS_MXR | SSTATUS_SD; @@ -651,20 +708,11 @@ static const target_ulong hip_writable_mask = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP; static const target_ulong vsip_writable_mask = MIP_VSSIP; #if defined(TARGET_RISCV32) -static const char valid_vm_1_09[16] = { - [VM_1_09_MBARE] = 1, - [VM_1_09_SV32] = 1, -}; static const char valid_vm_1_10[16] = { [VM_1_10_MBARE] = 1, [VM_1_10_SV32] = 1 }; #elif defined(TARGET_RISCV64) -static const char 
valid_vm_1_09[16] = { - [VM_1_09_MBARE] = 1, - [VM_1_09_SV39] = 1, - [VM_1_09_SV48] = 1, -}; static const char valid_vm_1_10[16] = { [VM_1_10_MBARE] = 1, [VM_1_10_SV39] = 1, @@ -694,8 +742,7 @@ static int read_mstatus(CPURISCVState *env, int csrno, target_ulong *val) static int validate_vm(CPURISCVState *env, target_ulong vm) { - return (env->priv_ver >= PRIV_VERSION_1_10_0) ? - valid_vm_1_10[vm & 0xf] : valid_vm_1_09[vm & 0xf]; + return valid_vm_1_10[vm & 0xf]; } static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val) @@ -705,34 +752,21 @@ static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val) int dirty; /* flush tlb on mstatus fields that affect VM */ - if (env->priv_ver <= PRIV_VERSION_1_09_1) { - if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | - MSTATUS_MPRV | MSTATUS_SUM | MSTATUS_VM)) { - tlb_flush(env_cpu(env)); - } - mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | - MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | - MSTATUS_MPP | MSTATUS_MXR | - (validate_vm(env, get_field(val, MSTATUS_VM)) ? - MSTATUS_VM : 0); + if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV | + MSTATUS_MPRV | MSTATUS_SUM)) { + tlb_flush(env_cpu(env)); } - if (env->priv_ver >= PRIV_VERSION_1_10_0) { - if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV | - MSTATUS_MPRV | MSTATUS_SUM)) { - tlb_flush(env_cpu(env)); - } - mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | - MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | - MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR | - MSTATUS_TW; + mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | + MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | + MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR | + MSTATUS_TW; #if defined(TARGET_RISCV64) - /* - * RV32: MPV and MTL are not in mstatus. The current plan is to - * add them to mstatush. For now, we just don't support it. - */ - mask |= MSTATUS_MTL | MSTATUS_MPV; + /* + * RV32: MPV and MTL are not in mstatus. The current plan is to + * add them to mstatush. For now, we just don't support it. 
+ */ + mask |= MSTATUS_MTL | MSTATUS_MPV; #endif - } mstatus = (mstatus & ~mask) | (val & mask); @@ -881,18 +915,12 @@ static int write_mtvec(CPURISCVState *env, int csrno, target_ulong val) static int read_mcounteren(CPURISCVState *env, int csrno, target_ulong *val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } *val = env->mcounteren; return 0; } static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } env->mcounteren = val; return 0; } @@ -900,8 +928,7 @@ static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val) /* This regiser is replaced with CSR_MCOUNTINHIBIT in 1.11.0 */ static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val) { - if (env->priv_ver > PRIV_VERSION_1_09_1 - && env->priv_ver < PRIV_VERSION_1_11_0) { + if (env->priv_ver < PRIV_VERSION_1_11_0) { return -1; } *val = env->mcounteren; @@ -911,32 +938,13 @@ static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val) /* This regiser is replaced with CSR_MCOUNTINHIBIT in 1.11.0 */ static int write_mscounteren(CPURISCVState *env, int csrno, target_ulong val) { - if (env->priv_ver > PRIV_VERSION_1_09_1 - && env->priv_ver < PRIV_VERSION_1_11_0) { + if (env->priv_ver < PRIV_VERSION_1_11_0) { return -1; } env->mcounteren = val; return 0; } -static int read_mucounteren(CPURISCVState *env, int csrno, target_ulong *val) -{ - if (env->priv_ver > PRIV_VERSION_1_09_1) { - return -1; - } - *val = env->scounteren; - return 0; -} - -static int write_mucounteren(CPURISCVState *env, int csrno, target_ulong val) -{ - if (env->priv_ver > PRIV_VERSION_1_09_1) { - return -1; - } - env->scounteren = val; - return 0; -} - /* Machine Trap Handling */ static int read_mscratch(CPURISCVState *env, int csrno, target_ulong *val) { @@ -1010,16 +1018,14 @@ static int rmw_mip(CPURISCVState *env, int csrno, target_ulong *ret_value, /* Supervisor Trap Setup */ static int read_sstatus(CPURISCVState *env, int csrno, target_ulong *val) { - target_ulong mask = ((env->priv_ver >= PRIV_VERSION_1_10_0) ? - sstatus_v1_10_mask : sstatus_v1_9_mask); + target_ulong mask = (sstatus_v1_10_mask); *val = env->mstatus & mask; return 0; } static int write_sstatus(CPURISCVState *env, int csrno, target_ulong val) { - target_ulong mask = ((env->priv_ver >= PRIV_VERSION_1_10_0) ? 
- sstatus_v1_10_mask : sstatus_v1_9_mask); + target_ulong mask = (sstatus_v1_10_mask); target_ulong newval = (env->mstatus & ~mask) | (val & mask); return write_mstatus(env, CSR_MSTATUS, newval); } @@ -1069,18 +1075,12 @@ static int write_stvec(CPURISCVState *env, int csrno, target_ulong val) static int read_scounteren(CPURISCVState *env, int csrno, target_ulong *val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } *val = env->scounteren; return 0; } static int write_scounteren(CPURISCVState *env, int csrno, target_ulong val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } env->scounteren = val; return 0; } @@ -1159,15 +1159,15 @@ static int read_satp(CPURISCVState *env, int csrno, target_ulong *val) { if (!riscv_feature(env, RISCV_FEATURE_MMU)) { *val = 0; - } else if (env->priv_ver >= PRIV_VERSION_1_10_0) { - if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { - return -1; - } else { - *val = env->satp; - } + return 0; + } + + if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { + return -1; } else { - *val = env->sptbr; + *val = env->satp; } + return 0; } @@ -1176,13 +1176,7 @@ static int write_satp(CPURISCVState *env, int csrno, target_ulong val) if (!riscv_feature(env, RISCV_FEATURE_MMU)) { return 0; } - if (env->priv_ver <= PRIV_VERSION_1_09_1 && (val ^ env->sptbr)) { - tlb_flush(env_cpu(env)); - env->sptbr = val & (((target_ulong) - 1 << (TARGET_PHYS_ADDR_SPACE_BITS - PGSHIFT)) - 1); - } - if (env->priv_ver >= PRIV_VERSION_1_10_0 && - validate_vm(env, get_field(val, SATP_MODE)) && + if (validate_vm(env, get_field(val, SATP_MODE)) && ((val ^ env->satp) & (SATP_MODE | SATP_ASID | SATP_PPN))) { if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { diff --git a/qemu/target/riscv/fpu_helper.c b/qemu/target/riscv/fpu_helper.c index 3fb6684b16..4379756dc4 100644 --- a/qemu/target/riscv/fpu_helper.c +++ b/qemu/target/riscv/fpu_helper.c @@ -22,6 +22,7 @@ #include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" +#include "internals.h" target_ulong riscv_cpu_get_fflags(CPURISCVState *env) { @@ -230,21 +231,7 @@ uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1) target_ulong helper_fclass_s(uint64_t frs1) { - float32 f = frs1; - bool sign = float32_is_neg(f); - - if (float32_is_infinity(f)) { - return sign ? 1 << 0 : 1 << 7; - } else if (float32_is_zero(f)) { - return sign ? 1 << 3 : 1 << 4; - } else if (float32_is_zero_or_denormal(f)) { - return sign ? 1 << 2 : 1 << 5; - } else if (float32_is_any_nan(f)) { - float_status s = { 0 }; /* for snan_bit_is_one */ - return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; - } else { - return sign ? 1 << 1 : 1 << 6; - } + return fclass_s(frs1); } uint64_t helper_fadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) @@ -353,19 +340,5 @@ uint64_t helper_fcvt_d_lu(CPURISCVState *env, uint64_t rs1) target_ulong helper_fclass_d(uint64_t frs1) { - float64 f = frs1; - bool sign = float64_is_neg(f); - - if (float64_is_infinity(f)) { - return sign ? 1 << 0 : 1 << 7; - } else if (float64_is_zero(f)) { - return sign ? 1 << 3 : 1 << 4; - } else if (float64_is_zero_or_denormal(f)) { - return sign ? 1 << 2 : 1 << 5; - } else if (float64_is_any_nan(f)) { - float_status s = { 0 }; /* for snan_bit_is_one */ - return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; - } else { - return sign ? 
1 << 1 : 1 << 6; - } + return fclass_d(frs1); } diff --git a/qemu/target/riscv/helper.h b/qemu/target/riscv/helper.h index 32e483860f..11b0f57c14 100644 --- a/qemu/target/riscv/helper.h +++ b/qemu/target/riscv/helper.h @@ -78,3 +78,1077 @@ DEF_HELPER_2(sret, tl, env, tl) DEF_HELPER_2(mret, tl, env, tl) DEF_HELPER_1(wfi, void, env) DEF_HELPER_1(tlb_flush, void, env) + +/* Hypervisor functions */ +#ifndef CONFIG_USER_ONLY +DEF_HELPER_1(hyp_tlb_flush, void, env) +#endif + +/* Vector functions */ +DEF_HELPER_3(vsetvl, tl, env, tl, tl) +DEF_HELPER_5(vlb_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_h_mask, void, ptr, ptr, tl, env, i32) 
+DEF_HELPER_5(vsh_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_6(vlsb_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsw_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsw_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlswu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlswu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssw_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssw_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlxb_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_h, void, ptr, 
ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxwu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxwu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_5(vlbff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32) +#ifdef TARGET_RISCV64 +DEF_HELPER_6(vamoswapw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoswapd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxorw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxord_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoorw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoord_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomind_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxd_v_d, void, ptr, ptr, 
tl, ptr, env, i32) +DEF_HELPER_6(vamominuw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominud_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxuw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxud_v_d, void, ptr, ptr, tl, ptr, env, i32) +#endif +DEF_HELPER_6(vamoswapw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxorw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoorw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_FLAGS_4(vec_rsubs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_6(vwaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) 
+DEF_HELPER_6(vwadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) 
+DEF_HELPER_6(vmsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vand_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vnsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_b, void, 
ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmseq_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_d, void, ptr, ptr, tl, ptr, env, i32) 
+DEF_HELPER_6(vmsgt_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vminu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, 
env, i32) +DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vdivu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_b, 
void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmerge_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) 
+DEF_HELPER_6(vmerge_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_b, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_h, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_w, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_d, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32) + +DEF_HELPER_6(vsaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vaadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_d, void, 
ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vsmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwsmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vssrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vnclip_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclip_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclip_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vx_b, void, ptr, ptr, tl, ptr, env, 
i32) +DEF_HELPER_6(vnclipu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vfadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vv_w, void, ptr, 
ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) 
+DEF_HELPER_6(vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfsgnj_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vmfeq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_w, void, ptr, ptr, 
i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_xu_f_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vfwcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vfncvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) + 
+DEF_HELPER_6(vredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vwredsumu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsumu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsumu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vmand_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmnand_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmandnot_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmxor_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32) 
+DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32) + +DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32) + +DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vmsof_m, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(viota_m_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32) + +DEF_HELPER_6(vslideup_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vrgather_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32) diff --git a/qemu/target/riscv/insn_trans/trans_privileged.inc.c b/qemu/target/riscv/insn_trans/trans_privileged.inc.c index 05662b21e6..7bfb889d35 100644 --- a/qemu/target/riscv/insn_trans/trans_privileged.inc.c +++ b/qemu/target/riscv/insn_trans/trans_privileged.inc.c @@ -77,57 +77,11 @@ static bool trans_wfi(DisasContext *ctx, arg_wfi *a) static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver >= PRIV_VERSION_1_10_0) { - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - } - return false; + gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); + return true; } static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver <= PRIV_VERSION_1_09_1) { - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - } - return false; -} - -static 
bool trans_hfence_gvma(DisasContext *ctx, arg_sfence_vma *a) -{ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver >= PRIV_VERSION_1_10_0 && - has_ext(ctx, RVH)) { - /* Hpervisor extensions exist */ - /* - * if (env->priv == PRV_M || - * (env->priv == PRV_S && - * !riscv_cpu_virt_enabled(env) && - * get_field(ctx->mstatus_fs, MSTATUS_TVM))) { - */ - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - /* } */ - } - return false; -} - -static bool trans_hfence_bvma(DisasContext *ctx, arg_sfence_vma *a) -{ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver >= PRIV_VERSION_1_10_0 && - has_ext(ctx, RVH)) { - /* Hpervisor extensions exist */ - /* - * if (env->priv == PRV_M || - * (env->priv == PRV_S && - * !riscv_cpu_virt_enabled(env) && - * get_field(ctx->mstatus_fs, MSTATUS_TVM))) { - */ - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - /* } */ - } return false; } diff --git a/qemu/target/riscv/insn_trans/trans_rvd.inc.c b/qemu/target/riscv/insn_trans/trans_rvd.inc.c index 2e643d5168..e461146e23 100644 --- a/qemu/target/riscv/insn_trans/trans_rvd.inc.c +++ b/qemu/target/riscv/insn_trans/trans_rvd.inc.c @@ -314,7 +314,7 @@ static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a) TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv t0 = tcg_temp_new(tcg_ctx); - gen_helper_fclass_d(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); + glue(gen_helper_fclass_d, UNICORN_ARCH_POSTFIX)(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); gen_set_gpr(tcg_ctx, a->rd, t0); tcg_temp_free(tcg_ctx, t0); return true; diff --git a/qemu/target/riscv/insn_trans/trans_rvf.inc.c b/qemu/target/riscv/insn_trans/trans_rvf.inc.c index de044bfeb9..b4fd677b23 100644 --- a/qemu/target/riscv/insn_trans/trans_rvf.inc.c +++ b/qemu/target/riscv/insn_trans/trans_rvf.inc.c @@ -23,6 +23,21 @@ return false; \ } while (0) +/* + * RISC-V requires NaN-boxing of narrower width floating + * point values. This applies when a 32-bit value is + * assigned to a 64-bit FP register. Thus this does not + * apply when the RVD extension is not present. + */ +static void gen_nanbox_fpr(DisasContext *ctx, int regno) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + if (has_ext(ctx, RVD)) { + tcg_gen_ori_i64(tcg_ctx, tcg_ctx->cpu_fpr[regno], tcg_ctx->cpu_fpr[regno], + MAKE_64BIT_MASK(32, 32)); + } +} + static bool trans_flw(DisasContext *ctx, arg_flw *a) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; @@ -33,8 +48,7 @@ static bool trans_flw(DisasContext *ctx, arg_flw *a) tcg_gen_addi_tl(tcg_ctx, t0, t0, a->imm); tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEUL); - /* RISC-V requires NaN-boxing of narrower width floating point values */ - tcg_gen_ori_i64(tcg_ctx, tcg_ctx->cpu_fpr[a->rd], tcg_ctx->cpu_fpr[a->rd], 0xffffffff00000000ULL); + gen_nanbox_fpr(ctx, a->rd); tcg_temp_free(tcg_ctx, t0); mark_fs_dirty(ctx); @@ -343,7 +357,7 @@ static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) TCGv t0 = tcg_temp_new(tcg_ctx); - gen_helper_fclass_s(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); + glue(gen_helper_fclass_s, UNICORN_ARCH_POSTFIX)(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); gen_set_gpr(tcg_ctx, a->rd, t0); tcg_temp_free(tcg_ctx, t0); diff --git a/qemu/target/riscv/insn_trans/trans_rvh.inc.c b/qemu/target/riscv/insn_trans/trans_rvh.inc.c new file mode 100644 index 0000000000..c238510e4f --- /dev/null +++ b/qemu/target/riscv/insn_trans/trans_rvh.inc.c @@ -0,0 +1,33 @@ +/* + * RISC-V translation routines for the RVXI Base Integer Instruction Set. 
+ * + * Copyright (c) 2020 Western Digital + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +static bool trans_hfence_gvma(DisasContext *ctx, arg_sfence_vma *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + REQUIRE_EXT(ctx, RVH); + gen_helper_hyp_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); + return true; +} + +static bool trans_hfence_vvma(DisasContext *ctx, arg_sfence_vma *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + REQUIRE_EXT(ctx, RVH); + gen_helper_hyp_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); + return true; +} diff --git a/qemu/target/riscv/insn_trans/trans_rvv.inc.c b/qemu/target/riscv/insn_trans/trans_rvv.inc.c new file mode 100644 index 0000000000..40b74f11ce --- /dev/null +++ b/qemu/target/riscv/insn_trans/trans_rvv.inc.c @@ -0,0 +1,2954 @@ +/* + * RISC-V translation routines for the RVV Standard Extension. + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +#include "tcg/tcg-op-gvec.h" +#include "tcg/tcg-gvec-desc.h" +#include "internals.h" + +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv s1, s2, dst; + + if (!has_ext(ctx, RVV)) { + return false; + } + + s2 = tcg_temp_new(tcg_ctx); + dst = tcg_temp_new(tcg_ctx); + + /* Using x0 as the rs1 register specifier encodes an infinite AVL */ + if (a->rs1 == 0) { + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ + s1 = tcg_const_tl(tcg_ctx, RV_VLEN_MAX); + } else { + s1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, s1, a->rs1); + } + gen_get_gpr(tcg_ctx, s2, a->rs2); + gen_helper_vsetvl(tcg_ctx, dst, tcg_ctx->cpu_env, s1, s2); + gen_set_gpr(tcg_ctx, a->rd, dst); + tcg_gen_movi_tl(tcg_ctx, tcg_ctx->cpu_pc, ctx->pc_succ_insn); + lookup_and_goto_ptr(ctx); + ctx->base.is_jmp = DISAS_NORETURN; + + tcg_temp_free(tcg_ctx, s1); + tcg_temp_free(tcg_ctx, s2); + tcg_temp_free(tcg_ctx, dst); + return true; +} + +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv s1, s2, dst; + + if (!has_ext(ctx, RVV)) { + return false; + } + + s2 = tcg_const_tl(tcg_ctx, a->zimm); + dst = tcg_temp_new(tcg_ctx); + + /* Using x0 as the rs1 register specifier encodes an infinite AVL */ + if (a->rs1 == 0) { + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ + s1 = tcg_const_tl(tcg_ctx, RV_VLEN_MAX); + } else { + s1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, s1, a->rs1); + } + gen_helper_vsetvl(tcg_ctx, dst, tcg_ctx->cpu_env, s1, s2); + gen_set_gpr(tcg_ctx, a->rd, dst); + gen_goto_tb(ctx, 0, ctx->pc_succ_insn); + ctx->base.is_jmp = DISAS_NORETURN; + + tcg_temp_free(tcg_ctx, s1); + tcg_temp_free(tcg_ctx, s2); + tcg_temp_free(tcg_ctx, dst); + return true; +} + +/* vector register offset from env */ +static uint32_t vreg_ofs(DisasContext *s, int reg) +{ + return offsetof(CPURISCVState, vreg) + reg * s->vlen / 8; +} + +/* check functions */ + +/* + * In cpu_get_tb_cpu_state(), VILL is set if RVV was not present. + * So RVV is also checked in this function. + */ +static bool vext_check_isa_ill(DisasContext *s) +{ + return !s->vill; +} + +/* + * There are two rules checked here. + * + * 1. Vector register numbers are multiples of LMUL. (Section 3.2) + * + * 2. For all widening instructions, the destination LMUL value must also be + * a supported LMUL value. (Section 11.2) + */ +static bool vext_check_reg(DisasContext *s, uint32_t reg, bool widen) +{ + /* + * The destination vector register group results are arranged as if both + * SEW and LMUL were at twice their current settings. (Section 11.2). + */ + int legal = widen ? 2 << s->lmul : 1 << s->lmul; + + return !((s->lmul == 0x3 && widen) || (reg % legal)); +} + +/* + * There are two rules checked here. + * + * 1. The destination vector register group for a masked vector instruction can + * only overlap the source mask register (v0) when LMUL=1. (Section 5.3) + * + * 2. In widening instructions and some other instructions, like vslideup.vx, + * there is no need to check whether LMUL=1. + */ +static bool vext_check_overlap_mask(DisasContext *s, uint32_t vd, bool vm, + bool force) +{ + return (vm != 0 || vd != 0) || (!force && (s->lmul == 0)); +} + +/* The LMUL setting must be such that LMUL * NFIELDS <= 8.
(Section 7.8) */ +static bool vext_check_nf(DisasContext *s, uint32_t nf) +{ + return (1 << s->lmul) * nf <= 8; +} + +/* + * The destination vector register group cannot overlap a source vector register + * group of a different element width. (Section 11.2) + */ +static inline bool vext_check_overlap_group(int rd, int dlen, int rs, int slen) +{ + return ((rd >= rs + slen) || (rs >= rd + dlen)); +} +/* common translation macro */ +#define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\ +{ \ + if (CHECK(s, a)) { \ + return OP(s, a, SEQ); \ + } \ + return false; \ +} + +/* + *** unit stride load and store + */ +typedef void gen_helper_ldst_us(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, + TCGv_env, TCGv_i32); + +static bool ldst_us_trans(TCGContext *tcg_ctx, uint32_t vd, uint32_t rs1, uint32_t data, + gen_helper_ldst_us *fn, DisasContext *s) +{ + TCGv_ptr dest, mask; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + + /* + * As simd_desc supports at most 256 bytes, and in this implementation, + * the max vector group length is 2048 bytes. So split it into two parts. + * + * The first part is vlen in bytes, encoded in maxsz of simd_desc. + * The second part is lmul, encoded in data of simd_desc. + */ + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[2][7][4] = { + /* masked unit stride load */ + { { gen_helper_vlb_v_b_mask, gen_helper_vlb_v_h_mask, + gen_helper_vlb_v_w_mask, gen_helper_vlb_v_d_mask }, + { NULL, gen_helper_vlh_v_h_mask, + gen_helper_vlh_v_w_mask, gen_helper_vlh_v_d_mask }, + { NULL, NULL, + gen_helper_vlw_v_w_mask, gen_helper_vlw_v_d_mask }, + { gen_helper_vle_v_b_mask, gen_helper_vle_v_h_mask, + gen_helper_vle_v_w_mask, gen_helper_vle_v_d_mask }, + { gen_helper_vlbu_v_b_mask, gen_helper_vlbu_v_h_mask, + gen_helper_vlbu_v_w_mask, gen_helper_vlbu_v_d_mask }, + { NULL, gen_helper_vlhu_v_h_mask, + gen_helper_vlhu_v_w_mask, gen_helper_vlhu_v_d_mask }, + { NULL, NULL, + gen_helper_vlwu_v_w_mask, gen_helper_vlwu_v_d_mask } }, + /* unmasked unit stride load */ + { { gen_helper_vlb_v_b, gen_helper_vlb_v_h, + gen_helper_vlb_v_w, gen_helper_vlb_v_d }, + { NULL, gen_helper_vlh_v_h, + gen_helper_vlh_v_w, gen_helper_vlh_v_d }, + { NULL, NULL, + gen_helper_vlw_v_w, gen_helper_vlw_v_d }, + { gen_helper_vle_v_b, gen_helper_vle_v_h, + gen_helper_vle_v_w, gen_helper_vle_v_d }, + { gen_helper_vlbu_v_b, gen_helper_vlbu_v_h, + gen_helper_vlbu_v_w, gen_helper_vlbu_v_d }, + { NULL, gen_helper_vlhu_v_h, + gen_helper_vlhu_v_w, gen_helper_vlhu_v_d }, + { NULL, NULL, + gen_helper_vlwu_v_w, gen_helper_vlwu_v_d } } + }; + + fn = fns[a->vm][seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, 
MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_us_trans(tcg_ctx, a->rd, a->rs1, data, fn, s); +} + +static bool ld_us_check(DisasContext *s, arg_r2nfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vlb_v, 0, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlh_v, 1, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlw_v, 2, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vle_v, 3, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlbu_v, 4, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlhu_v, 5, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlwu_v, 6, r2nfvm, ld_us_op, ld_us_check) + +static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[2][4][4] = { + /* masked unit stride load and store */ + { { gen_helper_vsb_v_b_mask, gen_helper_vsb_v_h_mask, + gen_helper_vsb_v_w_mask, gen_helper_vsb_v_d_mask }, + { NULL, gen_helper_vsh_v_h_mask, + gen_helper_vsh_v_w_mask, gen_helper_vsh_v_d_mask }, + { NULL, NULL, + gen_helper_vsw_v_w_mask, gen_helper_vsw_v_d_mask }, + { gen_helper_vse_v_b_mask, gen_helper_vse_v_h_mask, + gen_helper_vse_v_w_mask, gen_helper_vse_v_d_mask } }, + /* unmasked unit stride store */ + { { gen_helper_vsb_v_b, gen_helper_vsb_v_h, + gen_helper_vsb_v_w, gen_helper_vsb_v_d }, + { NULL, gen_helper_vsh_v_h, + gen_helper_vsh_v_w, gen_helper_vsh_v_d }, + { NULL, NULL, + gen_helper_vsw_v_w, gen_helper_vsw_v_d }, + { gen_helper_vse_v_b, gen_helper_vse_v_h, + gen_helper_vse_v_w, gen_helper_vse_v_d } } + }; + + fn = fns[a->vm][seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_us_trans(tcg_ctx, a->rd, a->rs1, data, fn, s); +} + +static bool st_us_check(DisasContext *s, arg_r2nfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vsb_v, 0, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vsh_v, 1, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vsw_v, 2, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vse_v, 3, r2nfvm, st_us_op, st_us_check) + +/* + *** stride load and store + */ +typedef void gen_helper_ldst_stride(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, + TCGv, TCGv_env, TCGv_i32); + +static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, + uint32_t data, gen_helper_ldst_stride *fn, + DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask; + TCGv base, stride; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + stride = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + gen_get_gpr(tcg_ctx, stride, rs2); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, stride, tcg_ctx->cpu_env, desc); + + 
tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free(tcg_ctx, stride); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * const fns[7][4] = { + { gen_helper_vlsb_v_b, gen_helper_vlsb_v_h, + gen_helper_vlsb_v_w, gen_helper_vlsb_v_d }, + { NULL, gen_helper_vlsh_v_h, + gen_helper_vlsh_v_w, gen_helper_vlsh_v_d }, + { NULL, NULL, + gen_helper_vlsw_v_w, gen_helper_vlsw_v_d }, + { gen_helper_vlse_v_b, gen_helper_vlse_v_h, + gen_helper_vlse_v_w, gen_helper_vlse_v_d }, + { gen_helper_vlsbu_v_b, gen_helper_vlsbu_v_h, + gen_helper_vlsbu_v_w, gen_helper_vlsbu_v_d }, + { NULL, gen_helper_vlshu_v_h, + gen_helper_vlshu_v_w, gen_helper_vlshu_v_d }, + { NULL, NULL, + gen_helper_vlswu_v_w, gen_helper_vlswu_v_d }, + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool ld_stride_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vlsb_v, 0, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsh_v, 1, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsw_v, 2, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlse_v, 3, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsbu_v, 4, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlshu_v, 5, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlswu_v, 6, rnfvm, ld_stride_op, ld_stride_check) + +static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * const fns[4][4] = { + /* masked stride store */ + { gen_helper_vssb_v_b, gen_helper_vssb_v_h, + gen_helper_vssb_v_w, gen_helper_vssb_v_d }, + { NULL, gen_helper_vssh_v_h, + gen_helper_vssh_v_w, gen_helper_vssh_v_d }, + { NULL, NULL, + gen_helper_vssw_v_w, gen_helper_vssw_v_d }, + { gen_helper_vsse_v_b, gen_helper_vsse_v_h, + gen_helper_vsse_v_w, gen_helper_vsse_v_d } + }; + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool st_stride_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check) + +/* + *** index load and store + */ +typedef void gen_helper_ldst_index(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, + TCGv_ptr, TCGv_env, TCGv_i32); + +static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_ldst_index *fn, + DisasContext *s) +{ + TCGContext 
*tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask, index; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + index = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, index, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, index, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, index); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_index *fn; + static gen_helper_ldst_index * const fns[7][4] = { + { gen_helper_vlxb_v_b, gen_helper_vlxb_v_h, + gen_helper_vlxb_v_w, gen_helper_vlxb_v_d }, + { NULL, gen_helper_vlxh_v_h, + gen_helper_vlxh_v_w, gen_helper_vlxh_v_d }, + { NULL, NULL, + gen_helper_vlxw_v_w, gen_helper_vlxw_v_d }, + { gen_helper_vlxe_v_b, gen_helper_vlxe_v_h, + gen_helper_vlxe_v_w, gen_helper_vlxe_v_d }, + { gen_helper_vlxbu_v_b, gen_helper_vlxbu_v_h, + gen_helper_vlxbu_v_w, gen_helper_vlxbu_v_d }, + { NULL, gen_helper_vlxhu_v_h, + gen_helper_vlxhu_v_w, gen_helper_vlxhu_v_d }, + { NULL, NULL, + gen_helper_vlxwu_v_w, gen_helper_vlxwu_v_d }, + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +/* + * For vector indexed segment loads, the destination vector register + * groups cannot overlap the source vector register group (specified by + * `vs2`), else an illegal instruction exception is raised. 
+ */ +static bool ld_index_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_nf(s, a->nf) && + ((a->nf == 1) || + vext_check_overlap_group(a->rd, a->nf << s->lmul, + a->rs2, 1 << s->lmul))); +} + +GEN_VEXT_TRANS(vlxb_v, 0, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxh_v, 1, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxw_v, 2, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxe_v, 3, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxbu_v, 4, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxhu_v, 5, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxwu_v, 6, rnfvm, ld_index_op, ld_index_check) + +static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_index *fn; + static gen_helper_ldst_index * const fns[4][4] = { + { gen_helper_vsxb_v_b, gen_helper_vsxb_v_h, + gen_helper_vsxb_v_w, gen_helper_vsxb_v_d }, + { NULL, gen_helper_vsxh_v_h, + gen_helper_vsxh_v_w, gen_helper_vsxh_v_d }, + { NULL, NULL, + gen_helper_vsxw_v_w, gen_helper_vsxw_v_d }, + { gen_helper_vsxe_v_b, gen_helper_vsxe_v_h, + gen_helper_vsxe_v_w, gen_helper_vsxe_v_d } + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool st_index_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check) + +/* + *** unit stride fault-only-first load + */ +static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, + gen_helper_ldst_us *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[7][4] = { + { gen_helper_vlbff_v_b, gen_helper_vlbff_v_h, + gen_helper_vlbff_v_w, gen_helper_vlbff_v_d }, + { NULL, gen_helper_vlhff_v_h, + gen_helper_vlhff_v_w, gen_helper_vlhff_v_d }, + { NULL, NULL, + gen_helper_vlwff_v_w, gen_helper_vlwff_v_d }, + { gen_helper_vleff_v_b, gen_helper_vleff_v_h, + gen_helper_vleff_v_w, 
gen_helper_vleff_v_d }, + { gen_helper_vlbuff_v_b, gen_helper_vlbuff_v_h, + gen_helper_vlbuff_v_w, gen_helper_vlbuff_v_d }, + { NULL, gen_helper_vlhuff_v_h, + gen_helper_vlhuff_v_w, gen_helper_vlhuff_v_d }, + { NULL, NULL, + gen_helper_vlwuff_v_w, gen_helper_vlwuff_v_d } + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldff_trans(a->rd, a->rs1, data, fn, s); +} + +GEN_VEXT_TRANS(vlbff_v, 0, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlhff_v, 1, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlwff_v, 2, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check) + +/* + *** vector atomic operation + */ +typedef void gen_helper_amo(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool amo_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_amo *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask, index; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + index = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, index, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, index, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, index); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + uint32_t data = 0; + gen_helper_amo *fn; + static gen_helper_amo *const fnsw[9] = { + /* no atomic operation */ + gen_helper_vamoswapw_v_w, + gen_helper_vamoaddw_v_w, + gen_helper_vamoxorw_v_w, + gen_helper_vamoandw_v_w, + gen_helper_vamoorw_v_w, + gen_helper_vamominw_v_w, + gen_helper_vamomaxw_v_w, + gen_helper_vamominuw_v_w, + gen_helper_vamomaxuw_v_w + }; +#ifdef TARGET_RISCV64 + static gen_helper_amo *const fnsd[18] = { + gen_helper_vamoswapw_v_d, + gen_helper_vamoaddw_v_d, + gen_helper_vamoxorw_v_d, + gen_helper_vamoandw_v_d, + gen_helper_vamoorw_v_d, + gen_helper_vamominw_v_d, + gen_helper_vamomaxw_v_d, + gen_helper_vamominuw_v_d, + gen_helper_vamomaxuw_v_d, + gen_helper_vamoswapd_v_d, + gen_helper_vamoaddd_v_d, + gen_helper_vamoxord_v_d, + gen_helper_vamoandd_v_d, + gen_helper_vamoord_v_d, + gen_helper_vamomind_v_d, + gen_helper_vamomaxd_v_d, + gen_helper_vamominud_v_d, + gen_helper_vamomaxud_v_d + }; +#endif + + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_exit_atomic(tcg_ctx, tcg_ctx->cpu_env); + s->base.is_jmp = DISAS_NORETURN; + return true; + } else { + if (s->sew == 3) { +#ifdef TARGET_RISCV64 + fn = fnsd[seq]; +#else + /* Check done in amo_check(). 
*/ + g_assert_not_reached(); +#endif + } else { + assert(seq < ARRAY_SIZE(fnsw)); + fn = fnsw[seq]; + } + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, WD, a->wd, data); + return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} +/* + * There are two rules checked here. + * + * 1. SEW must be at least as wide as the AMO memory element size. + * + * 2. If SEW is greater than XLEN, an illegal instruction exception is raised. + */ +static bool amo_check(DisasContext *s, arg_rwdvm* a) +{ + return (!s->vill && has_ext(s, RVA) && + (!a->wd || vext_check_overlap_mask(s, a->rd, a->vm, false)) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + ((1 << s->sew) <= sizeof(target_ulong)) && + ((1 << s->sew) >= 4)); +} + +GEN_VEXT_TRANS(vamoswapw_v, 0, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoaddw_v, 1, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoxorw_v, 2, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoandw_v, 3, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoorw_v, 4, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominw_v, 5, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxw_v, 6, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominuw_v, 7, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxuw_v, 8, rwdvm, amo_op, amo_check) +#ifdef TARGET_RISCV64 +GEN_VEXT_TRANS(vamoswapd_v, 9, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoaddd_v, 10, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoxord_v, 11, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoandd_v, 12, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoord_v, 13, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomind_v, 14, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) +#endif + +/* + *** Vector Integer Arithmetic Instructions + */ +#define MAXSZ(s) (s->vlen >> (3 - s->lmul)) + +static bool opivv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false)); +} + +typedef void GVecGen3Fn(TCGContext *, unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +static inline bool +do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, + gen_helper_gvec_4_ptr *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGLabel *over = gen_new_label(tcg_ctx); + if (!opivv_check(s, a)) { + return false; + } + + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + if (a->vm && s->vl_eq_vlmax) { + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), + MAXSZ(s), MAXSZ(s)); + } else { + uint32_t data = 0; + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fn); + } + gen_set_label(tcg_ctx, over); + return true; +} + +/* OPIVV with GVEC IR */ +#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ 
+ return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVV_GVEC_TRANS(vadd_vv, add) +GEN_OPIVV_GVEC_TRANS(vsub_vv, sub) + +typedef void gen_helper_opivx(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + src1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, src1, rs1); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, src1, src2, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, src1); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool opivx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false)); +} + +typedef void GVecGen2sFn(TCGContext *, unsigned, uint32_t, uint32_t, TCGv_i64, + uint32_t, uint32_t); + +static inline bool +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, + gen_helper_opivx *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + TCGv_i64 src1 = tcg_temp_new_i64(tcg_ctx); + TCGv tmp = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, tmp, a->rs1); + tcg_gen_ext_tl_i64(tcg_ctx, src1, tmp); + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + src1, MAXSZ(s), MAXSZ(s)); + + tcg_temp_free_i64(tcg_ctx, src1); + tcg_temp_free(tcg_ctx, tmp); + return true; + } + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); +} + +/* OPIVX with GVEC IR */ +#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVX_GVEC_TRANS(vadd_vx, adds) +GEN_OPIVX_GVEC_TRANS(vsub_vx, subs) + +static void gen_vec_rsub8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_vec_sub8_i64(tcg_ctx, d, b, a); +} + +static void gen_vec_rsub16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_vec_sub16_i64(tcg_ctx, d, b, a); +} + +static void gen_rsub_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_sub_i32(tcg_ctx, ret, arg2, arg1); +} + +static void gen_rsub_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_sub_i64(tcg_ctx, ret, arg2, arg1); +} + +static void gen_rsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, 
TCGv_vec a, TCGv_vec b) +{ + tcg_gen_sub_vec(tcg_ctx, vece, r, b, a); +} + +static void tcg_gen_gvec_rsubs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; + static const GVecGen2s rsub_op[4] = { + { .fni8 = gen_vec_rsub8_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs8, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_vec_rsub16_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs16, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_rsub_i32, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs32, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_rsub_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs64, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_2s(tcg_ctx, dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); +} + +GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs) + +static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s, int zx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + if (zx) { + src1 = tcg_const_tl(tcg_ctx, imm); + } else { + src1 = tcg_const_tl(tcg_ctx, sextract64(imm, 0, 5)); + } + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, src1, src2, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, src1); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +typedef void GVecGen2iFn(TCGContext *, unsigned, uint32_t, uint32_t, int64_t, + uint32_t, uint32_t); + +static inline bool +do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, + gen_helper_opivx *fn, int zx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + if (zx) { + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } else { + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } + } else { + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx); + } + return true; +} + +/* OPIVI with GVEC IR */ +#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ + fns[s->sew], ZX); \ +} + +GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi) + +static void tcg_gen_gvec_rsubi(TCGContext *tcg_ctx, unsigned 
vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + TCGv_i64 tmp = tcg_const_i64(tcg_ctx, c); + tcg_gen_gvec_rsubs(tcg_ctx, vece, dofs, aofs, tmp, oprsz, maxsz); + tcg_temp_free_i64(tcg_ctx, tmp); +} + +GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi) + +/* Vector Widening Integer Add/Subtract */ + +/* OPIVV with WIDEN */ +static bool opivv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, + gen_helper_gvec_4_ptr *fn, + bool (*checkfn)(DisasContext *, arg_rmrr *)) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (checkfn(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, + data, fn); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opivv_widen(s, a, fns[s->sew], CHECK); \ +} + +GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check) + +/* OPIVX with WIDEN */ +static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opivx_widen(DisasContext *s, arg_rmrr *a, + gen_helper_opivx *fn) +{ + if (opivx_widen_check(s, a)) { + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); + } + return false; +} + +#define GEN_OPIVX_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opivx_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIVX_WIDEN_TRANS(vwaddu_vx) +GEN_OPIVX_WIDEN_TRANS(vwadd_vx) +GEN_OPIVX_WIDEN_TRANS(vwsubu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsub_vx) + +/* WIDEN OPIVV with WIDEN */ +static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, + gen_helper_gvec_4_ptr *fn) +{ + TCGContext 
*tcg_ctx = s->uc->tcg_ctx; + if (opiwv_widen_check(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fn); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +#define GEN_OPIWV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opiwv_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIWV_WIDEN_TRANS(vwaddu_wv) +GEN_OPIWV_WIDEN_TRANS(vwadd_wv) +GEN_OPIWV_WIDEN_TRANS(vwsubu_wv) +GEN_OPIWV_WIDEN_TRANS(vwsub_wv) + +/* WIDEN OPIVX with WIDEN */ +static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a, + gen_helper_opivx *fn) +{ + if (opiwx_widen_check(s, a)) { + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); + } + return false; +} + +#define GEN_OPIWX_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opiwx_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIWX_WIDEN_TRANS(vwaddu_wx) +GEN_OPIWX_WIDEN_TRANS(vwadd_wx) +GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) +GEN_OPIWX_WIDEN_TRANS(vwsub_wx) + +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ +/* OPIVV without GVEC IR */ +#define GEN_OPIVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +/* + * For vadc and vsbc, an illegal instruction exception is raised if the + * destination vector register is v0 and LMUL > 1. (Section 12.3) + */ +static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + ((a->rd != 0) || (s->lmul == 0))); +} + +GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check) +GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check) + +/* + * For vmadc and vmsbc, an illegal instruction exception is raised if the + * destination vector register overlaps a source vector register group. 
+ */ +static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); +} + +GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check) +GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check) + +static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + ((a->rd != 0) || (s->lmul == 0))); +} + +/* OPIVX without GVEC IR */ +#define GEN_OPIVX_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ +} + +GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check) +GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check) + +static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); +} + +GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check) +GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check) + +/* OPIVI without GVEC IR */ +#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} + +GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check) +GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check) + +/* Vector Bitwise Logical Instructions */ +GEN_OPIVV_GVEC_TRANS(vand_vv, and) +GEN_OPIVV_GVEC_TRANS(vor_vv, or) +GEN_OPIVV_GVEC_TRANS(vxor_vv, xor) +GEN_OPIVX_GVEC_TRANS(vand_vx, ands) +GEN_OPIVX_GVEC_TRANS(vor_vx, ors) +GEN_OPIVX_GVEC_TRANS(vxor_vx, xors) +GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi) +GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori) +GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori) + +/* Vector Single-Width Bit Shift Instructions */ +GEN_OPIVV_GVEC_TRANS(vsll_vv, shlv) +GEN_OPIVV_GVEC_TRANS(vsrl_vv, shrv) +GEN_OPIVV_GVEC_TRANS(vsra_vv, sarv) + +typedef void GVecGen2sFn32(TCGContext *, unsigned, uint32_t, uint32_t, TCGv_i32, + uint32_t, uint32_t); + +static inline bool +do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, + gen_helper_opivx *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + TCGv_i32 src1 = tcg_temp_new_i32(tcg_ctx); + TCGv tmp = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, tmp, a->rs1); + tcg_gen_trunc_tl_i32(tcg_ctx, src1, tmp); + tcg_gen_extract_i32(tcg_ctx, src1, src1, 0, s->sew + 3); + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + src1, MAXSZ(s), MAXSZ(s)); + + tcg_temp_free_i32(tcg_ctx, src1); + tcg_temp_free(tcg_ctx, tmp); + return true; + } + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); +} + +#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, 
gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx, shls) +GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx, shrs) +GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars) + +GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli) +GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri) +GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari) + +/* Vector Narrowing Integer Right Shift Instructions */ +static bool opivv_narrow_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +/* OPIVV with NARROW */ +#define GEN_OPIVV_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opivv_narrow_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} +GEN_OPIVV_NARROW_TRANS(vnsra_vv) +GEN_OPIVV_NARROW_TRANS(vnsrl_vv) + +static bool opivx_narrow_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +/* OPIVX with NARROW */ +#define GEN_OPIVX_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opivx_narrow_check(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ +} + +GEN_OPIVX_NARROW_TRANS(vnsra_vx) +GEN_OPIVX_NARROW_TRANS(vnsrl_vx) + +/* OPIVI with NARROW */ +#define GEN_OPIVI_NARROW_TRANS(NAME, ZX, OPIVX) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opivx_narrow_check(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##OPIVX##_b, \ + gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} + +GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx) +GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx) + +/* Vector Integer Comparison Instructions */ +/* + * For all comparison instructions, an illegal instruction exception is raised + * if the destination vector register overlaps a source vector register group + * and LMUL > 1. 
+ */ +static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || + (s->lmul == 0))); +} +GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check) + +static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || + (s->lmul == 0))); +} + +GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check) + +GEN_OPIVI_TRANS(vmseq_vi, 0, vmseq_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsne_vi, 0, vmsne_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check) + +/* Vector Integer Min/Max Instructions */ +GEN_OPIVV_GVEC_TRANS(vminu_vv, umin) +GEN_OPIVV_GVEC_TRANS(vmin_vv, smin) +GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax) +GEN_OPIVV_GVEC_TRANS(vmax_vv, smax) +GEN_OPIVX_TRANS(vminu_vx, opivx_check) +GEN_OPIVX_TRANS(vmin_vx, opivx_check) +GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) +GEN_OPIVX_TRANS(vmax_vx, opivx_check) + +/* Vector Single-Width Integer Multiply Instructions */ +GEN_OPIVV_GVEC_TRANS(vmul_vv, mul) +GEN_OPIVV_TRANS(vmulh_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhu_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check) +GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) +GEN_OPIVX_TRANS(vmulh_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhu_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check) + +/* Vector Integer Divide Instructions */ +GEN_OPIVV_TRANS(vdivu_vv, opivv_check) +GEN_OPIVV_TRANS(vdiv_vv, opivv_check) +GEN_OPIVV_TRANS(vremu_vv, opivv_check) +GEN_OPIVV_TRANS(vrem_vv, opivv_check) +GEN_OPIVX_TRANS(vdivu_vx, opivx_check) +GEN_OPIVX_TRANS(vdiv_vx, opivx_check) +GEN_OPIVX_TRANS(vremu_vx, opivx_check) +GEN_OPIVX_TRANS(vrem_vx, opivx_check) + +/* Vector Widening Integer Multiply Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmul_vx) +GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) + +/* Vector Single-Width Integer Multiply-Add Instructions */ +GEN_OPIVV_TRANS(vmacc_vv, opivv_check) +GEN_OPIVV_TRANS(vnmsac_vv, opivv_check) +GEN_OPIVV_TRANS(vmadd_vv, opivv_check) +GEN_OPIVV_TRANS(vnmsub_vv, opivv_check) +GEN_OPIVX_TRANS(vmacc_vx, opivx_check) +GEN_OPIVX_TRANS(vnmsac_vx, opivx_check) +GEN_OPIVX_TRANS(vmadd_vx, opivx_check) +GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) + +/* Vector Widening Integer Multiply-Add Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) 
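+/*
+ * Note: the _vx widening multiply-add forms below take a scalar rs1 operand
+ * and are routed through do_opivx_widen(), i.e. they always use the
+ * out-of-line helpers rather than a gvec expansion.
+ */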
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) + +/* Vector Integer Merge and Move Instructions */ +static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs1, false)) { + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_mov(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rs1), + MAXSZ(s), MAXSZ(s)); + } else { + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_gvec_2_ptr * const fns[4] = { + gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, + gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + tcg_gen_gvec_2_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + } + return true; + } + return false; +} + +typedef void gen_helper_vmv_vx(TCGContext *, TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32); +static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false)) { + + TCGv s1; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + s1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, s1, a->rs1); + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_tl(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), s1); + } else { + TCGv_i32 desc ; + TCGv_i64 s1_i64 = tcg_temp_new_i64(tcg_ctx); + TCGv_ptr dest = tcg_temp_new_ptr(tcg_ctx); + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_vmv_vx * const fns[4] = { + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, + }; + + tcg_gen_ext_tl_i64(tcg_ctx, s1_i64, s1); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew](tcg_ctx, dest, s1_i64, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_i32(tcg_ctx, desc); + tcg_temp_free_i64(tcg_ctx, s1_i64); + } + + tcg_temp_free(tcg_ctx, s1); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false)) { + + int64_t simm = sextract64(a->rs1, 0, 5); + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_imm(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), simm); + } else { + TCGv_i32 desc; + TCGv_i64 s1; + TCGv_ptr dest; + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_vmv_vx * const fns[4] = { + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + s1 = tcg_const_i64(tcg_ctx, simm); + dest = tcg_temp_new_ptr(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew](tcg_ctx, dest, s1, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_i32(tcg_ctx, desc); + tcg_temp_free_i64(tcg_ctx, s1); + gen_set_label(tcg_ctx, over); + } + 
return true; + } + return false; +} + +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check) +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check) +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check) + +/* + *** Vector Fixed-Point Arithmetic Instructions + */ + +/* Vector Single-Width Saturating Add and Subtract */ +GEN_OPIVV_TRANS(vsaddu_vv, opivv_check) +GEN_OPIVV_TRANS(vsadd_vv, opivv_check) +GEN_OPIVV_TRANS(vssubu_vv, opivv_check) +GEN_OPIVV_TRANS(vssub_vv, opivv_check) +GEN_OPIVX_TRANS(vsaddu_vx, opivx_check) +GEN_OPIVX_TRANS(vsadd_vx, opivx_check) +GEN_OPIVX_TRANS(vssubu_vx, opivx_check) +GEN_OPIVX_TRANS(vssub_vx, opivx_check) +GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check) +GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check) + +/* Vector Single-Width Averaging Add and Subtract */ +GEN_OPIVV_TRANS(vaadd_vv, opivv_check) +GEN_OPIVV_TRANS(vasub_vv, opivv_check) +GEN_OPIVX_TRANS(vaadd_vx, opivx_check) +GEN_OPIVX_TRANS(vasub_vx, opivx_check) +GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) + +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ +GEN_OPIVV_TRANS(vsmul_vv, opivv_check) +GEN_OPIVX_TRANS(vsmul_vx, opivx_check) + +/* Vector Widening Saturating Scaled Multiply-Add */ +GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx) + +/* Vector Single-Width Scaling Shift Instructions */ +GEN_OPIVV_TRANS(vssrl_vv, opivv_check) +GEN_OPIVV_TRANS(vssra_vv, opivv_check) +GEN_OPIVX_TRANS(vssrl_vx, opivx_check) +GEN_OPIVX_TRANS(vssra_vx, opivx_check) +GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check) +GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check) + +/* Vector Narrowing Fixed-Point Clip Instructions */ +GEN_OPIVV_NARROW_TRANS(vnclipu_vv) +GEN_OPIVV_NARROW_TRANS(vnclip_vv) +GEN_OPIVX_NARROW_TRANS(vnclipu_vx) +GEN_OPIVX_NARROW_TRANS(vnclip_vx) +GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx) +GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx) + +/* + *** Vector Floating-Point Arithmetic Instructions + */ +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised. 
+ */ +static bool opfvv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + (s->sew != 0)); +} + +/* OPFVV without GVEC IR */ +#define GEN_OPFVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} +GEN_OPFVV_TRANS(vfadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsub_vv, opfvv_check) + +typedef void gen_helper_opfvf(TCGContext* tcg_ctx, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_opfvf *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, src2, mask; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, tcg_ctx->cpu_fpr[rs1], src2, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool opfvf_check(DisasContext *s, arg_rmrr *a) +{ +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0)); +} + +/* OPFVF without GVEC IR */ +#define GEN_OPFVF_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + gen_set_rm(s, 7); \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) +GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) + +/* Vector Widening Floating-Point Add/Subtract Instructions */ +static 
bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* OPFVV with WIDEN */ +#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check) + +static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* OPFVF with WIDEN */ +#define GEN_OPFVF_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfvf_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, 7); \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFVF_WIDEN_TRANS(vfwadd_vf) +GEN_OPFVF_WIDEN_TRANS(vfwsub_vf) + +static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* WIDEN OPFVV with WIDEN */ +#define GEN_OPFWV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opfwv_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, 
a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFWV_WIDEN_TRANS(vfwadd_wv) +GEN_OPFWV_WIDEN_TRANS(vfwsub_wv) + +static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* WIDEN OPFVF with WIDEN */ +#define GEN_OPFWF_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfwf_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, 7); \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) +GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) + +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ +GEN_OPFVV_TRANS(vfmul_vv, opfvv_check) +GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) +GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) +GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) + +/* Vector Widening Floating-Point Multiply */ +GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) + +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ +GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) + +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ +GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) +GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) +GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) +GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) + +/* Vector Floating-Point Square-Root Instruction */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0)); +} + +#define GEN_OPFV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = 
gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_TRANS(vfsqrt_v, opfv_check) + +/* Vector Floating-Point MIN/MAX Instructions */ +GEN_OPFVV_TRANS(vfmin_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmax_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmin_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmax_vf, opfvf_check) + +/* Vector Floating-Point Sign-Injection Instructions */ +GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check) +GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check) + +/* Vector Floating-Point Compare Instructions */ +static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + (s->sew != 0) && + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || + (s->lmul == 0))); +} + +GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmford_vv, opfvv_cmp_check) + +static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0) && + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || + (s->lmul == 0))); +} + +GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check) + +/* Vector Floating-Point Classify Instruction */ +GEN_OPFV_TRANS(vfclass_v, opfv_check) + +/* Vector Floating-Point Merge Instruction */ +GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check) + +static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + (s->sew != 0)) { + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_i64(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), tcg_ctx->cpu_fpr[a->rs1]); + } else { + TCGv_ptr dest; + TCGv_i32 desc; + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_vmv_vx * const fns[3] = { + gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, + gen_helper_vmv_v_x_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew - 1](tcg_ctx, dest, tcg_ctx->cpu_fpr[a->rs1], tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + } + return true; + } + return false; +} + +/* Single-Width Floating-Point/Integer Type-Convert 
Instructions */ +GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check) + +/* Widening Floating-Point/Integer Type-Convert Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_widen_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +#define GEN_OPFV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opfv_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v) + +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_narrow_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +#define GEN_OPFV_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opfv_narrow_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_NARROW_TRANS(vfncvt_xu_f_v) +GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v) + +/* + *** Vector Reduction Operations + */ +/* Vector Single-Width Integer Reduction Instructions */ +static bool reduction_check(DisasContext *s, arg_rmrr *a) +{ + return 
vext_check_isa_ill(s) && vext_check_reg(s, a->rs2, false); +} + +GEN_OPIVV_TRANS(vredsum_vs, reduction_check) +GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check) +GEN_OPIVV_TRANS(vredmax_vs, reduction_check) +GEN_OPIVV_TRANS(vredminu_vs, reduction_check) +GEN_OPIVV_TRANS(vredmin_vs, reduction_check) +GEN_OPIVV_TRANS(vredand_vs, reduction_check) +GEN_OPIVV_TRANS(vredor_vs, reduction_check) +GEN_OPIVV_TRANS(vredxor_vs, reduction_check) + +/* Vector Widening Integer Reduction Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check) +GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check) + +/* Vector Single-Width Floating-Point Reduction Instructions */ +GEN_OPFVV_TRANS(vfredsum_vs, reduction_check) +GEN_OPFVV_TRANS(vfredmax_vs, reduction_check) +GEN_OPFVV_TRANS(vfredmin_vs, reduction_check) + +/* Vector Widening Floating-Point Reduction Instructions */ +GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check) + +/* + *** Vector Mask Operations + */ + +/* Vector Mask-Register Logical Instructions */ +#define GEN_MM_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_r *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (vext_check_isa_ill(s)) { \ + uint32_t data = 0; \ + gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fn); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_MM_TRANS(vmand_mm) +GEN_MM_TRANS(vmnand_mm) +GEN_MM_TRANS(vmandnot_mm) +GEN_MM_TRANS(vmxor_mm) +GEN_MM_TRANS(vmor_mm) +GEN_MM_TRANS(vmnor_mm) +GEN_MM_TRANS(vmornot_mm) +GEN_MM_TRANS(vmxnor_mm) + +/* Vector mask population count vmpopc */ +static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s)) { + TCGv_ptr src2, mask; + TCGv dst; + TCGv_i32 desc; + uint32_t data = 0; + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + dst = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, a->rs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + gen_helper_vmpopc_m(tcg_ctx, dst, mask, src2, tcg_ctx->cpu_env, desc); + gen_set_gpr(tcg_ctx, a->rd, dst); + + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, dst); + tcg_temp_free_i32(tcg_ctx, desc); + return true; + } + return false; +} + +/* vmfirst find-first-set mask bit */ +static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s)) { + TCGv_ptr src2, mask; + TCGv dst; + TCGv_i32 desc; + uint32_t data = 0; + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + dst = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, a->rs2)); + 
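+ /*
+ * Descriptive note on the two address computations here: both pointers are
+ * formed relative to cpu_env so the out-of-line helper can index the vector
+ * register file directly; vreg_ofs(s, 0) always refers to mask register v0.
+ */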
tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + gen_helper_vmfirst_m(tcg_ctx, dst, mask, src2, tcg_ctx->cpu_env, desc); + gen_set_gpr(tcg_ctx, a->rd, dst); + + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, dst); + tcg_temp_free_i32(tcg_ctx, desc); + return true; + } + return false; +} + +/* vmsbf.m set-before-first mask bit */ +/* vmsif.m set-includ-first mask bit */ +/* vmsof.m set-only-first mask bit */ +#define GEN_M_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (vext_check_isa_ill(s)) { \ + uint32_t data = 0; \ + gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), \ + vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fn); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_M_TRANS(vmsbf_m) +GEN_M_TRANS(vmsif_m) +GEN_M_TRANS(vmsof_m) + +/* Vector Iota Instruction */ +static bool trans_viota_m(DisasContext *s, arg_viota_m *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, 1) && + (a->vm != 0 || a->rd != 0)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + static gen_helper_gvec_3_ptr * const fns[4] = { + gen_helper_viota_m_b, gen_helper_viota_m_h, + gen_helper_viota_m_w, gen_helper_viota_m_d, + }; + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, + s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* Vector Element Index Instruction */ +static bool trans_vid_v(DisasContext *s, arg_vid_v *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_overlap_mask(s, a->rd, a->vm, false)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + static gen_helper_gvec_2_ptr * const fns[4] = { + gen_helper_vid_v_b, gen_helper_vid_v_h, + gen_helper_vid_v_w, gen_helper_vid_v_d, + }; + tcg_gen_gvec_2_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* + *** Vector Permutation Instructions + */ + +/* Integer Extract Instruction */ + +static void load_element(TCGContext *tcg_ctx, TCGv_i64 dest, TCGv_ptr base, + int ofs, int sew) +{ + switch (sew) { + case MO_8: + tcg_gen_ld8u_i64(tcg_ctx, dest, base, ofs); + break; + case MO_16: + tcg_gen_ld16u_i64(tcg_ctx, dest, base, ofs); + break; + case MO_32: + tcg_gen_ld32u_i64(tcg_ctx, dest, base, ofs); + break; + case MO_64: + tcg_gen_ld_i64(tcg_ctx, dest, base, ofs); + 
break; + default: + g_assert_not_reached(); + break; + } +} + +/* offset of the idx element with base regsiter r */ +static uint32_t endian_ofs(DisasContext *s, int r, int idx) +{ +#ifdef HOST_WORDS_BIGENDIAN + return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew); +#else + return vreg_ofs(s, r) + (idx << s->sew); +#endif +} + +/* adjust the index according to the endian */ +static void endian_adjust(TCGv_i32 ofs, int sew) +{ +#ifdef HOST_WORDS_BIGENDIAN + tcg_gen_xori_i32(ofs, ofs, 7 >> sew); +#endif +} + +/* Load idx >= VLMAX ? 0 : vreg[idx] */ +static void vec_element_loadx(DisasContext *s, TCGv_i64 dest, + int vreg, TCGv idx, int vlmax) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i32 ofs = tcg_temp_new_i32(tcg_ctx); + TCGv_ptr base = tcg_temp_new_ptr(tcg_ctx); + TCGv_i64 t_idx = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 t_vlmax, t_zero; + + /* + * Mask the index to the length so that we do + * not produce an out-of-range load. + */ + tcg_gen_trunc_tl_i32(tcg_ctx, ofs, idx); + tcg_gen_andi_i32(tcg_ctx, ofs, ofs, vlmax - 1); + + /* Convert the index to an offset. */ + endian_adjust(ofs, s->sew); + tcg_gen_shli_i32(tcg_ctx, ofs, ofs, s->sew); + + /* Convert the index to a pointer. */ + tcg_gen_ext_i32_ptr(tcg_ctx, base, ofs); + tcg_gen_add_ptr(tcg_ctx, base, base, tcg_ctx->cpu_env); + + /* Perform the load. */ + load_element(tcg_ctx, dest, base, + vreg_ofs(s, vreg), s->sew); + tcg_temp_free_ptr(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, ofs); + + /* Flush out-of-range indexing to zero. */ + t_vlmax = tcg_const_i64(tcg_ctx, vlmax); + t_zero = tcg_const_i64(tcg_ctx, 0); + tcg_gen_extu_tl_i64(tcg_ctx, t_idx, idx); + + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LTU, dest, t_idx, + t_vlmax, dest, t_zero); + + tcg_temp_free_i64(tcg_ctx, t_vlmax); + tcg_temp_free_i64(tcg_ctx, t_zero); + tcg_temp_free_i64(tcg_ctx, t_idx); +} + +static void vec_element_loadi(DisasContext *s, TCGv_i64 dest, + int vreg, int idx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + load_element(tcg_ctx, dest, tcg_ctx->cpu_env, endian_ofs(s, vreg, idx), s->sew); +} + +static bool trans_vext_x_v(DisasContext *s, arg_r *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); + TCGv dest = tcg_temp_new(tcg_ctx); + + if (a->rs1 == 0) { + /* Special case vmv.x.s rd, vs2. */ + vec_element_loadi(s, tmp, a->rs2, 0); + } else { + /* This instruction ignores LMUL and vector register groups */ + int vlmax = s->vlen >> (3 + s->sew); + vec_element_loadx(s, tmp, a->rs2, tcg_ctx->cpu_gpr[a->rs1], vlmax); + } + tcg_gen_trunc_i64_tl(tcg_ctx, dest, tmp); + gen_set_gpr(tcg_ctx, a->rd, dest); + + tcg_temp_free(tcg_ctx, dest); + tcg_temp_free_i64(tcg_ctx, tmp); + return true; +} + +/* Integer Scalar Move Instruction */ + +static void store_element(TCGContext *tcg_ctx, TCGv_i64 val, TCGv_ptr base, + int ofs, int sew) +{ + switch (sew) { + case MO_8: + tcg_gen_st8_i64(tcg_ctx, val, base, ofs); + break; + case MO_16: + tcg_gen_st16_i64(tcg_ctx, val, base, ofs); + break; + case MO_32: + tcg_gen_st32_i64(tcg_ctx, val, base, ofs); + break; + case MO_64: + tcg_gen_st_i64(tcg_ctx, val, base, ofs); + break; + default: + g_assert_not_reached(); + break; + } +} + +/* + * Store vreg[idx] = val. + * The index must be in range of VLMAX. 
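+ * Within this file it is only called with idx 0, from vmv.s.x and vfmv.s.f below.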
+ */ +static void vec_element_storei(DisasContext *s, int vreg, + int idx, TCGv_i64 val) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + store_element(tcg_ctx, val, tcg_ctx->cpu_env, endian_ofs(s, vreg, idx), s->sew); +} + +/* vmv.s.x vd, rs1 # vd[0] = rs1 */ +static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s)) { + /* This instruction ignores LMUL and vector register groups */ + int maxsz = s->vlen >> 3; + TCGv_i64 t1; + TCGLabel *over = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + tcg_gen_gvec_dup_imm(tcg_ctx, SEW64, vreg_ofs(s, a->rd), maxsz, maxsz, 0); + if (a->rs1 == 0) { + goto done; + } + + t1 = tcg_temp_new_i64(tcg_ctx); + tcg_gen_extu_tl_i64(tcg_ctx, t1, tcg_ctx->cpu_gpr[a->rs1]); + vec_element_storei(s, a->rd, 0, t1); + tcg_temp_free_i64(tcg_ctx, t1); + done: + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* Floating-Point Scalar Move Instructions */ +static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!s->vill && has_ext(s, RVF) && + (s->mstatus_fs != 0) && (s->sew != 0)) { + unsigned int len = 8 << s->sew; + + vec_element_loadi(s, tcg_ctx->cpu_fpr[a->rd], a->rs2, 0); + if (len < 64) { + tcg_gen_ori_i64(tcg_ctx, tcg_ctx->cpu_fpr[a->rd], tcg_ctx->cpu_fpr[a->rd], + MAKE_64BIT_MASK(len, 64 - len)); + } + + mark_fs_dirty(s); + return true; + } + return false; +} + +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */ +static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!s->vill && has_ext(s, RVF) && (s->sew != 0)) { + TCGv_i64 t1; + /* The instructions ignore LMUL and vector register group. 
*/ + uint32_t vlmax = s->vlen >> 3; + + /* if vl == 0, skip vector register write back */ + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + /* zeroed all elements */ + tcg_gen_gvec_dup_imm(tcg_ctx, SEW64, vreg_ofs(s, a->rd), vlmax, vlmax, 0); + + /* NaN-box f[rs1] as necessary for SEW */ + t1 = tcg_temp_new_i64(tcg_ctx); + if (s->sew == MO_64 && !has_ext(s, RVD)) { + tcg_gen_ori_i64(tcg_ctx, t1, tcg_ctx->cpu_fpr[a->rs1], MAKE_64BIT_MASK(32, 32)); + } else { + tcg_gen_mov_i64(tcg_ctx, t1, tcg_ctx->cpu_fpr[a->rs1]); + } + vec_element_storei(s, a->rd, 0, t1); + tcg_temp_free_i64(tcg_ctx, t1); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* Vector Slide Instructions */ +static bool slideup_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2)); +} + +GEN_OPIVX_TRANS(vslideup_vx, slideup_check) +GEN_OPIVX_TRANS(vslide1up_vx, slideup_check) +GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check) + +GEN_OPIVX_TRANS(vslidedown_vx, opivx_check) +GEN_OPIVX_TRANS(vslide1down_vx, opivx_check) +GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check) + +/* Vector Register Gather Instruction */ +static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2) && (a->rd != a->rs1)); +} + +GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check) + +static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2)); +} + +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ +static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!vrgather_vx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + int vlmax = s->vlen / s->mlen; + TCGv_i64 dest = tcg_temp_new_i64(tcg_ctx); + + if (a->rs1 == 0) { + vec_element_loadi(s, dest, a->rs2, 0); + } else { + vec_element_loadx(s, dest, a->rs2, tcg_ctx->cpu_gpr[a->rs1], vlmax); + } + + tcg_gen_gvec_dup_i64(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), dest); + tcg_temp_free_i64(tcg_ctx, dest); + } else { + static gen_helper_opivx * const fns[4] = { + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d + }; + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); + } + return true; +} + +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 
0 : vs2[imm] */ +static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!vrgather_vx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + if (a->rs1 >= s->vlen / s->mlen) { + tcg_gen_gvec_dup_imm(tcg_ctx, SEW64, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), 0); + } else { + tcg_gen_gvec_dup_mem(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + endian_ofs(s, a->rs2, a->rs1), + MAXSZ(s), MAXSZ(s)); + } + } else { + static gen_helper_opivx * const fns[4] = { + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d + }; + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, 1); + } + return true; +} + +/* Vector Compress Instruction */ +static bool vcompress_vm_check(DisasContext *s, arg_r *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs1, 1) && + (a->rd != a->rs2)); +} + +static bool trans_vcompress_vm(DisasContext *s, arg_r *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vcompress_vm_check(s, a)) { + uint32_t data = 0; + static gen_helper_gvec_4_ptr * const fns[4] = { + gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h, + gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} diff --git a/qemu/target/riscv/internals.h b/qemu/target/riscv/internals.h new file mode 100644 index 0000000000..37d33820ad --- /dev/null +++ b/qemu/target/riscv/internals.h @@ -0,0 +1,41 @@ +/* + * QEMU RISC-V CPU -- internal functions and types + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef RISCV_CPU_INTERNALS_H +#define RISCV_CPU_INTERNALS_H + +#include "hw/registerfields.h" + +/* share data between vector helpers and decode code */ +FIELD(VDATA, MLEN, 0, 8) +FIELD(VDATA, VM, 8, 1) +FIELD(VDATA, LMUL, 9, 2) +FIELD(VDATA, NF, 11, 4) +FIELD(VDATA, WD, 11, 1) + +/* float point classify helpers */ +target_ulong fclass_h(uint64_t frs1); +target_ulong fclass_s(uint64_t frs1); +target_ulong fclass_d(uint64_t frs1); + +#define SEW8 0 +#define SEW16 1 +#define SEW32 2 +#define SEW64 3 + +#endif diff --git a/qemu/target/riscv/op_helper.c b/qemu/target/riscv/op_helper.c index 5afb2ce881..c5de354a05 100644 --- a/qemu/target/riscv/op_helper.c +++ b/qemu/target/riscv/op_helper.c @@ -81,8 +81,7 @@ target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb) riscv_raise_exception(env, RISCV_EXCP_INST_ADDR_MIS, GETPC()); } - if (env->priv_ver >= PRIV_VERSION_1_10_0 && - get_field(env->mstatus, MSTATUS_TSR) && !(env->priv >= PRV_M)) { + if (get_field(env->mstatus, MSTATUS_TSR) && !(env->priv >= PRV_M)) { riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } @@ -116,10 +115,8 @@ target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb) } else { prev_priv = get_field(mstatus, MSTATUS_SPP); - mstatus = set_field(mstatus, - env->priv_ver >= PRIV_VERSION_1_10_0 ? - MSTATUS_SIE : MSTATUS_UIE << prev_priv, - get_field(mstatus, MSTATUS_SPIE)); + mstatus = set_field(mstatus, MSTATUS_SIE, + get_field(mstatus, MSTATUS_SPIE)); mstatus = set_field(mstatus, MSTATUS_SPIE, 1); mstatus = set_field(mstatus, MSTATUS_SPP, PRV_U); env->mstatus = mstatus; @@ -144,10 +141,8 @@ target_ulong helper_mret(CPURISCVState *env, target_ulong cpu_pc_deb) target_ulong mstatus = env->mstatus; target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP); target_ulong prev_virt = MSTATUS_MPV_ISSET(env); - mstatus = set_field(mstatus, - env->priv_ver >= PRIV_VERSION_1_10_0 ? 
- MSTATUS_MIE : MSTATUS_UIE << prev_priv, - get_field(mstatus, MSTATUS_MPIE)); + mstatus = set_field(mstatus, MSTATUS_MIE, + get_field(mstatus, MSTATUS_MPIE)); mstatus = set_field(mstatus, MSTATUS_MPIE, 1); mstatus = set_field(mstatus, MSTATUS_MPP, PRV_U); #ifdef TARGET_RISCV32 @@ -194,7 +189,6 @@ void helper_tlb_flush(CPURISCVState *env) CPUState *cs = env_cpu(env); if (!(env->priv >= PRV_S) || (env->priv == PRV_S && - env->priv_ver >= PRIV_VERSION_1_10_0 && get_field(env->mstatus, MSTATUS_TVM))) { riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } else { @@ -202,6 +196,19 @@ void helper_tlb_flush(CPURISCVState *env) } } +void helper_hyp_tlb_flush(CPURISCVState *env) +{ + CPUState *cs = env_cpu(env); + + if (env->priv == PRV_M || + (env->priv == PRV_S && !riscv_cpu_virt_enabled(env))) { + tlb_flush(cs); + return; + } + + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); +} + void helper_uc_riscv_exit(CPURISCVState *env) { CPUState *cs = env_cpu(env); @@ -209,4 +216,4 @@ void helper_uc_riscv_exit(CPURISCVState *env) cs->exception_index = EXCP_HLT; cs->halted = 1; cpu_loop_exit(cs); -} \ No newline at end of file +} diff --git a/qemu/target/riscv/pmp.c b/qemu/target/riscv/pmp.c index 888b99c8d9..9e1e614951 100644 --- a/qemu/target/riscv/pmp.c +++ b/qemu/target/riscv/pmp.c @@ -169,7 +169,7 @@ static void pmp_update_rule(CPURISCVState *env, uint32_t pmp_index) case PMP_AMATCH_NA4: sa = this_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ - ea = (this_addr + 4u) - 1u; + ea = (sa + 4u) - 1u; break; case PMP_AMATCH_NAPOT: @@ -231,16 +231,20 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, return true; } - /* - * if size is unknown (0), assume that all bytes - * from addr to the end of the page will be accessed. - */ if (size == 0) { + if (riscv_feature(env, RISCV_FEATURE_MMU)) { + /* + * If size is unknown (0), assume that all bytes + * from addr to the end of the page will be accessed. + */ #ifdef _MSC_VER - pmp_size = 0 - (addr | TARGET_PAGE_MASK); + pmp_size = 0 - (addr | TARGET_PAGE_MASK); #else - pmp_size = -(addr | TARGET_PAGE_MASK); + pmp_size = -(addr | TARGET_PAGE_MASK); #endif + } else { + pmp_size = sizeof(target_ulong); + } } else { pmp_size = size; } diff --git a/qemu/target/riscv/riscv32/decode_insn16.inc.c b/qemu/target/riscv/riscv32/decode_insn16.inc.c index 66ebf61203..ba4cccaf76 100644 --- a/qemu/target/riscv/riscv32/decode_insn16.inc.c +++ b/qemu/target/riscv/riscv32/decode_insn16.inc.c @@ -1,11 +1,9 @@ /* This file is autogenerated by scripts/decodetree.py. */ -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wredundant-decls" -# ifdef __clang__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#ifdef __clang__ # pragma GCC diagnostic ignored "-Wtypedef-redefinition" -# endif #endif typedef arg_empty arg_illegal; @@ -55,9 +53,7 @@ static bool trans_flw(DisasContext *ctx, arg_flw *a); typedef arg_s arg_fsw; static bool trans_fsw(DisasContext *ctx, arg_fsw *a); -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic pop -#endif +#pragma GCC diagnostic pop static void decode_insn16_extract_c_addi16sp(DisasContext *ctx, arg_i *a, uint16_t insn) { @@ -231,55 +227,45 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 000..... 
......00 */ if ((insn & 0x00001fe0) == 0x00000000) { /* 00000000 000...00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:87 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); - ctx->invalid = true; if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:88 */ decode_insn16_extract_c_addi4spn(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000001: /* 000..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:96 */ decode_insn16_extract_ci(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000002: /* 000..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:115 */ decode_insn16_extract_c_shift2(ctx, &u.f_shift, insn); if (trans_slli(ctx, &u.f_shift)) return true; return false; case 0x00002000: /* 001..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:90 */ decode_insn16_extract_cl_d(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; case 0x00002001: /* 001..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:24 */ decode_insn16_extract_cj(ctx, &u.f_j, insn); u.f_j.rd = 1; if (trans_jal(ctx, &u.f_j)) return true; return false; case 0x00002002: /* 001..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:116 */ decode_insn16_extract_c_ldsp(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; case 0x00004000: /* 010..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:91 */ decode_insn16_extract_cl_w(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00004001: /* 010..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:97 */ decode_insn16_extract_c_li(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -287,17 +273,14 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 010..... ......10 */ if ((insn & 0x00000f80) == 0x00000000) { /* 010.0000 0.....10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:118 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:119 */ decode_insn16_extract_c_lwsp(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00006000: /* 011..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:20 */ decode_insn16_extract_cl_w(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; @@ -305,23 +288,19 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 011..... ......01 */ if ((insn & 0x0000107c) == 0x00000000) { /* 0110.... 
.0000001 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:99 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x00000f80) == 0x00000100) { /* 011.0001 0.....01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:100 */ decode_insn16_extract_c_addi16sp(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:101 */ decode_insn16_extract_c_lui(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; case 0x00006002: /* 011..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:27 */ decode_insn16_extract_c_lwsp(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; @@ -330,19 +309,16 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch ((insn >> 10) & 0x3) { case 0x0: /* 100.00.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:103 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 100.01.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:104 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srai(ctx, &u.f_shift)) return true; return false; case 0x2: /* 100.10.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:105 */ decode_insn16_extract_c_andi(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -352,22 +328,18 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch (insn & 0x00001060) { case 0x00000000: /* 100011.. .00...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:106 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x00000020: /* 100011.. .01...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:107 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00000040: /* 100011.. .10...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:108 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00000060: /* 100011.. .11...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:109 */ if (trans_and(ctx, &u.f_r)) return true; return false; } @@ -381,18 +353,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1000.... ......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10000000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:122 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1000.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:123 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 0; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:124 */ decode_insn16_extract_c_mv(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -400,18 +369,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1001.... 
......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10010000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:127 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_ebreak(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1001.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:128 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 1; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:129 */ decode_insn16_extract_cr(ctx, &u.f_r, insn); if (trans_add(ctx, &u.f_r)) return true; return false; @@ -419,56 +385,47 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) return false; case 0x0000a000: /* 101..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:92 */ decode_insn16_extract_cs_d(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000a001: /* 101..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:110 */ decode_insn16_extract_cj(ctx, &u.f_j, insn); u.f_j.rd = 0; if (trans_jal(ctx, &u.f_j)) return true; return false; case 0x0000a002: /* 101..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:131 */ decode_insn16_extract_c_sdsp(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000c000: /* 110..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:93 */ decode_insn16_extract_cs_w(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000c001: /* 110..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:111 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x0000c002: /* 110..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:132 */ decode_insn16_extract_c_swsp(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000e000: /* 111..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:21 */ decode_insn16_extract_cs_w(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; case 0x0000e001: /* 111..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:112 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x0000e002: /* 111..... 
......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:28 */ decode_insn16_extract_c_swsp(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; diff --git a/qemu/target/riscv/riscv32/decode_insn32.inc.c b/qemu/target/riscv/riscv32/decode_insn32.inc.c index c4c25de13b..ce08737432 100644 --- a/qemu/target/riscv/riscv32/decode_insn32.inc.c +++ b/qemu/target/riscv/riscv32/decode_insn32.inc.c @@ -14,56 +14,70 @@ typedef struct { int rs2; } arg_b; +typedef struct { + int csr; + int rd; + int rs1; +} arg_decode_insn3214; + typedef struct { int rd; int rm; int rs1; int rs2; int rs3; -} arg_decode_insn3210; +} arg_decode_insn3215; typedef struct { int rd; int rm; int rs1; int rs2; -} arg_decode_insn3211; +} arg_decode_insn3216; typedef struct { int rd; int rm; int rs1; -} arg_decode_insn3212; +} arg_decode_insn3217; typedef struct { int rd; int rs1; -} arg_decode_insn3213; +} arg_decode_insn3218; typedef struct { - int rs1; + int rd; + int vm; +} arg_decode_insn3219; + +typedef struct { + int rd; int rs2; -} arg_decode_insn3214; +} arg_decode_insn3220; typedef struct { + int rd; int rs1; -} arg_decode_insn3215; + int zimm; +} arg_decode_insn3221; typedef struct { - int pred; - int succ; -} arg_decode_insn3216; + int rs1; + int rs2; +} arg_decode_insn3222; typedef struct { - int csr; - int rd; int rs1; -} arg_decode_insn329; +} arg_decode_insn3223; + +typedef struct { + int pred; + int succ; +} arg_decode_insn3224; typedef struct { -#ifdef _MSC_VER - int dummy; // MSVC does not allow empty struct -#endif + int : 0; } arg_empty; typedef struct { @@ -83,6 +97,42 @@ typedef struct { int rs2; } arg_r; +typedef struct { + int nf; + int rd; + int rs1; + int vm; +} arg_r2nfvm; + +typedef struct { + int rd; + int rs2; + int vm; +} arg_rmr; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; +} arg_rmrr; + +typedef struct { + int nf; + int rd; + int rs1; + int rs2; + int vm; +} arg_rnfvm; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; + int wd; +} arg_rwdvm; + typedef struct { int imm; int rs1; @@ -112,13 +162,9 @@ typedef arg_empty arg_mret; static bool trans_mret(DisasContext *ctx, arg_mret *a); typedef arg_empty arg_wfi; static bool trans_wfi(DisasContext *ctx, arg_wfi *a); -typedef arg_decode_insn3214 arg_hfence_gvma; -static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); -typedef arg_decode_insn3214 arg_hfence_bvma; -static bool trans_hfence_bvma(DisasContext *ctx, arg_hfence_bvma *a); -typedef arg_decode_insn3214 arg_sfence_vma; +typedef arg_decode_insn3222 arg_sfence_vma; static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a); -typedef arg_decode_insn3215 arg_sfence_vm; +typedef arg_decode_insn3223 arg_sfence_vm; static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a); typedef arg_u arg_lui; static bool trans_lui(DisasContext *ctx, arg_lui *a); @@ -194,21 +240,21 @@ typedef arg_r arg_or; static bool trans_or(DisasContext *ctx, arg_or *a); typedef arg_r arg_and; static bool trans_and(DisasContext *ctx, arg_and *a); -typedef arg_decode_insn3216 arg_fence; +typedef arg_decode_insn3224 arg_fence; static bool trans_fence(DisasContext *ctx, arg_fence *a); typedef arg_empty arg_fence_i; static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a); -typedef arg_decode_insn329 arg_csrrw; +typedef arg_decode_insn3214 arg_csrrw; static bool trans_csrrw(DisasContext *ctx, arg_csrrw *a); -typedef arg_decode_insn329 arg_csrrs; +typedef arg_decode_insn3214 arg_csrrs; static bool 
trans_csrrs(DisasContext *ctx, arg_csrrs *a); -typedef arg_decode_insn329 arg_csrrc; +typedef arg_decode_insn3214 arg_csrrc; static bool trans_csrrc(DisasContext *ctx, arg_csrrc *a); -typedef arg_decode_insn329 arg_csrrwi; +typedef arg_decode_insn3214 arg_csrrwi; static bool trans_csrrwi(DisasContext *ctx, arg_csrrwi *a); -typedef arg_decode_insn329 arg_csrrsi; +typedef arg_decode_insn3214 arg_csrrsi; static bool trans_csrrsi(DisasContext *ctx, arg_csrrsi *a); -typedef arg_decode_insn329 arg_csrrci; +typedef arg_decode_insn3214 arg_csrrci; static bool trans_csrrci(DisasContext *ctx, arg_csrrci *a); typedef arg_r arg_mul; static bool trans_mul(DisasContext *ctx, arg_mul *a); @@ -252,23 +298,23 @@ typedef arg_i arg_flw; static bool trans_flw(DisasContext *ctx, arg_flw *a); typedef arg_s arg_fsw; static bool trans_fsw(DisasContext *ctx, arg_fsw *a); -typedef arg_decode_insn3210 arg_fmadd_s; +typedef arg_decode_insn3215 arg_fmadd_s; static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a); -typedef arg_decode_insn3210 arg_fmsub_s; +typedef arg_decode_insn3215 arg_fmsub_s; static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a); -typedef arg_decode_insn3210 arg_fnmsub_s; +typedef arg_decode_insn3215 arg_fnmsub_s; static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a); -typedef arg_decode_insn3210 arg_fnmadd_s; +typedef arg_decode_insn3215 arg_fnmadd_s; static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a); -typedef arg_decode_insn3211 arg_fadd_s; +typedef arg_decode_insn3216 arg_fadd_s; static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a); -typedef arg_decode_insn3211 arg_fsub_s; +typedef arg_decode_insn3216 arg_fsub_s; static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a); -typedef arg_decode_insn3211 arg_fmul_s; +typedef arg_decode_insn3216 arg_fmul_s; static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a); -typedef arg_decode_insn3211 arg_fdiv_s; +typedef arg_decode_insn3216 arg_fdiv_s; static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a); -typedef arg_decode_insn3212 arg_fsqrt_s; +typedef arg_decode_insn3217 arg_fsqrt_s; static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a); typedef arg_r arg_fsgnj_s; static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a); @@ -280,11 +326,11 @@ typedef arg_r arg_fmin_s; static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a); typedef arg_r arg_fmax_s; static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a); -typedef arg_decode_insn3212 arg_fcvt_w_s; +typedef arg_decode_insn3217 arg_fcvt_w_s; static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a); -typedef arg_decode_insn3212 arg_fcvt_wu_s; +typedef arg_decode_insn3217 arg_fcvt_wu_s; static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a); -typedef arg_decode_insn3213 arg_fmv_x_w; +typedef arg_decode_insn3218 arg_fmv_x_w; static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a); typedef arg_r arg_feq_s; static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a); @@ -292,35 +338,35 @@ typedef arg_r arg_flt_s; static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a); typedef arg_r arg_fle_s; static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a); -typedef arg_decode_insn3213 arg_fclass_s; +typedef arg_decode_insn3218 arg_fclass_s; static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a); -typedef arg_decode_insn3212 arg_fcvt_s_w; +typedef arg_decode_insn3217 arg_fcvt_s_w; static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a); -typedef arg_decode_insn3212 arg_fcvt_s_wu; +typedef arg_decode_insn3217 
arg_fcvt_s_wu; static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a); -typedef arg_decode_insn3213 arg_fmv_w_x; +typedef arg_decode_insn3218 arg_fmv_w_x; static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a); typedef arg_i arg_fld; static bool trans_fld(DisasContext *ctx, arg_fld *a); typedef arg_s arg_fsd; static bool trans_fsd(DisasContext *ctx, arg_fsd *a); -typedef arg_decode_insn3210 arg_fmadd_d; +typedef arg_decode_insn3215 arg_fmadd_d; static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a); -typedef arg_decode_insn3210 arg_fmsub_d; +typedef arg_decode_insn3215 arg_fmsub_d; static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a); -typedef arg_decode_insn3210 arg_fnmsub_d; +typedef arg_decode_insn3215 arg_fnmsub_d; static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a); -typedef arg_decode_insn3210 arg_fnmadd_d; +typedef arg_decode_insn3215 arg_fnmadd_d; static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a); -typedef arg_decode_insn3211 arg_fadd_d; +typedef arg_decode_insn3216 arg_fadd_d; static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a); -typedef arg_decode_insn3211 arg_fsub_d; +typedef arg_decode_insn3216 arg_fsub_d; static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a); -typedef arg_decode_insn3211 arg_fmul_d; +typedef arg_decode_insn3216 arg_fmul_d; static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a); -typedef arg_decode_insn3211 arg_fdiv_d; +typedef arg_decode_insn3216 arg_fdiv_d; static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a); -typedef arg_decode_insn3212 arg_fsqrt_d; +typedef arg_decode_insn3217 arg_fsqrt_d; static bool trans_fsqrt_d(DisasContext *ctx, arg_fsqrt_d *a); typedef arg_r arg_fsgnj_d; static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a); @@ -332,9 +378,9 @@ typedef arg_r arg_fmin_d; static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a); typedef arg_r arg_fmax_d; static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a); -typedef arg_decode_insn3212 arg_fcvt_s_d; +typedef arg_decode_insn3217 arg_fcvt_s_d; static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_s; +typedef arg_decode_insn3217 arg_fcvt_d_s; static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a); typedef arg_r arg_feq_d; static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a); @@ -342,16 +388,704 @@ typedef arg_r arg_flt_d; static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a); typedef arg_r arg_fle_d; static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a); -typedef arg_decode_insn3213 arg_fclass_d; +typedef arg_decode_insn3218 arg_fclass_d; static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a); -typedef arg_decode_insn3212 arg_fcvt_w_d; +typedef arg_decode_insn3217 arg_fcvt_w_d; static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a); -typedef arg_decode_insn3212 arg_fcvt_wu_d; +typedef arg_decode_insn3217 arg_fcvt_wu_d; static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_w; +typedef arg_decode_insn3217 arg_fcvt_d_w; static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a); -typedef arg_decode_insn3212 arg_fcvt_d_wu; +typedef arg_decode_insn3217 arg_fcvt_d_wu; static bool trans_fcvt_d_wu(DisasContext *ctx, arg_fcvt_d_wu *a); +typedef arg_decode_insn3222 arg_hfence_gvma; +static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); +typedef arg_decode_insn3222 arg_hfence_vvma; +static bool trans_hfence_vvma(DisasContext *ctx, arg_hfence_vvma *a); +typedef arg_r2nfvm 
arg_vlb_v; +static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a); +typedef arg_r2nfvm arg_vlh_v; +static bool trans_vlh_v(DisasContext *ctx, arg_vlh_v *a); +typedef arg_r2nfvm arg_vlw_v; +static bool trans_vlw_v(DisasContext *ctx, arg_vlw_v *a); +typedef arg_r2nfvm arg_vle_v; +static bool trans_vle_v(DisasContext *ctx, arg_vle_v *a); +typedef arg_r2nfvm arg_vlbu_v; +static bool trans_vlbu_v(DisasContext *ctx, arg_vlbu_v *a); +typedef arg_r2nfvm arg_vlhu_v; +static bool trans_vlhu_v(DisasContext *ctx, arg_vlhu_v *a); +typedef arg_r2nfvm arg_vlwu_v; +static bool trans_vlwu_v(DisasContext *ctx, arg_vlwu_v *a); +typedef arg_r2nfvm arg_vlbff_v; +static bool trans_vlbff_v(DisasContext *ctx, arg_vlbff_v *a); +typedef arg_r2nfvm arg_vlhff_v; +static bool trans_vlhff_v(DisasContext *ctx, arg_vlhff_v *a); +typedef arg_r2nfvm arg_vlwff_v; +static bool trans_vlwff_v(DisasContext *ctx, arg_vlwff_v *a); +typedef arg_r2nfvm arg_vleff_v; +static bool trans_vleff_v(DisasContext *ctx, arg_vleff_v *a); +typedef arg_r2nfvm arg_vlbuff_v; +static bool trans_vlbuff_v(DisasContext *ctx, arg_vlbuff_v *a); +typedef arg_r2nfvm arg_vlhuff_v; +static bool trans_vlhuff_v(DisasContext *ctx, arg_vlhuff_v *a); +typedef arg_r2nfvm arg_vlwuff_v; +static bool trans_vlwuff_v(DisasContext *ctx, arg_vlwuff_v *a); +typedef arg_r2nfvm arg_vsb_v; +static bool trans_vsb_v(DisasContext *ctx, arg_vsb_v *a); +typedef arg_r2nfvm arg_vsh_v; +static bool trans_vsh_v(DisasContext *ctx, arg_vsh_v *a); +typedef arg_r2nfvm arg_vsw_v; +static bool trans_vsw_v(DisasContext *ctx, arg_vsw_v *a); +typedef arg_r2nfvm arg_vse_v; +static bool trans_vse_v(DisasContext *ctx, arg_vse_v *a); +typedef arg_rnfvm arg_vlsb_v; +static bool trans_vlsb_v(DisasContext *ctx, arg_vlsb_v *a); +typedef arg_rnfvm arg_vlsh_v; +static bool trans_vlsh_v(DisasContext *ctx, arg_vlsh_v *a); +typedef arg_rnfvm arg_vlsw_v; +static bool trans_vlsw_v(DisasContext *ctx, arg_vlsw_v *a); +typedef arg_rnfvm arg_vlse_v; +static bool trans_vlse_v(DisasContext *ctx, arg_vlse_v *a); +typedef arg_rnfvm arg_vlsbu_v; +static bool trans_vlsbu_v(DisasContext *ctx, arg_vlsbu_v *a); +typedef arg_rnfvm arg_vlshu_v; +static bool trans_vlshu_v(DisasContext *ctx, arg_vlshu_v *a); +typedef arg_rnfvm arg_vlswu_v; +static bool trans_vlswu_v(DisasContext *ctx, arg_vlswu_v *a); +typedef arg_rnfvm arg_vssb_v; +static bool trans_vssb_v(DisasContext *ctx, arg_vssb_v *a); +typedef arg_rnfvm arg_vssh_v; +static bool trans_vssh_v(DisasContext *ctx, arg_vssh_v *a); +typedef arg_rnfvm arg_vssw_v; +static bool trans_vssw_v(DisasContext *ctx, arg_vssw_v *a); +typedef arg_rnfvm arg_vsse_v; +static bool trans_vsse_v(DisasContext *ctx, arg_vsse_v *a); +typedef arg_rnfvm arg_vlxb_v; +static bool trans_vlxb_v(DisasContext *ctx, arg_vlxb_v *a); +typedef arg_rnfvm arg_vlxh_v; +static bool trans_vlxh_v(DisasContext *ctx, arg_vlxh_v *a); +typedef arg_rnfvm arg_vlxw_v; +static bool trans_vlxw_v(DisasContext *ctx, arg_vlxw_v *a); +typedef arg_rnfvm arg_vlxe_v; +static bool trans_vlxe_v(DisasContext *ctx, arg_vlxe_v *a); +typedef arg_rnfvm arg_vlxbu_v; +static bool trans_vlxbu_v(DisasContext *ctx, arg_vlxbu_v *a); +typedef arg_rnfvm arg_vlxhu_v; +static bool trans_vlxhu_v(DisasContext *ctx, arg_vlxhu_v *a); +typedef arg_rnfvm arg_vlxwu_v; +static bool trans_vlxwu_v(DisasContext *ctx, arg_vlxwu_v *a); +typedef arg_rnfvm arg_vsxb_v; +static bool trans_vsxb_v(DisasContext *ctx, arg_vsxb_v *a); +typedef arg_rnfvm arg_vsxh_v; +static bool trans_vsxh_v(DisasContext *ctx, arg_vsxh_v *a); +typedef arg_rnfvm arg_vsxw_v; 
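+/*
+ * These arg_* aliases map the new vector instruction formats onto the
+ * decodetree-generated argument structs declared above: r2nfvm and rnfvm
+ * carry the segment count (nf) and mask bit (vm) for the vector loads and
+ * stores, rwdvm adds the wd flag used by the vector AMO patterns, and rmrr
+ * covers the masked rd/rs1/rs2 ALU form.
+ */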
+static bool trans_vsxw_v(DisasContext *ctx, arg_vsxw_v *a); +typedef arg_rnfvm arg_vsxe_v; +static bool trans_vsxe_v(DisasContext *ctx, arg_vsxe_v *a); +typedef arg_rwdvm arg_vamoswapw_v; +static bool trans_vamoswapw_v(DisasContext *ctx, arg_vamoswapw_v *a); +typedef arg_rwdvm arg_vamoaddw_v; +static bool trans_vamoaddw_v(DisasContext *ctx, arg_vamoaddw_v *a); +typedef arg_rwdvm arg_vamoxorw_v; +static bool trans_vamoxorw_v(DisasContext *ctx, arg_vamoxorw_v *a); +typedef arg_rwdvm arg_vamoandw_v; +static bool trans_vamoandw_v(DisasContext *ctx, arg_vamoandw_v *a); +typedef arg_rwdvm arg_vamoorw_v; +static bool trans_vamoorw_v(DisasContext *ctx, arg_vamoorw_v *a); +typedef arg_rwdvm arg_vamominw_v; +static bool trans_vamominw_v(DisasContext *ctx, arg_vamominw_v *a); +typedef arg_rwdvm arg_vamomaxw_v; +static bool trans_vamomaxw_v(DisasContext *ctx, arg_vamomaxw_v *a); +typedef arg_rwdvm arg_vamominuw_v; +static bool trans_vamominuw_v(DisasContext *ctx, arg_vamominuw_v *a); +typedef arg_rwdvm arg_vamomaxuw_v; +static bool trans_vamomaxuw_v(DisasContext *ctx, arg_vamomaxuw_v *a); +typedef arg_rmrr arg_vadd_vv; +static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a); +typedef arg_rmrr arg_vadd_vx; +static bool trans_vadd_vx(DisasContext *ctx, arg_vadd_vx *a); +typedef arg_rmrr arg_vadd_vi; +static bool trans_vadd_vi(DisasContext *ctx, arg_vadd_vi *a); +typedef arg_rmrr arg_vsub_vv; +static bool trans_vsub_vv(DisasContext *ctx, arg_vsub_vv *a); +typedef arg_rmrr arg_vsub_vx; +static bool trans_vsub_vx(DisasContext *ctx, arg_vsub_vx *a); +typedef arg_rmrr arg_vrsub_vx; +static bool trans_vrsub_vx(DisasContext *ctx, arg_vrsub_vx *a); +typedef arg_rmrr arg_vrsub_vi; +static bool trans_vrsub_vi(DisasContext *ctx, arg_vrsub_vi *a); +typedef arg_rmrr arg_vwaddu_vv; +static bool trans_vwaddu_vv(DisasContext *ctx, arg_vwaddu_vv *a); +typedef arg_rmrr arg_vwaddu_vx; +static bool trans_vwaddu_vx(DisasContext *ctx, arg_vwaddu_vx *a); +typedef arg_rmrr arg_vwadd_vv; +static bool trans_vwadd_vv(DisasContext *ctx, arg_vwadd_vv *a); +typedef arg_rmrr arg_vwadd_vx; +static bool trans_vwadd_vx(DisasContext *ctx, arg_vwadd_vx *a); +typedef arg_rmrr arg_vwsubu_vv; +static bool trans_vwsubu_vv(DisasContext *ctx, arg_vwsubu_vv *a); +typedef arg_rmrr arg_vwsubu_vx; +static bool trans_vwsubu_vx(DisasContext *ctx, arg_vwsubu_vx *a); +typedef arg_rmrr arg_vwsub_vv; +static bool trans_vwsub_vv(DisasContext *ctx, arg_vwsub_vv *a); +typedef arg_rmrr arg_vwsub_vx; +static bool trans_vwsub_vx(DisasContext *ctx, arg_vwsub_vx *a); +typedef arg_rmrr arg_vwaddu_wv; +static bool trans_vwaddu_wv(DisasContext *ctx, arg_vwaddu_wv *a); +typedef arg_rmrr arg_vwaddu_wx; +static bool trans_vwaddu_wx(DisasContext *ctx, arg_vwaddu_wx *a); +typedef arg_rmrr arg_vwadd_wv; +static bool trans_vwadd_wv(DisasContext *ctx, arg_vwadd_wv *a); +typedef arg_rmrr arg_vwadd_wx; +static bool trans_vwadd_wx(DisasContext *ctx, arg_vwadd_wx *a); +typedef arg_rmrr arg_vwsubu_wv; +static bool trans_vwsubu_wv(DisasContext *ctx, arg_vwsubu_wv *a); +typedef arg_rmrr arg_vwsubu_wx; +static bool trans_vwsubu_wx(DisasContext *ctx, arg_vwsubu_wx *a); +typedef arg_rmrr arg_vwsub_wv; +static bool trans_vwsub_wv(DisasContext *ctx, arg_vwsub_wv *a); +typedef arg_rmrr arg_vwsub_wx; +static bool trans_vwsub_wx(DisasContext *ctx, arg_vwsub_wx *a); +typedef arg_rmrr arg_vadc_vvm; +static bool trans_vadc_vvm(DisasContext *ctx, arg_vadc_vvm *a); +typedef arg_rmrr arg_vadc_vxm; +static bool trans_vadc_vxm(DisasContext *ctx, arg_vadc_vxm *a); +typedef arg_rmrr 
arg_vadc_vim; +static bool trans_vadc_vim(DisasContext *ctx, arg_vadc_vim *a); +typedef arg_rmrr arg_vmadc_vvm; +static bool trans_vmadc_vvm(DisasContext *ctx, arg_vmadc_vvm *a); +typedef arg_rmrr arg_vmadc_vxm; +static bool trans_vmadc_vxm(DisasContext *ctx, arg_vmadc_vxm *a); +typedef arg_rmrr arg_vmadc_vim; +static bool trans_vmadc_vim(DisasContext *ctx, arg_vmadc_vim *a); +typedef arg_rmrr arg_vsbc_vvm; +static bool trans_vsbc_vvm(DisasContext *ctx, arg_vsbc_vvm *a); +typedef arg_rmrr arg_vsbc_vxm; +static bool trans_vsbc_vxm(DisasContext *ctx, arg_vsbc_vxm *a); +typedef arg_rmrr arg_vmsbc_vvm; +static bool trans_vmsbc_vvm(DisasContext *ctx, arg_vmsbc_vvm *a); +typedef arg_rmrr arg_vmsbc_vxm; +static bool trans_vmsbc_vxm(DisasContext *ctx, arg_vmsbc_vxm *a); +typedef arg_rmrr arg_vand_vv; +static bool trans_vand_vv(DisasContext *ctx, arg_vand_vv *a); +typedef arg_rmrr arg_vand_vx; +static bool trans_vand_vx(DisasContext *ctx, arg_vand_vx *a); +typedef arg_rmrr arg_vand_vi; +static bool trans_vand_vi(DisasContext *ctx, arg_vand_vi *a); +typedef arg_rmrr arg_vor_vv; +static bool trans_vor_vv(DisasContext *ctx, arg_vor_vv *a); +typedef arg_rmrr arg_vor_vx; +static bool trans_vor_vx(DisasContext *ctx, arg_vor_vx *a); +typedef arg_rmrr arg_vor_vi; +static bool trans_vor_vi(DisasContext *ctx, arg_vor_vi *a); +typedef arg_rmrr arg_vxor_vv; +static bool trans_vxor_vv(DisasContext *ctx, arg_vxor_vv *a); +typedef arg_rmrr arg_vxor_vx; +static bool trans_vxor_vx(DisasContext *ctx, arg_vxor_vx *a); +typedef arg_rmrr arg_vxor_vi; +static bool trans_vxor_vi(DisasContext *ctx, arg_vxor_vi *a); +typedef arg_rmrr arg_vsll_vv; +static bool trans_vsll_vv(DisasContext *ctx, arg_vsll_vv *a); +typedef arg_rmrr arg_vsll_vx; +static bool trans_vsll_vx(DisasContext *ctx, arg_vsll_vx *a); +typedef arg_rmrr arg_vsll_vi; +static bool trans_vsll_vi(DisasContext *ctx, arg_vsll_vi *a); +typedef arg_rmrr arg_vsrl_vv; +static bool trans_vsrl_vv(DisasContext *ctx, arg_vsrl_vv *a); +typedef arg_rmrr arg_vsrl_vx; +static bool trans_vsrl_vx(DisasContext *ctx, arg_vsrl_vx *a); +typedef arg_rmrr arg_vsrl_vi; +static bool trans_vsrl_vi(DisasContext *ctx, arg_vsrl_vi *a); +typedef arg_rmrr arg_vsra_vv; +static bool trans_vsra_vv(DisasContext *ctx, arg_vsra_vv *a); +typedef arg_rmrr arg_vsra_vx; +static bool trans_vsra_vx(DisasContext *ctx, arg_vsra_vx *a); +typedef arg_rmrr arg_vsra_vi; +static bool trans_vsra_vi(DisasContext *ctx, arg_vsra_vi *a); +typedef arg_rmrr arg_vnsrl_vv; +static bool trans_vnsrl_vv(DisasContext *ctx, arg_vnsrl_vv *a); +typedef arg_rmrr arg_vnsrl_vx; +static bool trans_vnsrl_vx(DisasContext *ctx, arg_vnsrl_vx *a); +typedef arg_rmrr arg_vnsrl_vi; +static bool trans_vnsrl_vi(DisasContext *ctx, arg_vnsrl_vi *a); +typedef arg_rmrr arg_vnsra_vv; +static bool trans_vnsra_vv(DisasContext *ctx, arg_vnsra_vv *a); +typedef arg_rmrr arg_vnsra_vx; +static bool trans_vnsra_vx(DisasContext *ctx, arg_vnsra_vx *a); +typedef arg_rmrr arg_vnsra_vi; +static bool trans_vnsra_vi(DisasContext *ctx, arg_vnsra_vi *a); +typedef arg_rmrr arg_vmseq_vv; +static bool trans_vmseq_vv(DisasContext *ctx, arg_vmseq_vv *a); +typedef arg_rmrr arg_vmseq_vx; +static bool trans_vmseq_vx(DisasContext *ctx, arg_vmseq_vx *a); +typedef arg_rmrr arg_vmseq_vi; +static bool trans_vmseq_vi(DisasContext *ctx, arg_vmseq_vi *a); +typedef arg_rmrr arg_vmsne_vv; +static bool trans_vmsne_vv(DisasContext *ctx, arg_vmsne_vv *a); +typedef arg_rmrr arg_vmsne_vx; +static bool trans_vmsne_vx(DisasContext *ctx, arg_vmsne_vx *a); +typedef arg_rmrr arg_vmsne_vi; 
+static bool trans_vmsne_vi(DisasContext *ctx, arg_vmsne_vi *a); +typedef arg_rmrr arg_vmsltu_vv; +static bool trans_vmsltu_vv(DisasContext *ctx, arg_vmsltu_vv *a); +typedef arg_rmrr arg_vmsltu_vx; +static bool trans_vmsltu_vx(DisasContext *ctx, arg_vmsltu_vx *a); +typedef arg_rmrr arg_vmslt_vv; +static bool trans_vmslt_vv(DisasContext *ctx, arg_vmslt_vv *a); +typedef arg_rmrr arg_vmslt_vx; +static bool trans_vmslt_vx(DisasContext *ctx, arg_vmslt_vx *a); +typedef arg_rmrr arg_vmsleu_vv; +static bool trans_vmsleu_vv(DisasContext *ctx, arg_vmsleu_vv *a); +typedef arg_rmrr arg_vmsleu_vx; +static bool trans_vmsleu_vx(DisasContext *ctx, arg_vmsleu_vx *a); +typedef arg_rmrr arg_vmsleu_vi; +static bool trans_vmsleu_vi(DisasContext *ctx, arg_vmsleu_vi *a); +typedef arg_rmrr arg_vmsle_vv; +static bool trans_vmsle_vv(DisasContext *ctx, arg_vmsle_vv *a); +typedef arg_rmrr arg_vmsle_vx; +static bool trans_vmsle_vx(DisasContext *ctx, arg_vmsle_vx *a); +typedef arg_rmrr arg_vmsle_vi; +static bool trans_vmsle_vi(DisasContext *ctx, arg_vmsle_vi *a); +typedef arg_rmrr arg_vmsgtu_vx; +static bool trans_vmsgtu_vx(DisasContext *ctx, arg_vmsgtu_vx *a); +typedef arg_rmrr arg_vmsgtu_vi; +static bool trans_vmsgtu_vi(DisasContext *ctx, arg_vmsgtu_vi *a); +typedef arg_rmrr arg_vmsgt_vx; +static bool trans_vmsgt_vx(DisasContext *ctx, arg_vmsgt_vx *a); +typedef arg_rmrr arg_vmsgt_vi; +static bool trans_vmsgt_vi(DisasContext *ctx, arg_vmsgt_vi *a); +typedef arg_rmrr arg_vminu_vv; +static bool trans_vminu_vv(DisasContext *ctx, arg_vminu_vv *a); +typedef arg_rmrr arg_vminu_vx; +static bool trans_vminu_vx(DisasContext *ctx, arg_vminu_vx *a); +typedef arg_rmrr arg_vmin_vv; +static bool trans_vmin_vv(DisasContext *ctx, arg_vmin_vv *a); +typedef arg_rmrr arg_vmin_vx; +static bool trans_vmin_vx(DisasContext *ctx, arg_vmin_vx *a); +typedef arg_rmrr arg_vmaxu_vv; +static bool trans_vmaxu_vv(DisasContext *ctx, arg_vmaxu_vv *a); +typedef arg_rmrr arg_vmaxu_vx; +static bool trans_vmaxu_vx(DisasContext *ctx, arg_vmaxu_vx *a); +typedef arg_rmrr arg_vmax_vv; +static bool trans_vmax_vv(DisasContext *ctx, arg_vmax_vv *a); +typedef arg_rmrr arg_vmax_vx; +static bool trans_vmax_vx(DisasContext *ctx, arg_vmax_vx *a); +typedef arg_rmrr arg_vmul_vv; +static bool trans_vmul_vv(DisasContext *ctx, arg_vmul_vv *a); +typedef arg_rmrr arg_vmul_vx; +static bool trans_vmul_vx(DisasContext *ctx, arg_vmul_vx *a); +typedef arg_rmrr arg_vmulh_vv; +static bool trans_vmulh_vv(DisasContext *ctx, arg_vmulh_vv *a); +typedef arg_rmrr arg_vmulh_vx; +static bool trans_vmulh_vx(DisasContext *ctx, arg_vmulh_vx *a); +typedef arg_rmrr arg_vmulhu_vv; +static bool trans_vmulhu_vv(DisasContext *ctx, arg_vmulhu_vv *a); +typedef arg_rmrr arg_vmulhu_vx; +static bool trans_vmulhu_vx(DisasContext *ctx, arg_vmulhu_vx *a); +typedef arg_rmrr arg_vmulhsu_vv; +static bool trans_vmulhsu_vv(DisasContext *ctx, arg_vmulhsu_vv *a); +typedef arg_rmrr arg_vmulhsu_vx; +static bool trans_vmulhsu_vx(DisasContext *ctx, arg_vmulhsu_vx *a); +typedef arg_rmrr arg_vdivu_vv; +static bool trans_vdivu_vv(DisasContext *ctx, arg_vdivu_vv *a); +typedef arg_rmrr arg_vdivu_vx; +static bool trans_vdivu_vx(DisasContext *ctx, arg_vdivu_vx *a); +typedef arg_rmrr arg_vdiv_vv; +static bool trans_vdiv_vv(DisasContext *ctx, arg_vdiv_vv *a); +typedef arg_rmrr arg_vdiv_vx; +static bool trans_vdiv_vx(DisasContext *ctx, arg_vdiv_vx *a); +typedef arg_rmrr arg_vremu_vv; +static bool trans_vremu_vv(DisasContext *ctx, arg_vremu_vv *a); +typedef arg_rmrr arg_vremu_vx; +static bool trans_vremu_vx(DisasContext *ctx, 
arg_vremu_vx *a); +typedef arg_rmrr arg_vrem_vv; +static bool trans_vrem_vv(DisasContext *ctx, arg_vrem_vv *a); +typedef arg_rmrr arg_vrem_vx; +static bool trans_vrem_vx(DisasContext *ctx, arg_vrem_vx *a); +typedef arg_rmrr arg_vwmulu_vv; +static bool trans_vwmulu_vv(DisasContext *ctx, arg_vwmulu_vv *a); +typedef arg_rmrr arg_vwmulu_vx; +static bool trans_vwmulu_vx(DisasContext *ctx, arg_vwmulu_vx *a); +typedef arg_rmrr arg_vwmulsu_vv; +static bool trans_vwmulsu_vv(DisasContext *ctx, arg_vwmulsu_vv *a); +typedef arg_rmrr arg_vwmulsu_vx; +static bool trans_vwmulsu_vx(DisasContext *ctx, arg_vwmulsu_vx *a); +typedef arg_rmrr arg_vwmul_vv; +static bool trans_vwmul_vv(DisasContext *ctx, arg_vwmul_vv *a); +typedef arg_rmrr arg_vwmul_vx; +static bool trans_vwmul_vx(DisasContext *ctx, arg_vwmul_vx *a); +typedef arg_rmrr arg_vmacc_vv; +static bool trans_vmacc_vv(DisasContext *ctx, arg_vmacc_vv *a); +typedef arg_rmrr arg_vmacc_vx; +static bool trans_vmacc_vx(DisasContext *ctx, arg_vmacc_vx *a); +typedef arg_rmrr arg_vnmsac_vv; +static bool trans_vnmsac_vv(DisasContext *ctx, arg_vnmsac_vv *a); +typedef arg_rmrr arg_vnmsac_vx; +static bool trans_vnmsac_vx(DisasContext *ctx, arg_vnmsac_vx *a); +typedef arg_rmrr arg_vmadd_vv; +static bool trans_vmadd_vv(DisasContext *ctx, arg_vmadd_vv *a); +typedef arg_rmrr arg_vmadd_vx; +static bool trans_vmadd_vx(DisasContext *ctx, arg_vmadd_vx *a); +typedef arg_rmrr arg_vnmsub_vv; +static bool trans_vnmsub_vv(DisasContext *ctx, arg_vnmsub_vv *a); +typedef arg_rmrr arg_vnmsub_vx; +static bool trans_vnmsub_vx(DisasContext *ctx, arg_vnmsub_vx *a); +typedef arg_rmrr arg_vwmaccu_vv; +static bool trans_vwmaccu_vv(DisasContext *ctx, arg_vwmaccu_vv *a); +typedef arg_rmrr arg_vwmaccu_vx; +static bool trans_vwmaccu_vx(DisasContext *ctx, arg_vwmaccu_vx *a); +typedef arg_rmrr arg_vwmacc_vv; +static bool trans_vwmacc_vv(DisasContext *ctx, arg_vwmacc_vv *a); +typedef arg_rmrr arg_vwmacc_vx; +static bool trans_vwmacc_vx(DisasContext *ctx, arg_vwmacc_vx *a); +typedef arg_rmrr arg_vwmaccsu_vv; +static bool trans_vwmaccsu_vv(DisasContext *ctx, arg_vwmaccsu_vv *a); +typedef arg_rmrr arg_vwmaccsu_vx; +static bool trans_vwmaccsu_vx(DisasContext *ctx, arg_vwmaccsu_vx *a); +typedef arg_rmrr arg_vwmaccus_vx; +static bool trans_vwmaccus_vx(DisasContext *ctx, arg_vwmaccus_vx *a); +typedef arg_decode_insn3218 arg_vmv_v_v; +static bool trans_vmv_v_v(DisasContext *ctx, arg_vmv_v_v *a); +typedef arg_decode_insn3218 arg_vmv_v_x; +static bool trans_vmv_v_x(DisasContext *ctx, arg_vmv_v_x *a); +typedef arg_decode_insn3218 arg_vmv_v_i; +static bool trans_vmv_v_i(DisasContext *ctx, arg_vmv_v_i *a); +typedef arg_rmrr arg_vmerge_vvm; +static bool trans_vmerge_vvm(DisasContext *ctx, arg_vmerge_vvm *a); +typedef arg_rmrr arg_vmerge_vxm; +static bool trans_vmerge_vxm(DisasContext *ctx, arg_vmerge_vxm *a); +typedef arg_rmrr arg_vmerge_vim; +static bool trans_vmerge_vim(DisasContext *ctx, arg_vmerge_vim *a); +typedef arg_rmrr arg_vsaddu_vv; +static bool trans_vsaddu_vv(DisasContext *ctx, arg_vsaddu_vv *a); +typedef arg_rmrr arg_vsaddu_vx; +static bool trans_vsaddu_vx(DisasContext *ctx, arg_vsaddu_vx *a); +typedef arg_rmrr arg_vsaddu_vi; +static bool trans_vsaddu_vi(DisasContext *ctx, arg_vsaddu_vi *a); +typedef arg_rmrr arg_vsadd_vv; +static bool trans_vsadd_vv(DisasContext *ctx, arg_vsadd_vv *a); +typedef arg_rmrr arg_vsadd_vx; +static bool trans_vsadd_vx(DisasContext *ctx, arg_vsadd_vx *a); +typedef arg_rmrr arg_vsadd_vi; +static bool trans_vsadd_vi(DisasContext *ctx, arg_vsadd_vi *a); +typedef arg_rmrr 
arg_vssubu_vv; +static bool trans_vssubu_vv(DisasContext *ctx, arg_vssubu_vv *a); +typedef arg_rmrr arg_vssubu_vx; +static bool trans_vssubu_vx(DisasContext *ctx, arg_vssubu_vx *a); +typedef arg_rmrr arg_vssub_vv; +static bool trans_vssub_vv(DisasContext *ctx, arg_vssub_vv *a); +typedef arg_rmrr arg_vssub_vx; +static bool trans_vssub_vx(DisasContext *ctx, arg_vssub_vx *a); +typedef arg_rmrr arg_vaadd_vv; +static bool trans_vaadd_vv(DisasContext *ctx, arg_vaadd_vv *a); +typedef arg_rmrr arg_vaadd_vx; +static bool trans_vaadd_vx(DisasContext *ctx, arg_vaadd_vx *a); +typedef arg_rmrr arg_vaadd_vi; +static bool trans_vaadd_vi(DisasContext *ctx, arg_vaadd_vi *a); +typedef arg_rmrr arg_vasub_vv; +static bool trans_vasub_vv(DisasContext *ctx, arg_vasub_vv *a); +typedef arg_rmrr arg_vasub_vx; +static bool trans_vasub_vx(DisasContext *ctx, arg_vasub_vx *a); +typedef arg_rmrr arg_vsmul_vv; +static bool trans_vsmul_vv(DisasContext *ctx, arg_vsmul_vv *a); +typedef arg_rmrr arg_vsmul_vx; +static bool trans_vsmul_vx(DisasContext *ctx, arg_vsmul_vx *a); +typedef arg_rmrr arg_vwsmaccu_vv; +static bool trans_vwsmaccu_vv(DisasContext *ctx, arg_vwsmaccu_vv *a); +typedef arg_rmrr arg_vwsmaccu_vx; +static bool trans_vwsmaccu_vx(DisasContext *ctx, arg_vwsmaccu_vx *a); +typedef arg_rmrr arg_vwsmacc_vv; +static bool trans_vwsmacc_vv(DisasContext *ctx, arg_vwsmacc_vv *a); +typedef arg_rmrr arg_vwsmacc_vx; +static bool trans_vwsmacc_vx(DisasContext *ctx, arg_vwsmacc_vx *a); +typedef arg_rmrr arg_vwsmaccsu_vv; +static bool trans_vwsmaccsu_vv(DisasContext *ctx, arg_vwsmaccsu_vv *a); +typedef arg_rmrr arg_vwsmaccsu_vx; +static bool trans_vwsmaccsu_vx(DisasContext *ctx, arg_vwsmaccsu_vx *a); +typedef arg_rmrr arg_vwsmaccus_vx; +static bool trans_vwsmaccus_vx(DisasContext *ctx, arg_vwsmaccus_vx *a); +typedef arg_rmrr arg_vssrl_vv; +static bool trans_vssrl_vv(DisasContext *ctx, arg_vssrl_vv *a); +typedef arg_rmrr arg_vssrl_vx; +static bool trans_vssrl_vx(DisasContext *ctx, arg_vssrl_vx *a); +typedef arg_rmrr arg_vssrl_vi; +static bool trans_vssrl_vi(DisasContext *ctx, arg_vssrl_vi *a); +typedef arg_rmrr arg_vssra_vv; +static bool trans_vssra_vv(DisasContext *ctx, arg_vssra_vv *a); +typedef arg_rmrr arg_vssra_vx; +static bool trans_vssra_vx(DisasContext *ctx, arg_vssra_vx *a); +typedef arg_rmrr arg_vssra_vi; +static bool trans_vssra_vi(DisasContext *ctx, arg_vssra_vi *a); +typedef arg_rmrr arg_vnclipu_vv; +static bool trans_vnclipu_vv(DisasContext *ctx, arg_vnclipu_vv *a); +typedef arg_rmrr arg_vnclipu_vx; +static bool trans_vnclipu_vx(DisasContext *ctx, arg_vnclipu_vx *a); +typedef arg_rmrr arg_vnclipu_vi; +static bool trans_vnclipu_vi(DisasContext *ctx, arg_vnclipu_vi *a); +typedef arg_rmrr arg_vnclip_vv; +static bool trans_vnclip_vv(DisasContext *ctx, arg_vnclip_vv *a); +typedef arg_rmrr arg_vnclip_vx; +static bool trans_vnclip_vx(DisasContext *ctx, arg_vnclip_vx *a); +typedef arg_rmrr arg_vnclip_vi; +static bool trans_vnclip_vi(DisasContext *ctx, arg_vnclip_vi *a); +typedef arg_rmrr arg_vfadd_vv; +static bool trans_vfadd_vv(DisasContext *ctx, arg_vfadd_vv *a); +typedef arg_rmrr arg_vfadd_vf; +static bool trans_vfadd_vf(DisasContext *ctx, arg_vfadd_vf *a); +typedef arg_rmrr arg_vfsub_vv; +static bool trans_vfsub_vv(DisasContext *ctx, arg_vfsub_vv *a); +typedef arg_rmrr arg_vfsub_vf; +static bool trans_vfsub_vf(DisasContext *ctx, arg_vfsub_vf *a); +typedef arg_rmrr arg_vfrsub_vf; +static bool trans_vfrsub_vf(DisasContext *ctx, arg_vfrsub_vf *a); +typedef arg_rmrr arg_vfwadd_vv; +static bool trans_vfwadd_vv(DisasContext 
*ctx, arg_vfwadd_vv *a); +typedef arg_rmrr arg_vfwadd_vf; +static bool trans_vfwadd_vf(DisasContext *ctx, arg_vfwadd_vf *a); +typedef arg_rmrr arg_vfwadd_wv; +static bool trans_vfwadd_wv(DisasContext *ctx, arg_vfwadd_wv *a); +typedef arg_rmrr arg_vfwadd_wf; +static bool trans_vfwadd_wf(DisasContext *ctx, arg_vfwadd_wf *a); +typedef arg_rmrr arg_vfwsub_vv; +static bool trans_vfwsub_vv(DisasContext *ctx, arg_vfwsub_vv *a); +typedef arg_rmrr arg_vfwsub_vf; +static bool trans_vfwsub_vf(DisasContext *ctx, arg_vfwsub_vf *a); +typedef arg_rmrr arg_vfwsub_wv; +static bool trans_vfwsub_wv(DisasContext *ctx, arg_vfwsub_wv *a); +typedef arg_rmrr arg_vfwsub_wf; +static bool trans_vfwsub_wf(DisasContext *ctx, arg_vfwsub_wf *a); +typedef arg_rmrr arg_vfmul_vv; +static bool trans_vfmul_vv(DisasContext *ctx, arg_vfmul_vv *a); +typedef arg_rmrr arg_vfmul_vf; +static bool trans_vfmul_vf(DisasContext *ctx, arg_vfmul_vf *a); +typedef arg_rmrr arg_vfdiv_vv; +static bool trans_vfdiv_vv(DisasContext *ctx, arg_vfdiv_vv *a); +typedef arg_rmrr arg_vfdiv_vf; +static bool trans_vfdiv_vf(DisasContext *ctx, arg_vfdiv_vf *a); +typedef arg_rmrr arg_vfrdiv_vf; +static bool trans_vfrdiv_vf(DisasContext *ctx, arg_vfrdiv_vf *a); +typedef arg_rmrr arg_vfwmul_vv; +static bool trans_vfwmul_vv(DisasContext *ctx, arg_vfwmul_vv *a); +typedef arg_rmrr arg_vfwmul_vf; +static bool trans_vfwmul_vf(DisasContext *ctx, arg_vfwmul_vf *a); +typedef arg_rmrr arg_vfmacc_vv; +static bool trans_vfmacc_vv(DisasContext *ctx, arg_vfmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vv; +static bool trans_vfnmacc_vv(DisasContext *ctx, arg_vfnmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vf; +static bool trans_vfnmacc_vf(DisasContext *ctx, arg_vfnmacc_vf *a); +typedef arg_rmrr arg_vfmacc_vf; +static bool trans_vfmacc_vf(DisasContext *ctx, arg_vfmacc_vf *a); +typedef arg_rmrr arg_vfmsac_vv; +static bool trans_vfmsac_vv(DisasContext *ctx, arg_vfmsac_vv *a); +typedef arg_rmrr arg_vfmsac_vf; +static bool trans_vfmsac_vf(DisasContext *ctx, arg_vfmsac_vf *a); +typedef arg_rmrr arg_vfnmsac_vv; +static bool trans_vfnmsac_vv(DisasContext *ctx, arg_vfnmsac_vv *a); +typedef arg_rmrr arg_vfnmsac_vf; +static bool trans_vfnmsac_vf(DisasContext *ctx, arg_vfnmsac_vf *a); +typedef arg_rmrr arg_vfmadd_vv; +static bool trans_vfmadd_vv(DisasContext *ctx, arg_vfmadd_vv *a); +typedef arg_rmrr arg_vfmadd_vf; +static bool trans_vfmadd_vf(DisasContext *ctx, arg_vfmadd_vf *a); +typedef arg_rmrr arg_vfnmadd_vv; +static bool trans_vfnmadd_vv(DisasContext *ctx, arg_vfnmadd_vv *a); +typedef arg_rmrr arg_vfnmadd_vf; +static bool trans_vfnmadd_vf(DisasContext *ctx, arg_vfnmadd_vf *a); +typedef arg_rmrr arg_vfmsub_vv; +static bool trans_vfmsub_vv(DisasContext *ctx, arg_vfmsub_vv *a); +typedef arg_rmrr arg_vfmsub_vf; +static bool trans_vfmsub_vf(DisasContext *ctx, arg_vfmsub_vf *a); +typedef arg_rmrr arg_vfnmsub_vv; +static bool trans_vfnmsub_vv(DisasContext *ctx, arg_vfnmsub_vv *a); +typedef arg_rmrr arg_vfnmsub_vf; +static bool trans_vfnmsub_vf(DisasContext *ctx, arg_vfnmsub_vf *a); +typedef arg_rmrr arg_vfwmacc_vv; +static bool trans_vfwmacc_vv(DisasContext *ctx, arg_vfwmacc_vv *a); +typedef arg_rmrr arg_vfwmacc_vf; +static bool trans_vfwmacc_vf(DisasContext *ctx, arg_vfwmacc_vf *a); +typedef arg_rmrr arg_vfwnmacc_vv; +static bool trans_vfwnmacc_vv(DisasContext *ctx, arg_vfwnmacc_vv *a); +typedef arg_rmrr arg_vfwnmacc_vf; +static bool trans_vfwnmacc_vf(DisasContext *ctx, arg_vfwnmacc_vf *a); +typedef arg_rmrr arg_vfwmsac_vv; +static bool trans_vfwmsac_vv(DisasContext *ctx, arg_vfwmsac_vv 
*a); +typedef arg_rmrr arg_vfwmsac_vf; +static bool trans_vfwmsac_vf(DisasContext *ctx, arg_vfwmsac_vf *a); +typedef arg_rmrr arg_vfwnmsac_vv; +static bool trans_vfwnmsac_vv(DisasContext *ctx, arg_vfwnmsac_vv *a); +typedef arg_rmrr arg_vfwnmsac_vf; +static bool trans_vfwnmsac_vf(DisasContext *ctx, arg_vfwnmsac_vf *a); +typedef arg_rmr arg_vfsqrt_v; +static bool trans_vfsqrt_v(DisasContext *ctx, arg_vfsqrt_v *a); +typedef arg_rmrr arg_vfmin_vv; +static bool trans_vfmin_vv(DisasContext *ctx, arg_vfmin_vv *a); +typedef arg_rmrr arg_vfmin_vf; +static bool trans_vfmin_vf(DisasContext *ctx, arg_vfmin_vf *a); +typedef arg_rmrr arg_vfmax_vv; +static bool trans_vfmax_vv(DisasContext *ctx, arg_vfmax_vv *a); +typedef arg_rmrr arg_vfmax_vf; +static bool trans_vfmax_vf(DisasContext *ctx, arg_vfmax_vf *a); +typedef arg_rmrr arg_vfsgnj_vv; +static bool trans_vfsgnj_vv(DisasContext *ctx, arg_vfsgnj_vv *a); +typedef arg_rmrr arg_vfsgnj_vf; +static bool trans_vfsgnj_vf(DisasContext *ctx, arg_vfsgnj_vf *a); +typedef arg_rmrr arg_vfsgnjn_vv; +static bool trans_vfsgnjn_vv(DisasContext *ctx, arg_vfsgnjn_vv *a); +typedef arg_rmrr arg_vfsgnjn_vf; +static bool trans_vfsgnjn_vf(DisasContext *ctx, arg_vfsgnjn_vf *a); +typedef arg_rmrr arg_vfsgnjx_vv; +static bool trans_vfsgnjx_vv(DisasContext *ctx, arg_vfsgnjx_vv *a); +typedef arg_rmrr arg_vfsgnjx_vf; +static bool trans_vfsgnjx_vf(DisasContext *ctx, arg_vfsgnjx_vf *a); +typedef arg_rmrr arg_vmfeq_vv; +static bool trans_vmfeq_vv(DisasContext *ctx, arg_vmfeq_vv *a); +typedef arg_rmrr arg_vmfeq_vf; +static bool trans_vmfeq_vf(DisasContext *ctx, arg_vmfeq_vf *a); +typedef arg_rmrr arg_vmfne_vv; +static bool trans_vmfne_vv(DisasContext *ctx, arg_vmfne_vv *a); +typedef arg_rmrr arg_vmfne_vf; +static bool trans_vmfne_vf(DisasContext *ctx, arg_vmfne_vf *a); +typedef arg_rmrr arg_vmflt_vv; +static bool trans_vmflt_vv(DisasContext *ctx, arg_vmflt_vv *a); +typedef arg_rmrr arg_vmflt_vf; +static bool trans_vmflt_vf(DisasContext *ctx, arg_vmflt_vf *a); +typedef arg_rmrr arg_vmfle_vv; +static bool trans_vmfle_vv(DisasContext *ctx, arg_vmfle_vv *a); +typedef arg_rmrr arg_vmfle_vf; +static bool trans_vmfle_vf(DisasContext *ctx, arg_vmfle_vf *a); +typedef arg_rmrr arg_vmfgt_vf; +static bool trans_vmfgt_vf(DisasContext *ctx, arg_vmfgt_vf *a); +typedef arg_rmrr arg_vmfge_vf; +static bool trans_vmfge_vf(DisasContext *ctx, arg_vmfge_vf *a); +typedef arg_rmrr arg_vmford_vv; +static bool trans_vmford_vv(DisasContext *ctx, arg_vmford_vv *a); +typedef arg_rmrr arg_vmford_vf; +static bool trans_vmford_vf(DisasContext *ctx, arg_vmford_vf *a); +typedef arg_rmr arg_vfclass_v; +static bool trans_vfclass_v(DisasContext *ctx, arg_vfclass_v *a); +typedef arg_rmrr arg_vfmerge_vfm; +static bool trans_vfmerge_vfm(DisasContext *ctx, arg_vfmerge_vfm *a); +typedef arg_decode_insn3218 arg_vfmv_v_f; +static bool trans_vfmv_v_f(DisasContext *ctx, arg_vfmv_v_f *a); +typedef arg_rmr arg_vfcvt_xu_f_v; +static bool trans_vfcvt_xu_f_v(DisasContext *ctx, arg_vfcvt_xu_f_v *a); +typedef arg_rmr arg_vfcvt_x_f_v; +static bool trans_vfcvt_x_f_v(DisasContext *ctx, arg_vfcvt_x_f_v *a); +typedef arg_rmr arg_vfcvt_f_xu_v; +static bool trans_vfcvt_f_xu_v(DisasContext *ctx, arg_vfcvt_f_xu_v *a); +typedef arg_rmr arg_vfcvt_f_x_v; +static bool trans_vfcvt_f_x_v(DisasContext *ctx, arg_vfcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_xu_f_v; +static bool trans_vfwcvt_xu_f_v(DisasContext *ctx, arg_vfwcvt_xu_f_v *a); +typedef arg_rmr arg_vfwcvt_x_f_v; +static bool trans_vfwcvt_x_f_v(DisasContext *ctx, arg_vfwcvt_x_f_v *a); +typedef 
arg_rmr arg_vfwcvt_f_xu_v; +static bool trans_vfwcvt_f_xu_v(DisasContext *ctx, arg_vfwcvt_f_xu_v *a); +typedef arg_rmr arg_vfwcvt_f_x_v; +static bool trans_vfwcvt_f_x_v(DisasContext *ctx, arg_vfwcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_f_f_v; +static bool trans_vfwcvt_f_f_v(DisasContext *ctx, arg_vfwcvt_f_f_v *a); +typedef arg_rmr arg_vfncvt_xu_f_v; +static bool trans_vfncvt_xu_f_v(DisasContext *ctx, arg_vfncvt_xu_f_v *a); +typedef arg_rmr arg_vfncvt_x_f_v; +static bool trans_vfncvt_x_f_v(DisasContext *ctx, arg_vfncvt_x_f_v *a); +typedef arg_rmr arg_vfncvt_f_xu_v; +static bool trans_vfncvt_f_xu_v(DisasContext *ctx, arg_vfncvt_f_xu_v *a); +typedef arg_rmr arg_vfncvt_f_x_v; +static bool trans_vfncvt_f_x_v(DisasContext *ctx, arg_vfncvt_f_x_v *a); +typedef arg_rmr arg_vfncvt_f_f_v; +static bool trans_vfncvt_f_f_v(DisasContext *ctx, arg_vfncvt_f_f_v *a); +typedef arg_rmrr arg_vredsum_vs; +static bool trans_vredsum_vs(DisasContext *ctx, arg_vredsum_vs *a); +typedef arg_rmrr arg_vredand_vs; +static bool trans_vredand_vs(DisasContext *ctx, arg_vredand_vs *a); +typedef arg_rmrr arg_vredor_vs; +static bool trans_vredor_vs(DisasContext *ctx, arg_vredor_vs *a); +typedef arg_rmrr arg_vredxor_vs; +static bool trans_vredxor_vs(DisasContext *ctx, arg_vredxor_vs *a); +typedef arg_rmrr arg_vredminu_vs; +static bool trans_vredminu_vs(DisasContext *ctx, arg_vredminu_vs *a); +typedef arg_rmrr arg_vredmin_vs; +static bool trans_vredmin_vs(DisasContext *ctx, arg_vredmin_vs *a); +typedef arg_rmrr arg_vredmaxu_vs; +static bool trans_vredmaxu_vs(DisasContext *ctx, arg_vredmaxu_vs *a); +typedef arg_rmrr arg_vredmax_vs; +static bool trans_vredmax_vs(DisasContext *ctx, arg_vredmax_vs *a); +typedef arg_rmrr arg_vwredsumu_vs; +static bool trans_vwredsumu_vs(DisasContext *ctx, arg_vwredsumu_vs *a); +typedef arg_rmrr arg_vwredsum_vs; +static bool trans_vwredsum_vs(DisasContext *ctx, arg_vwredsum_vs *a); +typedef arg_rmrr arg_vfredsum_vs; +static bool trans_vfredsum_vs(DisasContext *ctx, arg_vfredsum_vs *a); +typedef arg_rmrr arg_vfredmin_vs; +static bool trans_vfredmin_vs(DisasContext *ctx, arg_vfredmin_vs *a); +typedef arg_rmrr arg_vfredmax_vs; +static bool trans_vfredmax_vs(DisasContext *ctx, arg_vfredmax_vs *a); +typedef arg_rmrr arg_vfwredsum_vs; +static bool trans_vfwredsum_vs(DisasContext *ctx, arg_vfwredsum_vs *a); +typedef arg_r arg_vmand_mm; +static bool trans_vmand_mm(DisasContext *ctx, arg_vmand_mm *a); +typedef arg_r arg_vmnand_mm; +static bool trans_vmnand_mm(DisasContext *ctx, arg_vmnand_mm *a); +typedef arg_r arg_vmandnot_mm; +static bool trans_vmandnot_mm(DisasContext *ctx, arg_vmandnot_mm *a); +typedef arg_r arg_vmxor_mm; +static bool trans_vmxor_mm(DisasContext *ctx, arg_vmxor_mm *a); +typedef arg_r arg_vmor_mm; +static bool trans_vmor_mm(DisasContext *ctx, arg_vmor_mm *a); +typedef arg_r arg_vmnor_mm; +static bool trans_vmnor_mm(DisasContext *ctx, arg_vmnor_mm *a); +typedef arg_r arg_vmornot_mm; +static bool trans_vmornot_mm(DisasContext *ctx, arg_vmornot_mm *a); +typedef arg_r arg_vmxnor_mm; +static bool trans_vmxnor_mm(DisasContext *ctx, arg_vmxnor_mm *a); +typedef arg_rmr arg_vmpopc_m; +static bool trans_vmpopc_m(DisasContext *ctx, arg_vmpopc_m *a); +typedef arg_rmr arg_vmfirst_m; +static bool trans_vmfirst_m(DisasContext *ctx, arg_vmfirst_m *a); +typedef arg_rmr arg_vmsbf_m; +static bool trans_vmsbf_m(DisasContext *ctx, arg_vmsbf_m *a); +typedef arg_rmr arg_vmsif_m; +static bool trans_vmsif_m(DisasContext *ctx, arg_vmsif_m *a); +typedef arg_rmr arg_vmsof_m; +static bool trans_vmsof_m(DisasContext 
*ctx, arg_vmsof_m *a); +typedef arg_rmr arg_viota_m; +static bool trans_viota_m(DisasContext *ctx, arg_viota_m *a); +typedef arg_decode_insn3219 arg_vid_v; +static bool trans_vid_v(DisasContext *ctx, arg_vid_v *a); +typedef arg_r arg_vext_x_v; +static bool trans_vext_x_v(DisasContext *ctx, arg_vext_x_v *a); +typedef arg_decode_insn3218 arg_vmv_s_x; +static bool trans_vmv_s_x(DisasContext *ctx, arg_vmv_s_x *a); +typedef arg_decode_insn3220 arg_vfmv_f_s; +static bool trans_vfmv_f_s(DisasContext *ctx, arg_vfmv_f_s *a); +typedef arg_decode_insn3218 arg_vfmv_s_f; +static bool trans_vfmv_s_f(DisasContext *ctx, arg_vfmv_s_f *a); +typedef arg_rmrr arg_vslideup_vx; +static bool trans_vslideup_vx(DisasContext *ctx, arg_vslideup_vx *a); +typedef arg_rmrr arg_vslideup_vi; +static bool trans_vslideup_vi(DisasContext *ctx, arg_vslideup_vi *a); +typedef arg_rmrr arg_vslide1up_vx; +static bool trans_vslide1up_vx(DisasContext *ctx, arg_vslide1up_vx *a); +typedef arg_rmrr arg_vslidedown_vx; +static bool trans_vslidedown_vx(DisasContext *ctx, arg_vslidedown_vx *a); +typedef arg_rmrr arg_vslidedown_vi; +static bool trans_vslidedown_vi(DisasContext *ctx, arg_vslidedown_vi *a); +typedef arg_rmrr arg_vslide1down_vx; +static bool trans_vslide1down_vx(DisasContext *ctx, arg_vslide1down_vx *a); +typedef arg_rmrr arg_vrgather_vv; +static bool trans_vrgather_vv(DisasContext *ctx, arg_vrgather_vv *a); +typedef arg_rmrr arg_vrgather_vx; +static bool trans_vrgather_vx(DisasContext *ctx, arg_vrgather_vx *a); +typedef arg_rmrr arg_vrgather_vi; +static bool trans_vrgather_vi(DisasContext *ctx, arg_vrgather_vi *a); +typedef arg_r arg_vcompress_vm; +static bool trans_vcompress_vm(DisasContext *ctx, arg_vcompress_vm *a); +typedef arg_decode_insn3221 arg_vsetvli; +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a); +typedef arg_r arg_vsetvl; +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a); static void decode_insn32_extract_atom_ld(DisasContext *ctx, arg_atomic *a, uint32_t insn) { @@ -378,30 +1112,30 @@ static void decode_insn32_extract_b(DisasContext *ctx, arg_b *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn329 *a, uint32_t insn) +static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) { a->csr = extract32(insn, 20, 12); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_decode_insn32_Fmt_18(DisasContext *ctx, arg_empty *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_28(DisasContext *ctx, arg_empty *a, uint32_t insn) { } -static void decode_insn32_extract_decode_insn32_Fmt_19(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_29(DisasContext *ctx, arg_decode_insn3224 *a, uint32_t insn) { a->pred = extract32(insn, 24, 4); a->succ = extract32(insn, 20, 4); } -static void decode_insn32_extract_hfence_bvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_vvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -427,20 +1161,54 @@ static void 
decode_insn32_extract_r(DisasContext *ctx, arg_r *a, uint32_t insn) a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3213 *a, uint32_t insn) +static void decode_insn32_extract_r1_vm(DisasContext *ctx, arg_decode_insn3219 *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3218 *a, uint32_t insn) +{ + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_nfvm(DisasContext *ctx, arg_r2nfvm *a, uint32_t insn) { + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3212 *a, uint32_t insn) +static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3217 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); a->rm = extract32(insn, 12, 3); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 *a, uint32_t insn) +static void decode_insn32_extract_r2_vm(DisasContext *ctx, arg_rmr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_zimm(DisasContext *ctx, arg_decode_insn3221 *a, uint32_t insn) +{ + a->zimm = extract32(insn, 20, 11); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2rd(DisasContext *ctx, arg_decode_insn3220 *a, uint32_t insn) +{ + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) { a->rs3 = extract32(insn, 27, 5); a->rs2 = extract32(insn, 20, 5); @@ -449,7 +1217,16 @@ static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 * a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a, uint32_t insn) +static void decode_insn32_extract_r_nfvm(DisasContext *ctx, arg_rnfvm *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -457,6 +1234,39 @@ static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a a->rd = extract32(insn, 7, 5); } +static void decode_insn32_extract_r_vm(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_0(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 0; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_1(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 1; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_wdvm(DisasContext *ctx, arg_rwdvm *a, uint32_t insn) +{ + a->wd = extract32(insn, 26, 1); + a->vm = 
extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) { a->imm = deposit32(extract32(insn, 7, 5), 5, 27, sextract32(insn, 25, 7)); @@ -464,12 +1274,12 @@ static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) +static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3223 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -493,18 +1303,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) union { arg_atomic f_atomic; arg_b f_b; - arg_decode_insn3210 f_decode_insn3210; - arg_decode_insn3211 f_decode_insn3211; - arg_decode_insn3212 f_decode_insn3212; - arg_decode_insn3213 f_decode_insn3213; arg_decode_insn3214 f_decode_insn3214; arg_decode_insn3215 f_decode_insn3215; arg_decode_insn3216 f_decode_insn3216; - arg_decode_insn329 f_decode_insn329; + arg_decode_insn3217 f_decode_insn3217; + arg_decode_insn3218 f_decode_insn3218; + arg_decode_insn3219 f_decode_insn3219; + arg_decode_insn3220 f_decode_insn3220; + arg_decode_insn3221 f_decode_insn3221; + arg_decode_insn3222 f_decode_insn3222; + arg_decode_insn3223 f_decode_insn3223; + arg_decode_insn3224 f_decode_insn3224; arg_empty f_empty; arg_i f_i; arg_j f_j; arg_r f_r; + arg_r2nfvm f_r2nfvm; + arg_rmr f_rmr; + arg_rmrr f_rmrr; + arg_rnfvm f_rnfvm; + arg_rwdvm f_rwdvm; arg_s f_s; arg_shift f_shift; arg_u f_u; @@ -517,45 +1335,227 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:96 */ if (trans_lb(ctx, &u.f_i)) return true; return false; case 0x1: /* ........ ........ .001.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:97 */ if (trans_lh(ctx, &u.f_i)) return true; return false; case 0x2: /* ........ ........ .010.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:98 */ if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:99 */ if (trans_lbu(ctx, &u.f_i)) return true; return false; case 0x5: /* ........ ........ .101.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:100 */ if (trans_lhu(ctx, &u.f_i)) return true; return false; } return false; case 0x00000007: /* ........ ........ ........ .0000111 */ - decode_insn32_extract_i(ctx, &u.f_i, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .000.... .0000111 */ + if (trans_vlbu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .000.... 
.0000111 */ + if (trans_vlbuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .000.... .0000111 */ + if (trans_vlb_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .000.... .0000111 */ + if (trans_vlbff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsb_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:156 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:184 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .101.... .0000111 */ + if (trans_vlhu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .101.... .0000111 */ + if (trans_vlhuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlshu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxhu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .101.... .0000111 */ + if (trans_vlh_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .101.... .0000111 */ + if (trans_vlhff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsh_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .110.... 
.0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .110.... .0000111 */ + if (trans_vlwu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .110.... .0000111 */ + if (trans_vlwuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlswu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxwu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .110.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .110.... .0000111 */ + if (trans_vlw_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .110.... .0000111 */ + if (trans_vlwff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsw_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .111.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .111.... .0000111 */ + if (trans_vle_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .111.... .0000111 */ + if (trans_vleff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlse_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000000f: @@ -563,14 +1563,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:123 */ - decode_insn32_extract_decode_insn32_Fmt_19(ctx, &u.f_decode_insn3216, insn); - if (trans_fence(ctx, &u.f_decode_insn3216)) return true; + decode_insn32_extract_decode_insn32_Fmt_29(ctx, &u.f_decode_insn3224, insn); + if (trans_fence(ctx, &u.f_decode_insn3224)) return true; return false; case 0x1: /* ........ ........ .001.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:124 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); if (trans_fence_i(ctx, &u.f_empty)) return true; return false; } @@ -580,7 +1578,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... 
.0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:104 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -590,26 +1587,22 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .001.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:110 */ if (trans_slli(ctx, &u.f_shift)) return true; return false; } return false; case 0x2: /* ........ ........ .010.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:105 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_slti(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:106 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_sltiu(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:107 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_xori(ctx, &u.f_i)) return true; return false; @@ -619,25 +1612,21 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:111 */ if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 01...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:112 */ if (trans_srai(ctx, &u.f_shift)) return true; return false; } return false; case 0x6: /* ........ ........ .110.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:108 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_ori(ctx, &u.f_i)) return true; return false; case 0x7: /* ........ ........ .111.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:109 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -645,7 +1634,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x00000017: /* ........ ........ ........ .0010111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:87 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_auipc(ctx, &u.f_u)) return true; return false; @@ -655,35 +1643,151 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:101 */ if (trans_sb(ctx, &u.f_s)) return true; return false; case 0x1: /* ........ ........ .001.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:102 */ if (trans_sh(ctx, &u.f_s)) return true; return false; case 0x2: /* ........ ........ .010.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:103 */ if (trans_sw(ctx, &u.f_s)) return true; return false; } return false; case 0x00000027: /* ........ ........ ........ .0100111 */ - decode_insn32_extract_s(ctx, &u.f_s, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .000.... 
.0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .000.... .0100111 */ + if (trans_vsb_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .000.... .0100111 */ + if (trans_vssb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:157 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; case 0x3: /* ........ ........ .011.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:185 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .101.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .101.... .0100111 */ + if (trans_vsh_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .101.... .0100111 */ + if (trans_vssh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .110.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .110.... .0100111 */ + if (trans_vsw_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .110.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .110.... .0100111 */ + if (trans_vssw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .110.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .111.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .111.... .0100111 */ + if (trans_vse_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .111.... 
.0100111 */ + if (trans_vsse_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000002f: @@ -691,75 +1795,109 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xf8007000) { case 0x00002000: /* 00000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:146 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoadd_w(ctx, &u.f_atomic)) return true; return false; + case 0x00006000: + /* 00000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoaddw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x08002000: /* 00001... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:145 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoswap_w(ctx, &u.f_atomic)) return true; return false; + case 0x08006000: + /* 00001... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoswapw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x10002000: /* 00010... ........ .010.... .0101111 */ decode_insn32_extract_atom_ld(ctx, &u.f_atomic, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 00010..0 0000.... .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:143 */ if (trans_lr_w(ctx, &u.f_atomic)) return true; return false; } return false; case 0x18002000: /* 00011... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:144 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_sc_w(ctx, &u.f_atomic)) return true; return false; case 0x20002000: /* 00100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:147 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoxor_w(ctx, &u.f_atomic)) return true; return false; + case 0x20006000: + /* 00100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoxorw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x40002000: /* 01000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:149 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoor_w(ctx, &u.f_atomic)) return true; return false; + case 0x40006000: + /* 01000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoorw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x60002000: /* 01100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:148 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoand_w(ctx, &u.f_atomic)) return true; return false; + case 0x60006000: + /* 01100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoandw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x80002000: /* 10000... ........ .010.... 
.0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:150 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomin_w(ctx, &u.f_atomic)) return true; return false; + case 0x80006000: + /* 10000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xa0002000: /* 10100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:151 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomax_w(ctx, &u.f_atomic)) return true; return false; + case 0xa0006000: + /* 10100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xc0002000: /* 11000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:152 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amominu_w(ctx, &u.f_atomic)) return true; return false; + case 0xc0006000: + /* 11000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominuw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xe0002000: /* 11100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:153 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomaxu_w(ctx, &u.f_atomic)) return true; return false; + case 0xe0006000: + /* 11100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxuw_v(ctx, &u.f_rwdvm)) return true; + return false; } return false; case 0x00000033: @@ -768,163 +1906,136 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe007000) { case 0x00000000: /* 0000000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:113 */ if (trans_add(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 0000000. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:115 */ if (trans_sll(ctx, &u.f_r)) return true; return false; case 0x00002000: /* 0000000. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:116 */ if (trans_slt(ctx, &u.f_r)) return true; return false; case 0x00003000: /* 0000000. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:117 */ if (trans_sltu(ctx, &u.f_r)) return true; return false; case 0x00004000: /* 0000000. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:118 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00005000: /* 0000000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:119 */ if (trans_srl(ctx, &u.f_r)) return true; return false; case 0x00006000: /* 0000000. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:121 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00007000: /* 0000000. ........ .111.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:122 */ if (trans_and(ctx, &u.f_r)) return true; return false; case 0x02000000: /* 0000001. ........ .000.... 
.0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:133 */ if (trans_mul(ctx, &u.f_r)) return true; return false; case 0x02001000: /* 0000001. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:134 */ if (trans_mulh(ctx, &u.f_r)) return true; return false; case 0x02002000: /* 0000001. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:135 */ if (trans_mulhsu(ctx, &u.f_r)) return true; return false; case 0x02003000: /* 0000001. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:136 */ if (trans_mulhu(ctx, &u.f_r)) return true; return false; case 0x02004000: /* 0000001. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:137 */ if (trans_div(ctx, &u.f_r)) return true; return false; case 0x02005000: /* 0000001. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:138 */ if (trans_divu(ctx, &u.f_r)) return true; return false; case 0x02006000: /* 0000001. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:139 */ if (trans_rem(ctx, &u.f_r)) return true; return false; case 0x02007000: /* 0000001. ........ .111.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:140 */ if (trans_remu(ctx, &u.f_r)) return true; return false; case 0x40000000: /* 0100000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:114 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x40005000: /* 0100000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:120 */ if (trans_sra(ctx, &u.f_r)) return true; return false; } return false; case 0x00000037: /* ........ ........ ........ .0110111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:86 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; case 0x00000043: /* ........ ........ ........ .1000011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:158 */ - if (trans_fmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:186 */ - if (trans_fmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x00000047: /* ........ ........ ........ .1000111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:159 */ - if (trans_fmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ 
.1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:187 */ - if (trans_fmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004b: /* ........ ........ ........ .1001011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:160 */ - if (trans_fnmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:188 */ - if (trans_fnmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004f: /* ........ ........ ........ .1001111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:161 */ - if (trans_fnmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:189 */ - if (trans_fnmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; @@ -933,51 +2044,43 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:162 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x1: /* 0000001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:190 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x4: /* 0000100. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:163 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x5: /* 0000101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:191 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x8: /* 0001000. ........ ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:164 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x9: /* 0001001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:192 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0xc: /* 0001100. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:165 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0xd: /* 0001101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:193 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x10: /* 0010000. ........ ........ .1010011 */ @@ -985,17 +2088,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010000. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:167 */ if (trans_fsgnj_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:168 */ if (trans_fsgnjn_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:169 */ if (trans_fsgnjx_s(ctx, &u.f_r)) return true; return false; } @@ -1006,17 +2106,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010001. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:195 */ if (trans_fsgnj_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010001. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:196 */ if (trans_fsgnjn_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:197 */ if (trans_fsgnjx_d(ctx, &u.f_r)) return true; return false; } @@ -1027,12 +2124,10 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010100. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:170 */ if (trans_fmin_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010100. ........ .001.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:171 */ if (trans_fmax_s(ctx, &u.f_r)) return true; return false; } @@ -1043,57 +2138,51 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010101. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:198 */ if (trans_fmin_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010101. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:199 */ if (trans_fmax_d(ctx, &u.f_r)) return true; return false; } return false; case 0x20: /* 0100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x1: /* 01000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:200 */ - if (trans_fcvt_s_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x21: /* 0100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:201 */ - if (trans_fcvt_d_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2c: /* 0101100. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:166 */ - if (trans_fsqrt_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2d: /* 0101101. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:194 */ - if (trans_fsqrt_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; @@ -1103,17 +2192,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010000. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:177 */ if (trans_fle_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:176 */ if (trans_flt_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:175 */ if (trans_feq_s(ctx, &u.f_r)) return true; return false; } @@ -1124,120 +2210,1726 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010001. ........ .000.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:204 */ if (trans_fle_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010001. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:203 */ if (trans_flt_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:202 */ if (trans_feq_d(ctx, &u.f_r)) return true; return false; } return false; case 0x60: /* 1100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:172 */ - if (trans_fcvt_w_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:173 */ - if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x61: /* 1100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:206 */ - if (trans_fcvt_w_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000010 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:207 */ - if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x68: /* 1101000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:179 */ - if (trans_fcvt_s_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:180 */ - if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x69: /* 1101001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:208 */ - if (trans_fcvt_d_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010010 0001.... ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:209 */ - if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x70: /* 1110000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11100000 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:174 */ - if (trans_fmv_x_w(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_x_w(ctx, &u.f_decode_insn3218)) return true; return false; case 0x00001000: /* 11100000 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:178 */ - if (trans_fclass_s(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_s(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x71: /* 1110001. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00001000: /* 11100010 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:205 */ - if (trans_fclass_d(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_d(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x78: /* 1111000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11110000 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:181 */ - if (trans_fmv_w_x(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_w_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x00000057: + /* ........ ........ ........ .1010111 */ + switch (insn & 0x80007000) { + case 0x00000000: + /* 0....... ........ .000.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .000.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .000.... .1010111 */ + if (trans_vadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .000.... .1010111 */ + if (trans_vmadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .000.... .1010111 */ + if (trans_vsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .000.... .1010111 */ + if (trans_vmsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .000.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vvm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .000.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .000.... .1010111 */ + if (trans_vmv_v_v(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00001000: + /* 0....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 0000.0.. ........ .001.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000000.. ........ .001.... .1010111 */ + if (trans_vfadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000010.. ........ .001.... .1010111 */ + if (trans_vfsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x04000000: + /* 0000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x10000000: + /* 0001.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000100.. ........ .001.... .1010111 */ + if (trans_vfmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000110.. ........ .001.... .1010111 */ + if (trans_vfmax_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x14000000: + /* 0001.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000101.. ........ .001.... .1010111 */ + if (trans_vfredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000111.. ........ .001.... .1010111 */ + if (trans_vfredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 0010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001000.. ........ .001.... .1010111 */ + if (trans_vfsgnj_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 001010.. ........ .001.... .1010111 */ + if (trans_vfsgnjx_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 0010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001001.. ........ .001.... .1010111 */ + if (trans_vfsgnjn_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 0011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r2rd(ctx, &u.f_decode_insn3220, insn); + switch (insn & 0x0a0f8000) { + case 0x02000000: + /* 0011001. ....0000 0001.... .1010111 */ + if (trans_vfmv_f_s(ctx, &u.f_decode_insn3220)) return true; + return false; + } + return false; + case 0x60000000: + /* 0110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011000.. ........ .001.... .1010111 */ + if (trans_vmfeq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011010.. ........ .001.... .1010111 */ + if (trans_vmford_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x64000000: + /* 0110.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011001.. ........ .001.... .1010111 */ + if (trans_vmfle_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011011.. ........ .001.... .1010111 */ + if (trans_vmflt_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 0111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011100.. ........ .001.... 
.1010111 */ + if (trans_vmfne_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x00002000: + /* 0....... ........ .010.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000001.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredand_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredxor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredminu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmaxu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0011001. ........ .010.... .1010111 */ + if (trans_vext_x_v(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x14: + /* 010100.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmpopc_m(ctx, &u.f_rmr)) return true; + return false; + case 0x15: + /* 010101.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmfirst_m(ctx, &u.f_rmr)) return true; + return false; + case 0x16: + /* 010110.. ........ .010.... .1010111 */ + switch ((insn >> 15) & 0x1f) { + case 0x1: + /* 010110.. ....0000 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsbf_m(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 010110.. ....0001 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsof_m(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 010110.. ....0001 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsif_m(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 010110.. ....1000 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_viota_m(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 010110.. ....1000 1010.... .1010111 */ + decode_insn32_extract_r1_vm(ctx, &u.f_decode_insn3219, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 010110.0 00001000 1010.... .1010111 */ + if (trans_vid_v(ctx, &u.f_decode_insn3219)) return true; + return false; + } + return false; + } + return false; + case 0x17: + /* 010111.. ........ .010.... 
.1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vcompress_vm(ctx, &u.f_r)) return true; + return false; + case 0x18: + /* 011000.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmandnot_mm(ctx, &u.f_r)) return true; + return false; + case 0x19: + /* 011001.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1a: + /* 011010.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1b: + /* 011011.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1c: + /* 011100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmornot_mm(ctx, &u.f_r)) return true; + return false; + case 0x1d: + /* 011101.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1e: + /* 011110.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1f: + /* 011111.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxnor_mm(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x00003000: + /* 0....... ........ .011.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .011.... .1010111 */ + if (trans_vadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .011.... 
.1010111 */ + if (trans_vmadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .011.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vim(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .011.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .011.... .1010111 */ + if (trans_vmv_v_i(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00004000: + /* 0....... ........ .100.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .100.... .1010111 */ + if (trans_vadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .100.... .1010111 */ + if (trans_vmadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .100.... .1010111 */ + if (trans_vsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .100.... .1010111 */ + if (trans_vmsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .100.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vxm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .100.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .100.... .1010111 */ + if (trans_vmv_v_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00005000: + /* 0....... ........ .101.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmin_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmax_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 001000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnj_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjn_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjx_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 001101.. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .101.... .1010111 */ + if (trans_vfmv_s_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .101.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vfmerge_vfm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .101.... .1010111 */ + if (trans_vfmv_v_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfeq_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfle_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmford_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmflt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfne_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .101.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfgt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfge_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00006000: + /* 0....... ........ .110.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0xd: + /* 001101.. ........ .110.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .110.... .1010111 */ + if (trans_vmv_s_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0xe: + /* 001110.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1up_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1down_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00007000: + /* 0....... ........ .111.... .1010111 */ + decode_insn32_extract_r2_zimm(ctx, &u.f_decode_insn3221, insn); + if (trans_vsetvli(ctx, &u.f_decode_insn3221)) return true; + return false; + case 0x80000000: + /* 1....... ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .000.... .1010111 */ + if (trans_vsaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .000.... .1010111 */ + if (trans_vsadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .000.... .1010111 */ + if (trans_vssubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .000.... .1010111 */ + if (trans_vssub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .000.... .1010111 */ + if (trans_vaadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .000.... .1010111 */ + if (trans_vsll_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .000.... .1010111 */ + if (trans_vasub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .000.... .1010111 */ + if (trans_vsmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .000.... .1010111 */ + if (trans_vsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .000.... .1010111 */ + if (trans_vsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .000.... .1010111 */ + if (trans_vssrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .000.... .1010111 */ + if (trans_vssra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .000.... .1010111 */ + if (trans_vnsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .000.... .1010111 */ + if (trans_vnsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .000.... .1010111 */ + if (trans_vnclipu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .000.... .1010111 */ + if (trans_vnclip_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .000.... 
.1010111 */ + if (trans_vwredsumu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .000.... .1010111 */ + if (trans_vwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .000.... .1010111 */ + if (trans_vwsmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .000.... .1010111 */ + if (trans_vwsmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .000.... .1010111 */ + if (trans_vwsmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80001000: + /* 1....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 1000.0.. ........ .001.... .1010111 */ + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100000.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100010.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch ((insn >> 15) & 0x1f) { + case 0x0: + /* 100010.. ....0000 0001.... .1010111 */ + if (trans_vfcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x1: + /* 100010.. ....0000 1001.... .1010111 */ + if (trans_vfcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 100010.. ....0001 0001.... .1010111 */ + if (trans_vfcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 100010.. ....0001 1001.... .1010111 */ + if (trans_vfcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x8: + /* 100010.. ....0100 0001.... .1010111 */ + if (trans_vfwcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x9: + /* 100010.. ....0100 1001.... .1010111 */ + if (trans_vfwcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0xa: + /* 100010.. ....0101 0001.... .1010111 */ + if (trans_vfwcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0xb: + /* 100010.. ....0101 1001.... .1010111 */ + if (trans_vfwcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0xc: + /* 100010.. ....0110 0001.... .1010111 */ + if (trans_vfwcvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 100010.. ....1000 0001.... .1010111 */ + if (trans_vfncvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 100010.. ....1000 1001.... .1010111 */ + if (trans_vfncvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x12: + /* 100010.. ....1001 0001.... .1010111 */ + if (trans_vfncvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x13: + /* 100010.. ....1001 1001.... .1010111 */ + if (trans_vfncvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x14: + /* 100010.. ....1010 0001.... .1010111 */ + if (trans_vfncvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + } + return false; + case 0x04000000: + /* 1000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch (insn & 0x080f8000) { + case 0x08000000: + /* 100011.. ....0000 0001.... .1010111 */ + if (trans_vfsqrt_v(ctx, &u.f_rmr)) return true; + return false; + case 0x08080000: + /* 100011.. ....1000 0001.... .1010111 */ + if (trans_vfclass_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + case 0x10000000: + /* 1001.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100100.. ........ .001.... 
.1010111 */ + if (trans_vfmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 1010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101000.. ........ .001.... .1010111 */ + if (trans_vfmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101010.. ........ .001.... .1010111 */ + if (trans_vfmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 1010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101001.. ........ .001.... .1010111 */ + if (trans_vfnmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101011.. ........ .001.... .1010111 */ + if (trans_vfnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 1011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101100.. ........ .001.... .1010111 */ + if (trans_vfmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101110.. ........ .001.... .1010111 */ + if (trans_vfmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x34000000: + /* 1011.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101101.. ........ .001.... .1010111 */ + if (trans_vfnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101111.. ........ .001.... .1010111 */ + if (trans_vfnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x40000000: + /* 1100.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110000.. ........ .001.... .1010111 */ + if (trans_vfwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110010.. ........ .001.... .1010111 */ + if (trans_vfwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x44000000: + /* 1100.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x50000000: + /* 1101.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110100.. ........ .001.... .1010111 */ + if (trans_vfwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110110.. ........ .001.... .1010111 */ + if (trans_vfwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x60000000: + /* 1110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111000.. ........ .001.... .1010111 */ + if (trans_vfwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 1111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111100.. ........ .001.... .1010111 */ + if (trans_vfwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111110.. ........ .001.... .1010111 */ + if (trans_vfwmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x74000000: + /* 1111.1.. ........ .001.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111101.. ........ .001.... .1010111 */ + if (trans_vfwnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111111.. ........ .001.... .1010111 */ + if (trans_vfwnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x80002000: + /* 1....... ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .010.... .1010111 */ + if (trans_vdivu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .010.... .1010111 */ + if (trans_vdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .010.... .1010111 */ + if (trans_vremu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .010.... .1010111 */ + if (trans_vrem_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .010.... .1010111 */ + if (trans_vmulhu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .010.... .1010111 */ + if (trans_vmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .010.... .1010111 */ + if (trans_vmulhsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .010.... .1010111 */ + if (trans_vmulh_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .010.... .1010111 */ + if (trans_vmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .010.... .1010111 */ + if (trans_vnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .010.... .1010111 */ + if (trans_vmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .010.... .1010111 */ + if (trans_vnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .010.... .1010111 */ + if (trans_vwaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .010.... .1010111 */ + if (trans_vwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .010.... .1010111 */ + if (trans_vwsubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .010.... .1010111 */ + if (trans_vwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .010.... .1010111 */ + if (trans_vwaddu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .010.... .1010111 */ + if (trans_vwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .010.... .1010111 */ + if (trans_vwsubu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .010.... .1010111 */ + if (trans_vwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .010.... .1010111 */ + if (trans_vwmulu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .010.... .1010111 */ + if (trans_vwmulsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .010.... .1010111 */ + if (trans_vwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .010.... .1010111 */ + if (trans_vwmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ 
.010.... .1010111 */ + if (trans_vwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .010.... .1010111 */ + if (trans_vwmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80003000: + /* 1....... ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .011.... .1010111 */ + if (trans_vsaddu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .011.... .1010111 */ + if (trans_vsadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .011.... .1010111 */ + if (trans_vaadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .011.... .1010111 */ + if (trans_vsll_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .011.... .1010111 */ + if (trans_vsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .011.... .1010111 */ + if (trans_vsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .011.... .1010111 */ + if (trans_vssrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .011.... .1010111 */ + if (trans_vssra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .011.... .1010111 */ + if (trans_vnsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .011.... .1010111 */ + if (trans_vnsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .011.... .1010111 */ + if (trans_vnclipu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .011.... .1010111 */ + if (trans_vnclip_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80004000: + /* 1....... ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .100.... .1010111 */ + if (trans_vsaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .100.... .1010111 */ + if (trans_vsadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .100.... .1010111 */ + if (trans_vssubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .100.... .1010111 */ + if (trans_vssub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .100.... .1010111 */ + if (trans_vaadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .100.... .1010111 */ + if (trans_vsll_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .100.... .1010111 */ + if (trans_vasub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .100.... .1010111 */ + if (trans_vsmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .100.... .1010111 */ + if (trans_vsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .100.... .1010111 */ + if (trans_vsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .100.... .1010111 */ + if (trans_vssrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .100.... .1010111 */ + if (trans_vssra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .100.... 
.1010111 */ + if (trans_vnsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .100.... .1010111 */ + if (trans_vnsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .100.... .1010111 */ + if (trans_vnclipu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .100.... .1010111 */ + if (trans_vnclip_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .100.... .1010111 */ + if (trans_vwsmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .100.... .1010111 */ + if (trans_vwsmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .100.... .1010111 */ + if (trans_vwsmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .100.... .1010111 */ + if (trans_vwsmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80005000: + /* 1....... ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .101.... .1010111 */ + if (trans_vfdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .101.... .1010111 */ + if (trans_vfrdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .101.... .1010111 */ + if (trans_vfmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .101.... .1010111 */ + if (trans_vfrsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .101.... .1010111 */ + if (trans_vfmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .101.... .1010111 */ + if (trans_vfnmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .101.... .1010111 */ + if (trans_vfmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .101.... .1010111 */ + if (trans_vfnmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .101.... .1010111 */ + if (trans_vfmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .101.... .1010111 */ + if (trans_vfnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .101.... .1010111 */ + if (trans_vfmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .101.... .1010111 */ + if (trans_vfnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .101.... .1010111 */ + if (trans_vfwadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .101.... .1010111 */ + if (trans_vfwsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .101.... .1010111 */ + if (trans_vfwadd_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .101.... .1010111 */ + if (trans_vfwsub_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .101.... .1010111 */ + if (trans_vfwmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .101.... .1010111 */ + if (trans_vfwmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .101.... .1010111 */ + if (trans_vfwnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .101.... 
.1010111 */ + if (trans_vfwmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .101.... .1010111 */ + if (trans_vfwnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80006000: + /* 1....... ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .110.... .1010111 */ + if (trans_vdivu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .110.... .1010111 */ + if (trans_vdiv_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .110.... .1010111 */ + if (trans_vremu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .110.... .1010111 */ + if (trans_vrem_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .110.... .1010111 */ + if (trans_vmulhu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .110.... .1010111 */ + if (trans_vmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .110.... .1010111 */ + if (trans_vmulhsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .110.... .1010111 */ + if (trans_vmulh_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .110.... .1010111 */ + if (trans_vmadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .110.... .1010111 */ + if (trans_vnmsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .110.... .1010111 */ + if (trans_vmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .110.... .1010111 */ + if (trans_vnmsac_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .110.... .1010111 */ + if (trans_vwaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .110.... .1010111 */ + if (trans_vwadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .110.... .1010111 */ + if (trans_vwsubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .110.... .1010111 */ + if (trans_vwsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .110.... .1010111 */ + if (trans_vwaddu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .110.... .1010111 */ + if (trans_vwadd_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .110.... .1010111 */ + if (trans_vwsubu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .110.... .1010111 */ + if (trans_vwsub_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .110.... .1010111 */ + if (trans_vwmulu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .110.... .1010111 */ + if (trans_vwmulsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .110.... .1010111 */ + if (trans_vwmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .110.... .1010111 */ + if (trans_vwmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .110.... .1010111 */ + if (trans_vwmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .110.... 
.1010111 */ + if (trans_vwmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .110.... .1010111 */ + if (trans_vwmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80007000: + /* 1....... ........ .111.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x3f) { + case 0x0: + /* 1000000. ........ .111.... .1010111 */ + if (trans_vsetvl(ctx, &u.f_r)) return true; return false; } return false; @@ -1249,32 +3941,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:90 */ if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x1: /* ........ ........ .001.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:91 */ if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x4: /* ........ ........ .100.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:92 */ if (trans_blt(ctx, &u.f_b)) return true; return false; case 0x5: /* ........ ........ .101.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:93 */ if (trans_bge(ctx, &u.f_b)) return true; return false; case 0x6: /* ........ ........ .110.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:94 */ if (trans_bltu(ctx, &u.f_b)) return true; return false; case 0x7: /* ........ ........ .111.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:95 */ if (trans_bgeu(ctx, &u.f_b)) return true; return false; } @@ -1285,14 +3971,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:89 */ if (trans_jalr(ctx, &u.f_i)) return true; return false; } return false; case 0x0000006f: /* ........ ........ ........ .1101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:88 */ decode_insn32_extract_j(ctx, &u.f_j, insn); if (trans_jal(ctx, &u.f_j)) return true; return false; @@ -1304,21 +3988,18 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe000f80) { case 0x00000000: /* 0000000. ........ .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x0: /* 00000000 00000000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:74 */ if (trans_ecall(ctx, &u.f_empty)) return true; return false; case 0x20: /* 00000000 00010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:75 */ if (trans_ebreak(ctx, &u.f_empty)) return true; return false; case 0x40: /* 00000000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:76 */ if (trans_uret(ctx, &u.f_empty)) return true; return false; } @@ -1328,28 +4009,25 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 20) & 0x1f) { case 0x2: /* 00010000 0010.... 
.0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:77 */ if (trans_sret(ctx, &u.f_empty)) return true; return false; } return false; case 0x4: /* 00010000 0100.... .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:83 */ - decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3215, insn); - if (trans_sfence_vm(ctx, &u.f_decode_insn3215)) return true; + decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3223, insn); + if (trans_sfence_vm(ctx, &u.f_decode_insn3223)) return true; return false; case 0x5: /* 00010000 0101.... .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 01010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:79 */ if (trans_wfi(ctx, &u.f_empty)) return true; return false; } @@ -1358,70 +4036,60 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x12000000: /* 0001001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:82 */ - decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3214, insn); - if (trans_sfence_vma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3222, insn); + if (trans_sfence_vma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x22000000: /* 0010001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:81 */ - decode_insn32_extract_hfence_bvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_bvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_vvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_vvma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x30000000: /* 0011000. ........ .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x40: /* 00110000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:78 */ if (trans_mret(ctx, &u.f_empty)) return true; return false; } return false; case 0x62000000: /* 0110001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:80 */ - decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_gvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_gvma(ctx, &u.f_decode_insn3222)) return true; return false; } return false; case 0x1: /* ........ ........ .001.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:125 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrw(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrw(ctx, &u.f_decode_insn3214)) return true; return false; case 0x2: /* ........ ........ .010.... 
.1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:126 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrs(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrs(ctx, &u.f_decode_insn3214)) return true; return false; case 0x3: /* ........ ........ .011.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:127 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrc(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrc(ctx, &u.f_decode_insn3214)) return true; return false; case 0x5: /* ........ ........ .101.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:128 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrwi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrwi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x6: /* ........ ........ .110.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:129 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrsi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrsi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x7: /* ........ ........ .111.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:130 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrci(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrci(ctx, &u.f_decode_insn3214)) return true; return false; } return false; diff --git a/qemu/target/riscv/riscv64/decode_insn16.inc.c b/qemu/target/riscv/riscv64/decode_insn16.inc.c index 719388566f..a3bfbd0d3f 100644 --- a/qemu/target/riscv/riscv64/decode_insn16.inc.c +++ b/qemu/target/riscv/riscv64/decode_insn16.inc.c @@ -1,11 +1,9 @@ /* This file is autogenerated by scripts/decodetree.py. */ -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wredundant-decls" -# ifdef __clang__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#ifdef __clang__ # pragma GCC diagnostic ignored "-Wtypedef-redefinition" -# endif #endif typedef arg_empty arg_illegal; @@ -61,9 +59,7 @@ static bool trans_subw(DisasContext *ctx, arg_subw *a); typedef arg_r arg_addw; static bool trans_addw(DisasContext *ctx, arg_addw *a); -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic pop -#endif +#pragma GCC diagnostic pop static void decode_insn16_extract_c_addi16sp(DisasContext *ctx, arg_i *a, uint16_t insn) { @@ -237,30 +233,24 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 000..... 
......00 */ if ((insn & 0x00001fe0) == 0x00000000) { /* 00000000 000...00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:87 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); - ctx->invalid = true; if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:88 */ decode_insn16_extract_c_addi4spn(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000001: /* 000..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:96 */ decode_insn16_extract_ci(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000002: /* 000..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:115 */ decode_insn16_extract_c_shift2(ctx, &u.f_shift, insn); if (trans_slli(ctx, &u.f_shift)) return true; return false; case 0x00002000: /* 001..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:90 */ decode_insn16_extract_cl_d(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; @@ -268,29 +258,24 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 001..... ......01 */ if ((insn & 0x00000f80) == 0x00000000) { /* 001.0000 0.....01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:25 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:26 */ decode_insn16_extract_ci(ctx, &u.f_i, insn); if (trans_addiw(ctx, &u.f_i)) return true; return false; case 0x00002002: /* 001..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:116 */ decode_insn16_extract_c_ldsp(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; case 0x00004000: /* 010..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:91 */ decode_insn16_extract_cl_w(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00004001: /* 010..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:97 */ decode_insn16_extract_c_li(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -298,17 +283,14 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 010..... ......10 */ if ((insn & 0x00000f80) == 0x00000000) { /* 010.0000 0.....10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:118 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:119 */ decode_insn16_extract_c_lwsp(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00006000: /* 011..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:20 */ decode_insn16_extract_cl_d(ctx, &u.f_i, insn); if (trans_ld(ctx, &u.f_i)) return true; return false; @@ -316,17 +298,14 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 011..... ......01 */ if ((insn & 0x0000107c) == 0x00000000) { /* 0110.... 
.0000001 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:99 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x00000f80) == 0x00000100) { /* 011.0001 0.....01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:100 */ decode_insn16_extract_c_addi16sp(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:101 */ decode_insn16_extract_c_lui(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; @@ -334,11 +313,9 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 011..... ......10 */ if ((insn & 0x00000f80) == 0x00000000) { /* 011.0000 0.....10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:33 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:34 */ decode_insn16_extract_c_ldsp(ctx, &u.f_i, insn); if (trans_ld(ctx, &u.f_i)) return true; return false; @@ -347,19 +324,16 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch ((insn >> 10) & 0x3) { case 0x0: /* 100.00.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:103 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 100.01.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:104 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srai(ctx, &u.f_shift)) return true; return false; case 0x2: /* 100.10.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:105 */ decode_insn16_extract_c_andi(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -369,32 +343,26 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch (insn & 0x00001060) { case 0x00000000: /* 100011.. .00...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:106 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x00000020: /* 100011.. .01...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:107 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00000040: /* 100011.. .10...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:108 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00000060: /* 100011.. .11...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:109 */ if (trans_and(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 100111.. .00...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:28 */ if (trans_subw(ctx, &u.f_r)) return true; return false; case 0x00001020: /* 100111.. .01...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:29 */ if (trans_addw(ctx, &u.f_r)) return true; return false; } @@ -408,18 +376,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1000.... 
......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10000000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:122 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1000.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:123 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 0; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:124 */ decode_insn16_extract_c_mv(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -427,18 +392,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1001.... ......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10010000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:127 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_ebreak(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1001.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:128 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 1; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:129 */ decode_insn16_extract_cr(ctx, &u.f_r, insn); if (trans_add(ctx, &u.f_r)) return true; return false; @@ -446,56 +408,47 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) return false; case 0x0000a000: /* 101..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:92 */ decode_insn16_extract_cs_d(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000a001: /* 101..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:110 */ decode_insn16_extract_cj(ctx, &u.f_j, insn); u.f_j.rd = 0; if (trans_jal(ctx, &u.f_j)) return true; return false; case 0x0000a002: /* 101..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:131 */ decode_insn16_extract_c_sdsp(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000c000: /* 110..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:93 */ decode_insn16_extract_cs_w(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000c001: /* 110..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:111 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x0000c002: /* 110..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:132 */ decode_insn16_extract_c_swsp(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000e000: /* 111..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:21 */ decode_insn16_extract_cs_d(ctx, &u.f_s, insn); if (trans_sd(ctx, &u.f_s)) return true; return false; case 0x0000e001: /* 111..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:112 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x0000e002: /* 111..... 
......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:36 */ decode_insn16_extract_c_sdsp(ctx, &u.f_s, insn); if (trans_sd(ctx, &u.f_s)) return true; return false; diff --git a/qemu/target/riscv/riscv64/decode_insn32.inc.c b/qemu/target/riscv/riscv64/decode_insn32.inc.c index b5d7896091..7ea843c575 100644 --- a/qemu/target/riscv/riscv64/decode_insn32.inc.c +++ b/qemu/target/riscv/riscv64/decode_insn32.inc.c @@ -14,56 +14,70 @@ typedef struct { int rs2; } arg_b; +typedef struct { + int csr; + int rd; + int rs1; +} arg_decode_insn3214; + typedef struct { int rd; int rm; int rs1; int rs2; int rs3; -} arg_decode_insn3210; +} arg_decode_insn3215; typedef struct { int rd; int rm; int rs1; int rs2; -} arg_decode_insn3211; +} arg_decode_insn3216; typedef struct { int rd; int rm; int rs1; -} arg_decode_insn3212; +} arg_decode_insn3217; typedef struct { int rd; int rs1; -} arg_decode_insn3213; +} arg_decode_insn3218; typedef struct { - int rs1; + int rd; + int vm; +} arg_decode_insn3219; + +typedef struct { + int rd; int rs2; -} arg_decode_insn3214; +} arg_decode_insn3220; typedef struct { + int rd; int rs1; -} arg_decode_insn3215; + int zimm; +} arg_decode_insn3221; typedef struct { - int pred; - int succ; -} arg_decode_insn3216; + int rs1; + int rs2; +} arg_decode_insn3222; typedef struct { - int csr; - int rd; int rs1; -} arg_decode_insn329; +} arg_decode_insn3223; + +typedef struct { + int pred; + int succ; +} arg_decode_insn3224; typedef struct { -#ifdef _MSC_VER - int dummy; // MSVC does not allow empty struct -#endif + int : 0; } arg_empty; typedef struct { @@ -83,6 +97,42 @@ typedef struct { int rs2; } arg_r; +typedef struct { + int nf; + int rd; + int rs1; + int vm; +} arg_r2nfvm; + +typedef struct { + int rd; + int rs2; + int vm; +} arg_rmr; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; +} arg_rmrr; + +typedef struct { + int nf; + int rd; + int rs1; + int rs2; + int vm; +} arg_rnfvm; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; + int wd; +} arg_rwdvm; + typedef struct { int imm; int rs1; @@ -112,13 +162,9 @@ typedef arg_empty arg_mret; static bool trans_mret(DisasContext *ctx, arg_mret *a); typedef arg_empty arg_wfi; static bool trans_wfi(DisasContext *ctx, arg_wfi *a); -typedef arg_decode_insn3214 arg_hfence_gvma; -static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); -typedef arg_decode_insn3214 arg_hfence_bvma; -static bool trans_hfence_bvma(DisasContext *ctx, arg_hfence_bvma *a); -typedef arg_decode_insn3214 arg_sfence_vma; +typedef arg_decode_insn3222 arg_sfence_vma; static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a); -typedef arg_decode_insn3215 arg_sfence_vm; +typedef arg_decode_insn3223 arg_sfence_vm; static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a); typedef arg_u arg_lui; static bool trans_lui(DisasContext *ctx, arg_lui *a); @@ -194,21 +240,21 @@ typedef arg_r arg_or; static bool trans_or(DisasContext *ctx, arg_or *a); typedef arg_r arg_and; static bool trans_and(DisasContext *ctx, arg_and *a); -typedef arg_decode_insn3216 arg_fence; +typedef arg_decode_insn3224 arg_fence; static bool trans_fence(DisasContext *ctx, arg_fence *a); typedef arg_empty arg_fence_i; static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a); -typedef arg_decode_insn329 arg_csrrw; +typedef arg_decode_insn3214 arg_csrrw; static bool trans_csrrw(DisasContext *ctx, arg_csrrw *a); -typedef arg_decode_insn329 arg_csrrs; +typedef arg_decode_insn3214 arg_csrrs; static bool 
trans_csrrs(DisasContext *ctx, arg_csrrs *a); -typedef arg_decode_insn329 arg_csrrc; +typedef arg_decode_insn3214 arg_csrrc; static bool trans_csrrc(DisasContext *ctx, arg_csrrc *a); -typedef arg_decode_insn329 arg_csrrwi; +typedef arg_decode_insn3214 arg_csrrwi; static bool trans_csrrwi(DisasContext *ctx, arg_csrrwi *a); -typedef arg_decode_insn329 arg_csrrsi; +typedef arg_decode_insn3214 arg_csrrsi; static bool trans_csrrsi(DisasContext *ctx, arg_csrrsi *a); -typedef arg_decode_insn329 arg_csrrci; +typedef arg_decode_insn3214 arg_csrrci; static bool trans_csrrci(DisasContext *ctx, arg_csrrci *a); typedef arg_r arg_mul; static bool trans_mul(DisasContext *ctx, arg_mul *a); @@ -252,23 +298,23 @@ typedef arg_i arg_flw; static bool trans_flw(DisasContext *ctx, arg_flw *a); typedef arg_s arg_fsw; static bool trans_fsw(DisasContext *ctx, arg_fsw *a); -typedef arg_decode_insn3210 arg_fmadd_s; +typedef arg_decode_insn3215 arg_fmadd_s; static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a); -typedef arg_decode_insn3210 arg_fmsub_s; +typedef arg_decode_insn3215 arg_fmsub_s; static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a); -typedef arg_decode_insn3210 arg_fnmsub_s; +typedef arg_decode_insn3215 arg_fnmsub_s; static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a); -typedef arg_decode_insn3210 arg_fnmadd_s; +typedef arg_decode_insn3215 arg_fnmadd_s; static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a); -typedef arg_decode_insn3211 arg_fadd_s; +typedef arg_decode_insn3216 arg_fadd_s; static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a); -typedef arg_decode_insn3211 arg_fsub_s; +typedef arg_decode_insn3216 arg_fsub_s; static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a); -typedef arg_decode_insn3211 arg_fmul_s; +typedef arg_decode_insn3216 arg_fmul_s; static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a); -typedef arg_decode_insn3211 arg_fdiv_s; +typedef arg_decode_insn3216 arg_fdiv_s; static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a); -typedef arg_decode_insn3212 arg_fsqrt_s; +typedef arg_decode_insn3217 arg_fsqrt_s; static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a); typedef arg_r arg_fsgnj_s; static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a); @@ -280,11 +326,11 @@ typedef arg_r arg_fmin_s; static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a); typedef arg_r arg_fmax_s; static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a); -typedef arg_decode_insn3212 arg_fcvt_w_s; +typedef arg_decode_insn3217 arg_fcvt_w_s; static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a); -typedef arg_decode_insn3212 arg_fcvt_wu_s; +typedef arg_decode_insn3217 arg_fcvt_wu_s; static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a); -typedef arg_decode_insn3213 arg_fmv_x_w; +typedef arg_decode_insn3218 arg_fmv_x_w; static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a); typedef arg_r arg_feq_s; static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a); @@ -292,35 +338,35 @@ typedef arg_r arg_flt_s; static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a); typedef arg_r arg_fle_s; static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a); -typedef arg_decode_insn3213 arg_fclass_s; +typedef arg_decode_insn3218 arg_fclass_s; static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a); -typedef arg_decode_insn3212 arg_fcvt_s_w; +typedef arg_decode_insn3217 arg_fcvt_s_w; static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a); -typedef arg_decode_insn3212 arg_fcvt_s_wu; +typedef arg_decode_insn3217 
arg_fcvt_s_wu; static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a); -typedef arg_decode_insn3213 arg_fmv_w_x; +typedef arg_decode_insn3218 arg_fmv_w_x; static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a); typedef arg_i arg_fld; static bool trans_fld(DisasContext *ctx, arg_fld *a); typedef arg_s arg_fsd; static bool trans_fsd(DisasContext *ctx, arg_fsd *a); -typedef arg_decode_insn3210 arg_fmadd_d; +typedef arg_decode_insn3215 arg_fmadd_d; static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a); -typedef arg_decode_insn3210 arg_fmsub_d; +typedef arg_decode_insn3215 arg_fmsub_d; static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a); -typedef arg_decode_insn3210 arg_fnmsub_d; +typedef arg_decode_insn3215 arg_fnmsub_d; static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a); -typedef arg_decode_insn3210 arg_fnmadd_d; +typedef arg_decode_insn3215 arg_fnmadd_d; static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a); -typedef arg_decode_insn3211 arg_fadd_d; +typedef arg_decode_insn3216 arg_fadd_d; static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a); -typedef arg_decode_insn3211 arg_fsub_d; +typedef arg_decode_insn3216 arg_fsub_d; static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a); -typedef arg_decode_insn3211 arg_fmul_d; +typedef arg_decode_insn3216 arg_fmul_d; static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a); -typedef arg_decode_insn3211 arg_fdiv_d; +typedef arg_decode_insn3216 arg_fdiv_d; static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a); -typedef arg_decode_insn3212 arg_fsqrt_d; +typedef arg_decode_insn3217 arg_fsqrt_d; static bool trans_fsqrt_d(DisasContext *ctx, arg_fsqrt_d *a); typedef arg_r arg_fsgnj_d; static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a); @@ -332,9 +378,9 @@ typedef arg_r arg_fmin_d; static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a); typedef arg_r arg_fmax_d; static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a); -typedef arg_decode_insn3212 arg_fcvt_s_d; +typedef arg_decode_insn3217 arg_fcvt_s_d; static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_s; +typedef arg_decode_insn3217 arg_fcvt_d_s; static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a); typedef arg_r arg_feq_d; static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a); @@ -342,16 +388,704 @@ typedef arg_r arg_flt_d; static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a); typedef arg_r arg_fle_d; static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a); -typedef arg_decode_insn3213 arg_fclass_d; +typedef arg_decode_insn3218 arg_fclass_d; static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a); -typedef arg_decode_insn3212 arg_fcvt_w_d; +typedef arg_decode_insn3217 arg_fcvt_w_d; static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a); -typedef arg_decode_insn3212 arg_fcvt_wu_d; +typedef arg_decode_insn3217 arg_fcvt_wu_d; static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_w; +typedef arg_decode_insn3217 arg_fcvt_d_w; static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a); -typedef arg_decode_insn3212 arg_fcvt_d_wu; +typedef arg_decode_insn3217 arg_fcvt_d_wu; static bool trans_fcvt_d_wu(DisasContext *ctx, arg_fcvt_d_wu *a); +typedef arg_decode_insn3222 arg_hfence_gvma; +static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); +typedef arg_decode_insn3222 arg_hfence_vvma; +static bool trans_hfence_vvma(DisasContext *ctx, arg_hfence_vvma *a); +typedef arg_r2nfvm 
arg_vlb_v; +static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a); +typedef arg_r2nfvm arg_vlh_v; +static bool trans_vlh_v(DisasContext *ctx, arg_vlh_v *a); +typedef arg_r2nfvm arg_vlw_v; +static bool trans_vlw_v(DisasContext *ctx, arg_vlw_v *a); +typedef arg_r2nfvm arg_vle_v; +static bool trans_vle_v(DisasContext *ctx, arg_vle_v *a); +typedef arg_r2nfvm arg_vlbu_v; +static bool trans_vlbu_v(DisasContext *ctx, arg_vlbu_v *a); +typedef arg_r2nfvm arg_vlhu_v; +static bool trans_vlhu_v(DisasContext *ctx, arg_vlhu_v *a); +typedef arg_r2nfvm arg_vlwu_v; +static bool trans_vlwu_v(DisasContext *ctx, arg_vlwu_v *a); +typedef arg_r2nfvm arg_vlbff_v; +static bool trans_vlbff_v(DisasContext *ctx, arg_vlbff_v *a); +typedef arg_r2nfvm arg_vlhff_v; +static bool trans_vlhff_v(DisasContext *ctx, arg_vlhff_v *a); +typedef arg_r2nfvm arg_vlwff_v; +static bool trans_vlwff_v(DisasContext *ctx, arg_vlwff_v *a); +typedef arg_r2nfvm arg_vleff_v; +static bool trans_vleff_v(DisasContext *ctx, arg_vleff_v *a); +typedef arg_r2nfvm arg_vlbuff_v; +static bool trans_vlbuff_v(DisasContext *ctx, arg_vlbuff_v *a); +typedef arg_r2nfvm arg_vlhuff_v; +static bool trans_vlhuff_v(DisasContext *ctx, arg_vlhuff_v *a); +typedef arg_r2nfvm arg_vlwuff_v; +static bool trans_vlwuff_v(DisasContext *ctx, arg_vlwuff_v *a); +typedef arg_r2nfvm arg_vsb_v; +static bool trans_vsb_v(DisasContext *ctx, arg_vsb_v *a); +typedef arg_r2nfvm arg_vsh_v; +static bool trans_vsh_v(DisasContext *ctx, arg_vsh_v *a); +typedef arg_r2nfvm arg_vsw_v; +static bool trans_vsw_v(DisasContext *ctx, arg_vsw_v *a); +typedef arg_r2nfvm arg_vse_v; +static bool trans_vse_v(DisasContext *ctx, arg_vse_v *a); +typedef arg_rnfvm arg_vlsb_v; +static bool trans_vlsb_v(DisasContext *ctx, arg_vlsb_v *a); +typedef arg_rnfvm arg_vlsh_v; +static bool trans_vlsh_v(DisasContext *ctx, arg_vlsh_v *a); +typedef arg_rnfvm arg_vlsw_v; +static bool trans_vlsw_v(DisasContext *ctx, arg_vlsw_v *a); +typedef arg_rnfvm arg_vlse_v; +static bool trans_vlse_v(DisasContext *ctx, arg_vlse_v *a); +typedef arg_rnfvm arg_vlsbu_v; +static bool trans_vlsbu_v(DisasContext *ctx, arg_vlsbu_v *a); +typedef arg_rnfvm arg_vlshu_v; +static bool trans_vlshu_v(DisasContext *ctx, arg_vlshu_v *a); +typedef arg_rnfvm arg_vlswu_v; +static bool trans_vlswu_v(DisasContext *ctx, arg_vlswu_v *a); +typedef arg_rnfvm arg_vssb_v; +static bool trans_vssb_v(DisasContext *ctx, arg_vssb_v *a); +typedef arg_rnfvm arg_vssh_v; +static bool trans_vssh_v(DisasContext *ctx, arg_vssh_v *a); +typedef arg_rnfvm arg_vssw_v; +static bool trans_vssw_v(DisasContext *ctx, arg_vssw_v *a); +typedef arg_rnfvm arg_vsse_v; +static bool trans_vsse_v(DisasContext *ctx, arg_vsse_v *a); +typedef arg_rnfvm arg_vlxb_v; +static bool trans_vlxb_v(DisasContext *ctx, arg_vlxb_v *a); +typedef arg_rnfvm arg_vlxh_v; +static bool trans_vlxh_v(DisasContext *ctx, arg_vlxh_v *a); +typedef arg_rnfvm arg_vlxw_v; +static bool trans_vlxw_v(DisasContext *ctx, arg_vlxw_v *a); +typedef arg_rnfvm arg_vlxe_v; +static bool trans_vlxe_v(DisasContext *ctx, arg_vlxe_v *a); +typedef arg_rnfvm arg_vlxbu_v; +static bool trans_vlxbu_v(DisasContext *ctx, arg_vlxbu_v *a); +typedef arg_rnfvm arg_vlxhu_v; +static bool trans_vlxhu_v(DisasContext *ctx, arg_vlxhu_v *a); +typedef arg_rnfvm arg_vlxwu_v; +static bool trans_vlxwu_v(DisasContext *ctx, arg_vlxwu_v *a); +typedef arg_rnfvm arg_vsxb_v; +static bool trans_vsxb_v(DisasContext *ctx, arg_vsxb_v *a); +typedef arg_rnfvm arg_vsxh_v; +static bool trans_vsxh_v(DisasContext *ctx, arg_vsxh_v *a); +typedef arg_rnfvm arg_vsxw_v; 
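The block of declarations above is the prototype table that scripts/decodetree.py emits for the vector extension: each instruction gets an argument-struct alias (arg_r2nfvm for the unit-stride loads and stores, arg_rnfvm for the strided and indexed forms) plus a trans_* prototype that insn_trans/trans_rvv.inc.c is expected to implement. A minimal sketch of what such a hook looks like follows; the body is illustrative only, not the code added by this patch, and the has_ext()/RVV availability check is an assumption about the surrounding translator.

    /* Sketch, not the actual implementation: the generated decoder fills an
     * arg_r2nfvm (nf, rd, rs1, vm) and calls the hook; returning true means
     * the instruction was translated, returning false lets the decoder fall
     * through to its illegal-instruction handling. */
    static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a)
    {
        if (!has_ext(ctx, RVV)) {      /* assumed helper/flag, see note above */
            return false;
        }
        /* a->rd: destination register group, a->rs1: base address register,
         * a->nf: segment field count, a->vm: 0 means masked by v0 */
        /* ... emit TCG ops for the unit-stride byte load ... */
        return true;
    }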
+static bool trans_vsxw_v(DisasContext *ctx, arg_vsxw_v *a); +typedef arg_rnfvm arg_vsxe_v; +static bool trans_vsxe_v(DisasContext *ctx, arg_vsxe_v *a); +typedef arg_rwdvm arg_vamoswapw_v; +static bool trans_vamoswapw_v(DisasContext *ctx, arg_vamoswapw_v *a); +typedef arg_rwdvm arg_vamoaddw_v; +static bool trans_vamoaddw_v(DisasContext *ctx, arg_vamoaddw_v *a); +typedef arg_rwdvm arg_vamoxorw_v; +static bool trans_vamoxorw_v(DisasContext *ctx, arg_vamoxorw_v *a); +typedef arg_rwdvm arg_vamoandw_v; +static bool trans_vamoandw_v(DisasContext *ctx, arg_vamoandw_v *a); +typedef arg_rwdvm arg_vamoorw_v; +static bool trans_vamoorw_v(DisasContext *ctx, arg_vamoorw_v *a); +typedef arg_rwdvm arg_vamominw_v; +static bool trans_vamominw_v(DisasContext *ctx, arg_vamominw_v *a); +typedef arg_rwdvm arg_vamomaxw_v; +static bool trans_vamomaxw_v(DisasContext *ctx, arg_vamomaxw_v *a); +typedef arg_rwdvm arg_vamominuw_v; +static bool trans_vamominuw_v(DisasContext *ctx, arg_vamominuw_v *a); +typedef arg_rwdvm arg_vamomaxuw_v; +static bool trans_vamomaxuw_v(DisasContext *ctx, arg_vamomaxuw_v *a); +typedef arg_rmrr arg_vadd_vv; +static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a); +typedef arg_rmrr arg_vadd_vx; +static bool trans_vadd_vx(DisasContext *ctx, arg_vadd_vx *a); +typedef arg_rmrr arg_vadd_vi; +static bool trans_vadd_vi(DisasContext *ctx, arg_vadd_vi *a); +typedef arg_rmrr arg_vsub_vv; +static bool trans_vsub_vv(DisasContext *ctx, arg_vsub_vv *a); +typedef arg_rmrr arg_vsub_vx; +static bool trans_vsub_vx(DisasContext *ctx, arg_vsub_vx *a); +typedef arg_rmrr arg_vrsub_vx; +static bool trans_vrsub_vx(DisasContext *ctx, arg_vrsub_vx *a); +typedef arg_rmrr arg_vrsub_vi; +static bool trans_vrsub_vi(DisasContext *ctx, arg_vrsub_vi *a); +typedef arg_rmrr arg_vwaddu_vv; +static bool trans_vwaddu_vv(DisasContext *ctx, arg_vwaddu_vv *a); +typedef arg_rmrr arg_vwaddu_vx; +static bool trans_vwaddu_vx(DisasContext *ctx, arg_vwaddu_vx *a); +typedef arg_rmrr arg_vwadd_vv; +static bool trans_vwadd_vv(DisasContext *ctx, arg_vwadd_vv *a); +typedef arg_rmrr arg_vwadd_vx; +static bool trans_vwadd_vx(DisasContext *ctx, arg_vwadd_vx *a); +typedef arg_rmrr arg_vwsubu_vv; +static bool trans_vwsubu_vv(DisasContext *ctx, arg_vwsubu_vv *a); +typedef arg_rmrr arg_vwsubu_vx; +static bool trans_vwsubu_vx(DisasContext *ctx, arg_vwsubu_vx *a); +typedef arg_rmrr arg_vwsub_vv; +static bool trans_vwsub_vv(DisasContext *ctx, arg_vwsub_vv *a); +typedef arg_rmrr arg_vwsub_vx; +static bool trans_vwsub_vx(DisasContext *ctx, arg_vwsub_vx *a); +typedef arg_rmrr arg_vwaddu_wv; +static bool trans_vwaddu_wv(DisasContext *ctx, arg_vwaddu_wv *a); +typedef arg_rmrr arg_vwaddu_wx; +static bool trans_vwaddu_wx(DisasContext *ctx, arg_vwaddu_wx *a); +typedef arg_rmrr arg_vwadd_wv; +static bool trans_vwadd_wv(DisasContext *ctx, arg_vwadd_wv *a); +typedef arg_rmrr arg_vwadd_wx; +static bool trans_vwadd_wx(DisasContext *ctx, arg_vwadd_wx *a); +typedef arg_rmrr arg_vwsubu_wv; +static bool trans_vwsubu_wv(DisasContext *ctx, arg_vwsubu_wv *a); +typedef arg_rmrr arg_vwsubu_wx; +static bool trans_vwsubu_wx(DisasContext *ctx, arg_vwsubu_wx *a); +typedef arg_rmrr arg_vwsub_wv; +static bool trans_vwsub_wv(DisasContext *ctx, arg_vwsub_wv *a); +typedef arg_rmrr arg_vwsub_wx; +static bool trans_vwsub_wx(DisasContext *ctx, arg_vwsub_wx *a); +typedef arg_rmrr arg_vadc_vvm; +static bool trans_vadc_vvm(DisasContext *ctx, arg_vadc_vvm *a); +typedef arg_rmrr arg_vadc_vxm; +static bool trans_vadc_vxm(DisasContext *ctx, arg_vadc_vxm *a); +typedef arg_rmrr 
arg_vadc_vim; +static bool trans_vadc_vim(DisasContext *ctx, arg_vadc_vim *a); +typedef arg_rmrr arg_vmadc_vvm; +static bool trans_vmadc_vvm(DisasContext *ctx, arg_vmadc_vvm *a); +typedef arg_rmrr arg_vmadc_vxm; +static bool trans_vmadc_vxm(DisasContext *ctx, arg_vmadc_vxm *a); +typedef arg_rmrr arg_vmadc_vim; +static bool trans_vmadc_vim(DisasContext *ctx, arg_vmadc_vim *a); +typedef arg_rmrr arg_vsbc_vvm; +static bool trans_vsbc_vvm(DisasContext *ctx, arg_vsbc_vvm *a); +typedef arg_rmrr arg_vsbc_vxm; +static bool trans_vsbc_vxm(DisasContext *ctx, arg_vsbc_vxm *a); +typedef arg_rmrr arg_vmsbc_vvm; +static bool trans_vmsbc_vvm(DisasContext *ctx, arg_vmsbc_vvm *a); +typedef arg_rmrr arg_vmsbc_vxm; +static bool trans_vmsbc_vxm(DisasContext *ctx, arg_vmsbc_vxm *a); +typedef arg_rmrr arg_vand_vv; +static bool trans_vand_vv(DisasContext *ctx, arg_vand_vv *a); +typedef arg_rmrr arg_vand_vx; +static bool trans_vand_vx(DisasContext *ctx, arg_vand_vx *a); +typedef arg_rmrr arg_vand_vi; +static bool trans_vand_vi(DisasContext *ctx, arg_vand_vi *a); +typedef arg_rmrr arg_vor_vv; +static bool trans_vor_vv(DisasContext *ctx, arg_vor_vv *a); +typedef arg_rmrr arg_vor_vx; +static bool trans_vor_vx(DisasContext *ctx, arg_vor_vx *a); +typedef arg_rmrr arg_vor_vi; +static bool trans_vor_vi(DisasContext *ctx, arg_vor_vi *a); +typedef arg_rmrr arg_vxor_vv; +static bool trans_vxor_vv(DisasContext *ctx, arg_vxor_vv *a); +typedef arg_rmrr arg_vxor_vx; +static bool trans_vxor_vx(DisasContext *ctx, arg_vxor_vx *a); +typedef arg_rmrr arg_vxor_vi; +static bool trans_vxor_vi(DisasContext *ctx, arg_vxor_vi *a); +typedef arg_rmrr arg_vsll_vv; +static bool trans_vsll_vv(DisasContext *ctx, arg_vsll_vv *a); +typedef arg_rmrr arg_vsll_vx; +static bool trans_vsll_vx(DisasContext *ctx, arg_vsll_vx *a); +typedef arg_rmrr arg_vsll_vi; +static bool trans_vsll_vi(DisasContext *ctx, arg_vsll_vi *a); +typedef arg_rmrr arg_vsrl_vv; +static bool trans_vsrl_vv(DisasContext *ctx, arg_vsrl_vv *a); +typedef arg_rmrr arg_vsrl_vx; +static bool trans_vsrl_vx(DisasContext *ctx, arg_vsrl_vx *a); +typedef arg_rmrr arg_vsrl_vi; +static bool trans_vsrl_vi(DisasContext *ctx, arg_vsrl_vi *a); +typedef arg_rmrr arg_vsra_vv; +static bool trans_vsra_vv(DisasContext *ctx, arg_vsra_vv *a); +typedef arg_rmrr arg_vsra_vx; +static bool trans_vsra_vx(DisasContext *ctx, arg_vsra_vx *a); +typedef arg_rmrr arg_vsra_vi; +static bool trans_vsra_vi(DisasContext *ctx, arg_vsra_vi *a); +typedef arg_rmrr arg_vnsrl_vv; +static bool trans_vnsrl_vv(DisasContext *ctx, arg_vnsrl_vv *a); +typedef arg_rmrr arg_vnsrl_vx; +static bool trans_vnsrl_vx(DisasContext *ctx, arg_vnsrl_vx *a); +typedef arg_rmrr arg_vnsrl_vi; +static bool trans_vnsrl_vi(DisasContext *ctx, arg_vnsrl_vi *a); +typedef arg_rmrr arg_vnsra_vv; +static bool trans_vnsra_vv(DisasContext *ctx, arg_vnsra_vv *a); +typedef arg_rmrr arg_vnsra_vx; +static bool trans_vnsra_vx(DisasContext *ctx, arg_vnsra_vx *a); +typedef arg_rmrr arg_vnsra_vi; +static bool trans_vnsra_vi(DisasContext *ctx, arg_vnsra_vi *a); +typedef arg_rmrr arg_vmseq_vv; +static bool trans_vmseq_vv(DisasContext *ctx, arg_vmseq_vv *a); +typedef arg_rmrr arg_vmseq_vx; +static bool trans_vmseq_vx(DisasContext *ctx, arg_vmseq_vx *a); +typedef arg_rmrr arg_vmseq_vi; +static bool trans_vmseq_vi(DisasContext *ctx, arg_vmseq_vi *a); +typedef arg_rmrr arg_vmsne_vv; +static bool trans_vmsne_vv(DisasContext *ctx, arg_vmsne_vv *a); +typedef arg_rmrr arg_vmsne_vx; +static bool trans_vmsne_vx(DisasContext *ctx, arg_vmsne_vx *a); +typedef arg_rmrr arg_vmsne_vi; 
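The arithmetic and compare instructions declared below all share a single argument struct, arg_rmrr (rd, rs1, rs2, vm), which the decoder fills through decode_insn32_extract_r_vm() before dispatching on funct6, as in the switch cases earlier in this file. A hedged sketch of that extraction is shown here using the standard vector encoding field positions; the function name is invented for illustration, and the generated decode_insn32_extract_r_vm() in this file remains the authoritative version.

    /* Field layout assumed from the RISC-V vector encoding:
     * rd = insn[11:7], rs1 = insn[19:15], rs2 = insn[24:20], vm = insn[25]. */
    static void example_extract_r_vm(arg_rmrr *a, uint32_t insn)
    {
        a->rd  = extract32(insn, 7, 5);
        a->rs1 = extract32(insn, 15, 5);
        a->rs2 = extract32(insn, 20, 5);
        a->vm  = extract32(insn, 25, 1);   /* 0: operation masked by v0 */
    }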
+static bool trans_vmsne_vi(DisasContext *ctx, arg_vmsne_vi *a); +typedef arg_rmrr arg_vmsltu_vv; +static bool trans_vmsltu_vv(DisasContext *ctx, arg_vmsltu_vv *a); +typedef arg_rmrr arg_vmsltu_vx; +static bool trans_vmsltu_vx(DisasContext *ctx, arg_vmsltu_vx *a); +typedef arg_rmrr arg_vmslt_vv; +static bool trans_vmslt_vv(DisasContext *ctx, arg_vmslt_vv *a); +typedef arg_rmrr arg_vmslt_vx; +static bool trans_vmslt_vx(DisasContext *ctx, arg_vmslt_vx *a); +typedef arg_rmrr arg_vmsleu_vv; +static bool trans_vmsleu_vv(DisasContext *ctx, arg_vmsleu_vv *a); +typedef arg_rmrr arg_vmsleu_vx; +static bool trans_vmsleu_vx(DisasContext *ctx, arg_vmsleu_vx *a); +typedef arg_rmrr arg_vmsleu_vi; +static bool trans_vmsleu_vi(DisasContext *ctx, arg_vmsleu_vi *a); +typedef arg_rmrr arg_vmsle_vv; +static bool trans_vmsle_vv(DisasContext *ctx, arg_vmsle_vv *a); +typedef arg_rmrr arg_vmsle_vx; +static bool trans_vmsle_vx(DisasContext *ctx, arg_vmsle_vx *a); +typedef arg_rmrr arg_vmsle_vi; +static bool trans_vmsle_vi(DisasContext *ctx, arg_vmsle_vi *a); +typedef arg_rmrr arg_vmsgtu_vx; +static bool trans_vmsgtu_vx(DisasContext *ctx, arg_vmsgtu_vx *a); +typedef arg_rmrr arg_vmsgtu_vi; +static bool trans_vmsgtu_vi(DisasContext *ctx, arg_vmsgtu_vi *a); +typedef arg_rmrr arg_vmsgt_vx; +static bool trans_vmsgt_vx(DisasContext *ctx, arg_vmsgt_vx *a); +typedef arg_rmrr arg_vmsgt_vi; +static bool trans_vmsgt_vi(DisasContext *ctx, arg_vmsgt_vi *a); +typedef arg_rmrr arg_vminu_vv; +static bool trans_vminu_vv(DisasContext *ctx, arg_vminu_vv *a); +typedef arg_rmrr arg_vminu_vx; +static bool trans_vminu_vx(DisasContext *ctx, arg_vminu_vx *a); +typedef arg_rmrr arg_vmin_vv; +static bool trans_vmin_vv(DisasContext *ctx, arg_vmin_vv *a); +typedef arg_rmrr arg_vmin_vx; +static bool trans_vmin_vx(DisasContext *ctx, arg_vmin_vx *a); +typedef arg_rmrr arg_vmaxu_vv; +static bool trans_vmaxu_vv(DisasContext *ctx, arg_vmaxu_vv *a); +typedef arg_rmrr arg_vmaxu_vx; +static bool trans_vmaxu_vx(DisasContext *ctx, arg_vmaxu_vx *a); +typedef arg_rmrr arg_vmax_vv; +static bool trans_vmax_vv(DisasContext *ctx, arg_vmax_vv *a); +typedef arg_rmrr arg_vmax_vx; +static bool trans_vmax_vx(DisasContext *ctx, arg_vmax_vx *a); +typedef arg_rmrr arg_vmul_vv; +static bool trans_vmul_vv(DisasContext *ctx, arg_vmul_vv *a); +typedef arg_rmrr arg_vmul_vx; +static bool trans_vmul_vx(DisasContext *ctx, arg_vmul_vx *a); +typedef arg_rmrr arg_vmulh_vv; +static bool trans_vmulh_vv(DisasContext *ctx, arg_vmulh_vv *a); +typedef arg_rmrr arg_vmulh_vx; +static bool trans_vmulh_vx(DisasContext *ctx, arg_vmulh_vx *a); +typedef arg_rmrr arg_vmulhu_vv; +static bool trans_vmulhu_vv(DisasContext *ctx, arg_vmulhu_vv *a); +typedef arg_rmrr arg_vmulhu_vx; +static bool trans_vmulhu_vx(DisasContext *ctx, arg_vmulhu_vx *a); +typedef arg_rmrr arg_vmulhsu_vv; +static bool trans_vmulhsu_vv(DisasContext *ctx, arg_vmulhsu_vv *a); +typedef arg_rmrr arg_vmulhsu_vx; +static bool trans_vmulhsu_vx(DisasContext *ctx, arg_vmulhsu_vx *a); +typedef arg_rmrr arg_vdivu_vv; +static bool trans_vdivu_vv(DisasContext *ctx, arg_vdivu_vv *a); +typedef arg_rmrr arg_vdivu_vx; +static bool trans_vdivu_vx(DisasContext *ctx, arg_vdivu_vx *a); +typedef arg_rmrr arg_vdiv_vv; +static bool trans_vdiv_vv(DisasContext *ctx, arg_vdiv_vv *a); +typedef arg_rmrr arg_vdiv_vx; +static bool trans_vdiv_vx(DisasContext *ctx, arg_vdiv_vx *a); +typedef arg_rmrr arg_vremu_vv; +static bool trans_vremu_vv(DisasContext *ctx, arg_vremu_vv *a); +typedef arg_rmrr arg_vremu_vx; +static bool trans_vremu_vx(DisasContext *ctx, 
arg_vremu_vx *a); +typedef arg_rmrr arg_vrem_vv; +static bool trans_vrem_vv(DisasContext *ctx, arg_vrem_vv *a); +typedef arg_rmrr arg_vrem_vx; +static bool trans_vrem_vx(DisasContext *ctx, arg_vrem_vx *a); +typedef arg_rmrr arg_vwmulu_vv; +static bool trans_vwmulu_vv(DisasContext *ctx, arg_vwmulu_vv *a); +typedef arg_rmrr arg_vwmulu_vx; +static bool trans_vwmulu_vx(DisasContext *ctx, arg_vwmulu_vx *a); +typedef arg_rmrr arg_vwmulsu_vv; +static bool trans_vwmulsu_vv(DisasContext *ctx, arg_vwmulsu_vv *a); +typedef arg_rmrr arg_vwmulsu_vx; +static bool trans_vwmulsu_vx(DisasContext *ctx, arg_vwmulsu_vx *a); +typedef arg_rmrr arg_vwmul_vv; +static bool trans_vwmul_vv(DisasContext *ctx, arg_vwmul_vv *a); +typedef arg_rmrr arg_vwmul_vx; +static bool trans_vwmul_vx(DisasContext *ctx, arg_vwmul_vx *a); +typedef arg_rmrr arg_vmacc_vv; +static bool trans_vmacc_vv(DisasContext *ctx, arg_vmacc_vv *a); +typedef arg_rmrr arg_vmacc_vx; +static bool trans_vmacc_vx(DisasContext *ctx, arg_vmacc_vx *a); +typedef arg_rmrr arg_vnmsac_vv; +static bool trans_vnmsac_vv(DisasContext *ctx, arg_vnmsac_vv *a); +typedef arg_rmrr arg_vnmsac_vx; +static bool trans_vnmsac_vx(DisasContext *ctx, arg_vnmsac_vx *a); +typedef arg_rmrr arg_vmadd_vv; +static bool trans_vmadd_vv(DisasContext *ctx, arg_vmadd_vv *a); +typedef arg_rmrr arg_vmadd_vx; +static bool trans_vmadd_vx(DisasContext *ctx, arg_vmadd_vx *a); +typedef arg_rmrr arg_vnmsub_vv; +static bool trans_vnmsub_vv(DisasContext *ctx, arg_vnmsub_vv *a); +typedef arg_rmrr arg_vnmsub_vx; +static bool trans_vnmsub_vx(DisasContext *ctx, arg_vnmsub_vx *a); +typedef arg_rmrr arg_vwmaccu_vv; +static bool trans_vwmaccu_vv(DisasContext *ctx, arg_vwmaccu_vv *a); +typedef arg_rmrr arg_vwmaccu_vx; +static bool trans_vwmaccu_vx(DisasContext *ctx, arg_vwmaccu_vx *a); +typedef arg_rmrr arg_vwmacc_vv; +static bool trans_vwmacc_vv(DisasContext *ctx, arg_vwmacc_vv *a); +typedef arg_rmrr arg_vwmacc_vx; +static bool trans_vwmacc_vx(DisasContext *ctx, arg_vwmacc_vx *a); +typedef arg_rmrr arg_vwmaccsu_vv; +static bool trans_vwmaccsu_vv(DisasContext *ctx, arg_vwmaccsu_vv *a); +typedef arg_rmrr arg_vwmaccsu_vx; +static bool trans_vwmaccsu_vx(DisasContext *ctx, arg_vwmaccsu_vx *a); +typedef arg_rmrr arg_vwmaccus_vx; +static bool trans_vwmaccus_vx(DisasContext *ctx, arg_vwmaccus_vx *a); +typedef arg_decode_insn3218 arg_vmv_v_v; +static bool trans_vmv_v_v(DisasContext *ctx, arg_vmv_v_v *a); +typedef arg_decode_insn3218 arg_vmv_v_x; +static bool trans_vmv_v_x(DisasContext *ctx, arg_vmv_v_x *a); +typedef arg_decode_insn3218 arg_vmv_v_i; +static bool trans_vmv_v_i(DisasContext *ctx, arg_vmv_v_i *a); +typedef arg_rmrr arg_vmerge_vvm; +static bool trans_vmerge_vvm(DisasContext *ctx, arg_vmerge_vvm *a); +typedef arg_rmrr arg_vmerge_vxm; +static bool trans_vmerge_vxm(DisasContext *ctx, arg_vmerge_vxm *a); +typedef arg_rmrr arg_vmerge_vim; +static bool trans_vmerge_vim(DisasContext *ctx, arg_vmerge_vim *a); +typedef arg_rmrr arg_vsaddu_vv; +static bool trans_vsaddu_vv(DisasContext *ctx, arg_vsaddu_vv *a); +typedef arg_rmrr arg_vsaddu_vx; +static bool trans_vsaddu_vx(DisasContext *ctx, arg_vsaddu_vx *a); +typedef arg_rmrr arg_vsaddu_vi; +static bool trans_vsaddu_vi(DisasContext *ctx, arg_vsaddu_vi *a); +typedef arg_rmrr arg_vsadd_vv; +static bool trans_vsadd_vv(DisasContext *ctx, arg_vsadd_vv *a); +typedef arg_rmrr arg_vsadd_vx; +static bool trans_vsadd_vx(DisasContext *ctx, arg_vsadd_vx *a); +typedef arg_rmrr arg_vsadd_vi; +static bool trans_vsadd_vi(DisasContext *ctx, arg_vsadd_vi *a); +typedef arg_rmrr 
arg_vssubu_vv; +static bool trans_vssubu_vv(DisasContext *ctx, arg_vssubu_vv *a); +typedef arg_rmrr arg_vssubu_vx; +static bool trans_vssubu_vx(DisasContext *ctx, arg_vssubu_vx *a); +typedef arg_rmrr arg_vssub_vv; +static bool trans_vssub_vv(DisasContext *ctx, arg_vssub_vv *a); +typedef arg_rmrr arg_vssub_vx; +static bool trans_vssub_vx(DisasContext *ctx, arg_vssub_vx *a); +typedef arg_rmrr arg_vaadd_vv; +static bool trans_vaadd_vv(DisasContext *ctx, arg_vaadd_vv *a); +typedef arg_rmrr arg_vaadd_vx; +static bool trans_vaadd_vx(DisasContext *ctx, arg_vaadd_vx *a); +typedef arg_rmrr arg_vaadd_vi; +static bool trans_vaadd_vi(DisasContext *ctx, arg_vaadd_vi *a); +typedef arg_rmrr arg_vasub_vv; +static bool trans_vasub_vv(DisasContext *ctx, arg_vasub_vv *a); +typedef arg_rmrr arg_vasub_vx; +static bool trans_vasub_vx(DisasContext *ctx, arg_vasub_vx *a); +typedef arg_rmrr arg_vsmul_vv; +static bool trans_vsmul_vv(DisasContext *ctx, arg_vsmul_vv *a); +typedef arg_rmrr arg_vsmul_vx; +static bool trans_vsmul_vx(DisasContext *ctx, arg_vsmul_vx *a); +typedef arg_rmrr arg_vwsmaccu_vv; +static bool trans_vwsmaccu_vv(DisasContext *ctx, arg_vwsmaccu_vv *a); +typedef arg_rmrr arg_vwsmaccu_vx; +static bool trans_vwsmaccu_vx(DisasContext *ctx, arg_vwsmaccu_vx *a); +typedef arg_rmrr arg_vwsmacc_vv; +static bool trans_vwsmacc_vv(DisasContext *ctx, arg_vwsmacc_vv *a); +typedef arg_rmrr arg_vwsmacc_vx; +static bool trans_vwsmacc_vx(DisasContext *ctx, arg_vwsmacc_vx *a); +typedef arg_rmrr arg_vwsmaccsu_vv; +static bool trans_vwsmaccsu_vv(DisasContext *ctx, arg_vwsmaccsu_vv *a); +typedef arg_rmrr arg_vwsmaccsu_vx; +static bool trans_vwsmaccsu_vx(DisasContext *ctx, arg_vwsmaccsu_vx *a); +typedef arg_rmrr arg_vwsmaccus_vx; +static bool trans_vwsmaccus_vx(DisasContext *ctx, arg_vwsmaccus_vx *a); +typedef arg_rmrr arg_vssrl_vv; +static bool trans_vssrl_vv(DisasContext *ctx, arg_vssrl_vv *a); +typedef arg_rmrr arg_vssrl_vx; +static bool trans_vssrl_vx(DisasContext *ctx, arg_vssrl_vx *a); +typedef arg_rmrr arg_vssrl_vi; +static bool trans_vssrl_vi(DisasContext *ctx, arg_vssrl_vi *a); +typedef arg_rmrr arg_vssra_vv; +static bool trans_vssra_vv(DisasContext *ctx, arg_vssra_vv *a); +typedef arg_rmrr arg_vssra_vx; +static bool trans_vssra_vx(DisasContext *ctx, arg_vssra_vx *a); +typedef arg_rmrr arg_vssra_vi; +static bool trans_vssra_vi(DisasContext *ctx, arg_vssra_vi *a); +typedef arg_rmrr arg_vnclipu_vv; +static bool trans_vnclipu_vv(DisasContext *ctx, arg_vnclipu_vv *a); +typedef arg_rmrr arg_vnclipu_vx; +static bool trans_vnclipu_vx(DisasContext *ctx, arg_vnclipu_vx *a); +typedef arg_rmrr arg_vnclipu_vi; +static bool trans_vnclipu_vi(DisasContext *ctx, arg_vnclipu_vi *a); +typedef arg_rmrr arg_vnclip_vv; +static bool trans_vnclip_vv(DisasContext *ctx, arg_vnclip_vv *a); +typedef arg_rmrr arg_vnclip_vx; +static bool trans_vnclip_vx(DisasContext *ctx, arg_vnclip_vx *a); +typedef arg_rmrr arg_vnclip_vi; +static bool trans_vnclip_vi(DisasContext *ctx, arg_vnclip_vi *a); +typedef arg_rmrr arg_vfadd_vv; +static bool trans_vfadd_vv(DisasContext *ctx, arg_vfadd_vv *a); +typedef arg_rmrr arg_vfadd_vf; +static bool trans_vfadd_vf(DisasContext *ctx, arg_vfadd_vf *a); +typedef arg_rmrr arg_vfsub_vv; +static bool trans_vfsub_vv(DisasContext *ctx, arg_vfsub_vv *a); +typedef arg_rmrr arg_vfsub_vf; +static bool trans_vfsub_vf(DisasContext *ctx, arg_vfsub_vf *a); +typedef arg_rmrr arg_vfrsub_vf; +static bool trans_vfrsub_vf(DisasContext *ctx, arg_vfrsub_vf *a); +typedef arg_rmrr arg_vfwadd_vv; +static bool trans_vfwadd_vv(DisasContext 
*ctx, arg_vfwadd_vv *a); +typedef arg_rmrr arg_vfwadd_vf; +static bool trans_vfwadd_vf(DisasContext *ctx, arg_vfwadd_vf *a); +typedef arg_rmrr arg_vfwadd_wv; +static bool trans_vfwadd_wv(DisasContext *ctx, arg_vfwadd_wv *a); +typedef arg_rmrr arg_vfwadd_wf; +static bool trans_vfwadd_wf(DisasContext *ctx, arg_vfwadd_wf *a); +typedef arg_rmrr arg_vfwsub_vv; +static bool trans_vfwsub_vv(DisasContext *ctx, arg_vfwsub_vv *a); +typedef arg_rmrr arg_vfwsub_vf; +static bool trans_vfwsub_vf(DisasContext *ctx, arg_vfwsub_vf *a); +typedef arg_rmrr arg_vfwsub_wv; +static bool trans_vfwsub_wv(DisasContext *ctx, arg_vfwsub_wv *a); +typedef arg_rmrr arg_vfwsub_wf; +static bool trans_vfwsub_wf(DisasContext *ctx, arg_vfwsub_wf *a); +typedef arg_rmrr arg_vfmul_vv; +static bool trans_vfmul_vv(DisasContext *ctx, arg_vfmul_vv *a); +typedef arg_rmrr arg_vfmul_vf; +static bool trans_vfmul_vf(DisasContext *ctx, arg_vfmul_vf *a); +typedef arg_rmrr arg_vfdiv_vv; +static bool trans_vfdiv_vv(DisasContext *ctx, arg_vfdiv_vv *a); +typedef arg_rmrr arg_vfdiv_vf; +static bool trans_vfdiv_vf(DisasContext *ctx, arg_vfdiv_vf *a); +typedef arg_rmrr arg_vfrdiv_vf; +static bool trans_vfrdiv_vf(DisasContext *ctx, arg_vfrdiv_vf *a); +typedef arg_rmrr arg_vfwmul_vv; +static bool trans_vfwmul_vv(DisasContext *ctx, arg_vfwmul_vv *a); +typedef arg_rmrr arg_vfwmul_vf; +static bool trans_vfwmul_vf(DisasContext *ctx, arg_vfwmul_vf *a); +typedef arg_rmrr arg_vfmacc_vv; +static bool trans_vfmacc_vv(DisasContext *ctx, arg_vfmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vv; +static bool trans_vfnmacc_vv(DisasContext *ctx, arg_vfnmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vf; +static bool trans_vfnmacc_vf(DisasContext *ctx, arg_vfnmacc_vf *a); +typedef arg_rmrr arg_vfmacc_vf; +static bool trans_vfmacc_vf(DisasContext *ctx, arg_vfmacc_vf *a); +typedef arg_rmrr arg_vfmsac_vv; +static bool trans_vfmsac_vv(DisasContext *ctx, arg_vfmsac_vv *a); +typedef arg_rmrr arg_vfmsac_vf; +static bool trans_vfmsac_vf(DisasContext *ctx, arg_vfmsac_vf *a); +typedef arg_rmrr arg_vfnmsac_vv; +static bool trans_vfnmsac_vv(DisasContext *ctx, arg_vfnmsac_vv *a); +typedef arg_rmrr arg_vfnmsac_vf; +static bool trans_vfnmsac_vf(DisasContext *ctx, arg_vfnmsac_vf *a); +typedef arg_rmrr arg_vfmadd_vv; +static bool trans_vfmadd_vv(DisasContext *ctx, arg_vfmadd_vv *a); +typedef arg_rmrr arg_vfmadd_vf; +static bool trans_vfmadd_vf(DisasContext *ctx, arg_vfmadd_vf *a); +typedef arg_rmrr arg_vfnmadd_vv; +static bool trans_vfnmadd_vv(DisasContext *ctx, arg_vfnmadd_vv *a); +typedef arg_rmrr arg_vfnmadd_vf; +static bool trans_vfnmadd_vf(DisasContext *ctx, arg_vfnmadd_vf *a); +typedef arg_rmrr arg_vfmsub_vv; +static bool trans_vfmsub_vv(DisasContext *ctx, arg_vfmsub_vv *a); +typedef arg_rmrr arg_vfmsub_vf; +static bool trans_vfmsub_vf(DisasContext *ctx, arg_vfmsub_vf *a); +typedef arg_rmrr arg_vfnmsub_vv; +static bool trans_vfnmsub_vv(DisasContext *ctx, arg_vfnmsub_vv *a); +typedef arg_rmrr arg_vfnmsub_vf; +static bool trans_vfnmsub_vf(DisasContext *ctx, arg_vfnmsub_vf *a); +typedef arg_rmrr arg_vfwmacc_vv; +static bool trans_vfwmacc_vv(DisasContext *ctx, arg_vfwmacc_vv *a); +typedef arg_rmrr arg_vfwmacc_vf; +static bool trans_vfwmacc_vf(DisasContext *ctx, arg_vfwmacc_vf *a); +typedef arg_rmrr arg_vfwnmacc_vv; +static bool trans_vfwnmacc_vv(DisasContext *ctx, arg_vfwnmacc_vv *a); +typedef arg_rmrr arg_vfwnmacc_vf; +static bool trans_vfwnmacc_vf(DisasContext *ctx, arg_vfwnmacc_vf *a); +typedef arg_rmrr arg_vfwmsac_vv; +static bool trans_vfwmsac_vv(DisasContext *ctx, arg_vfwmsac_vv 
*a); +typedef arg_rmrr arg_vfwmsac_vf; +static bool trans_vfwmsac_vf(DisasContext *ctx, arg_vfwmsac_vf *a); +typedef arg_rmrr arg_vfwnmsac_vv; +static bool trans_vfwnmsac_vv(DisasContext *ctx, arg_vfwnmsac_vv *a); +typedef arg_rmrr arg_vfwnmsac_vf; +static bool trans_vfwnmsac_vf(DisasContext *ctx, arg_vfwnmsac_vf *a); +typedef arg_rmr arg_vfsqrt_v; +static bool trans_vfsqrt_v(DisasContext *ctx, arg_vfsqrt_v *a); +typedef arg_rmrr arg_vfmin_vv; +static bool trans_vfmin_vv(DisasContext *ctx, arg_vfmin_vv *a); +typedef arg_rmrr arg_vfmin_vf; +static bool trans_vfmin_vf(DisasContext *ctx, arg_vfmin_vf *a); +typedef arg_rmrr arg_vfmax_vv; +static bool trans_vfmax_vv(DisasContext *ctx, arg_vfmax_vv *a); +typedef arg_rmrr arg_vfmax_vf; +static bool trans_vfmax_vf(DisasContext *ctx, arg_vfmax_vf *a); +typedef arg_rmrr arg_vfsgnj_vv; +static bool trans_vfsgnj_vv(DisasContext *ctx, arg_vfsgnj_vv *a); +typedef arg_rmrr arg_vfsgnj_vf; +static bool trans_vfsgnj_vf(DisasContext *ctx, arg_vfsgnj_vf *a); +typedef arg_rmrr arg_vfsgnjn_vv; +static bool trans_vfsgnjn_vv(DisasContext *ctx, arg_vfsgnjn_vv *a); +typedef arg_rmrr arg_vfsgnjn_vf; +static bool trans_vfsgnjn_vf(DisasContext *ctx, arg_vfsgnjn_vf *a); +typedef arg_rmrr arg_vfsgnjx_vv; +static bool trans_vfsgnjx_vv(DisasContext *ctx, arg_vfsgnjx_vv *a); +typedef arg_rmrr arg_vfsgnjx_vf; +static bool trans_vfsgnjx_vf(DisasContext *ctx, arg_vfsgnjx_vf *a); +typedef arg_rmrr arg_vmfeq_vv; +static bool trans_vmfeq_vv(DisasContext *ctx, arg_vmfeq_vv *a); +typedef arg_rmrr arg_vmfeq_vf; +static bool trans_vmfeq_vf(DisasContext *ctx, arg_vmfeq_vf *a); +typedef arg_rmrr arg_vmfne_vv; +static bool trans_vmfne_vv(DisasContext *ctx, arg_vmfne_vv *a); +typedef arg_rmrr arg_vmfne_vf; +static bool trans_vmfne_vf(DisasContext *ctx, arg_vmfne_vf *a); +typedef arg_rmrr arg_vmflt_vv; +static bool trans_vmflt_vv(DisasContext *ctx, arg_vmflt_vv *a); +typedef arg_rmrr arg_vmflt_vf; +static bool trans_vmflt_vf(DisasContext *ctx, arg_vmflt_vf *a); +typedef arg_rmrr arg_vmfle_vv; +static bool trans_vmfle_vv(DisasContext *ctx, arg_vmfle_vv *a); +typedef arg_rmrr arg_vmfle_vf; +static bool trans_vmfle_vf(DisasContext *ctx, arg_vmfle_vf *a); +typedef arg_rmrr arg_vmfgt_vf; +static bool trans_vmfgt_vf(DisasContext *ctx, arg_vmfgt_vf *a); +typedef arg_rmrr arg_vmfge_vf; +static bool trans_vmfge_vf(DisasContext *ctx, arg_vmfge_vf *a); +typedef arg_rmrr arg_vmford_vv; +static bool trans_vmford_vv(DisasContext *ctx, arg_vmford_vv *a); +typedef arg_rmrr arg_vmford_vf; +static bool trans_vmford_vf(DisasContext *ctx, arg_vmford_vf *a); +typedef arg_rmr arg_vfclass_v; +static bool trans_vfclass_v(DisasContext *ctx, arg_vfclass_v *a); +typedef arg_rmrr arg_vfmerge_vfm; +static bool trans_vfmerge_vfm(DisasContext *ctx, arg_vfmerge_vfm *a); +typedef arg_decode_insn3218 arg_vfmv_v_f; +static bool trans_vfmv_v_f(DisasContext *ctx, arg_vfmv_v_f *a); +typedef arg_rmr arg_vfcvt_xu_f_v; +static bool trans_vfcvt_xu_f_v(DisasContext *ctx, arg_vfcvt_xu_f_v *a); +typedef arg_rmr arg_vfcvt_x_f_v; +static bool trans_vfcvt_x_f_v(DisasContext *ctx, arg_vfcvt_x_f_v *a); +typedef arg_rmr arg_vfcvt_f_xu_v; +static bool trans_vfcvt_f_xu_v(DisasContext *ctx, arg_vfcvt_f_xu_v *a); +typedef arg_rmr arg_vfcvt_f_x_v; +static bool trans_vfcvt_f_x_v(DisasContext *ctx, arg_vfcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_xu_f_v; +static bool trans_vfwcvt_xu_f_v(DisasContext *ctx, arg_vfwcvt_xu_f_v *a); +typedef arg_rmr arg_vfwcvt_x_f_v; +static bool trans_vfwcvt_x_f_v(DisasContext *ctx, arg_vfwcvt_x_f_v *a); +typedef 
arg_rmr arg_vfwcvt_f_xu_v; +static bool trans_vfwcvt_f_xu_v(DisasContext *ctx, arg_vfwcvt_f_xu_v *a); +typedef arg_rmr arg_vfwcvt_f_x_v; +static bool trans_vfwcvt_f_x_v(DisasContext *ctx, arg_vfwcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_f_f_v; +static bool trans_vfwcvt_f_f_v(DisasContext *ctx, arg_vfwcvt_f_f_v *a); +typedef arg_rmr arg_vfncvt_xu_f_v; +static bool trans_vfncvt_xu_f_v(DisasContext *ctx, arg_vfncvt_xu_f_v *a); +typedef arg_rmr arg_vfncvt_x_f_v; +static bool trans_vfncvt_x_f_v(DisasContext *ctx, arg_vfncvt_x_f_v *a); +typedef arg_rmr arg_vfncvt_f_xu_v; +static bool trans_vfncvt_f_xu_v(DisasContext *ctx, arg_vfncvt_f_xu_v *a); +typedef arg_rmr arg_vfncvt_f_x_v; +static bool trans_vfncvt_f_x_v(DisasContext *ctx, arg_vfncvt_f_x_v *a); +typedef arg_rmr arg_vfncvt_f_f_v; +static bool trans_vfncvt_f_f_v(DisasContext *ctx, arg_vfncvt_f_f_v *a); +typedef arg_rmrr arg_vredsum_vs; +static bool trans_vredsum_vs(DisasContext *ctx, arg_vredsum_vs *a); +typedef arg_rmrr arg_vredand_vs; +static bool trans_vredand_vs(DisasContext *ctx, arg_vredand_vs *a); +typedef arg_rmrr arg_vredor_vs; +static bool trans_vredor_vs(DisasContext *ctx, arg_vredor_vs *a); +typedef arg_rmrr arg_vredxor_vs; +static bool trans_vredxor_vs(DisasContext *ctx, arg_vredxor_vs *a); +typedef arg_rmrr arg_vredminu_vs; +static bool trans_vredminu_vs(DisasContext *ctx, arg_vredminu_vs *a); +typedef arg_rmrr arg_vredmin_vs; +static bool trans_vredmin_vs(DisasContext *ctx, arg_vredmin_vs *a); +typedef arg_rmrr arg_vredmaxu_vs; +static bool trans_vredmaxu_vs(DisasContext *ctx, arg_vredmaxu_vs *a); +typedef arg_rmrr arg_vredmax_vs; +static bool trans_vredmax_vs(DisasContext *ctx, arg_vredmax_vs *a); +typedef arg_rmrr arg_vwredsumu_vs; +static bool trans_vwredsumu_vs(DisasContext *ctx, arg_vwredsumu_vs *a); +typedef arg_rmrr arg_vwredsum_vs; +static bool trans_vwredsum_vs(DisasContext *ctx, arg_vwredsum_vs *a); +typedef arg_rmrr arg_vfredsum_vs; +static bool trans_vfredsum_vs(DisasContext *ctx, arg_vfredsum_vs *a); +typedef arg_rmrr arg_vfredmin_vs; +static bool trans_vfredmin_vs(DisasContext *ctx, arg_vfredmin_vs *a); +typedef arg_rmrr arg_vfredmax_vs; +static bool trans_vfredmax_vs(DisasContext *ctx, arg_vfredmax_vs *a); +typedef arg_rmrr arg_vfwredsum_vs; +static bool trans_vfwredsum_vs(DisasContext *ctx, arg_vfwredsum_vs *a); +typedef arg_r arg_vmand_mm; +static bool trans_vmand_mm(DisasContext *ctx, arg_vmand_mm *a); +typedef arg_r arg_vmnand_mm; +static bool trans_vmnand_mm(DisasContext *ctx, arg_vmnand_mm *a); +typedef arg_r arg_vmandnot_mm; +static bool trans_vmandnot_mm(DisasContext *ctx, arg_vmandnot_mm *a); +typedef arg_r arg_vmxor_mm; +static bool trans_vmxor_mm(DisasContext *ctx, arg_vmxor_mm *a); +typedef arg_r arg_vmor_mm; +static bool trans_vmor_mm(DisasContext *ctx, arg_vmor_mm *a); +typedef arg_r arg_vmnor_mm; +static bool trans_vmnor_mm(DisasContext *ctx, arg_vmnor_mm *a); +typedef arg_r arg_vmornot_mm; +static bool trans_vmornot_mm(DisasContext *ctx, arg_vmornot_mm *a); +typedef arg_r arg_vmxnor_mm; +static bool trans_vmxnor_mm(DisasContext *ctx, arg_vmxnor_mm *a); +typedef arg_rmr arg_vmpopc_m; +static bool trans_vmpopc_m(DisasContext *ctx, arg_vmpopc_m *a); +typedef arg_rmr arg_vmfirst_m; +static bool trans_vmfirst_m(DisasContext *ctx, arg_vmfirst_m *a); +typedef arg_rmr arg_vmsbf_m; +static bool trans_vmsbf_m(DisasContext *ctx, arg_vmsbf_m *a); +typedef arg_rmr arg_vmsif_m; +static bool trans_vmsif_m(DisasContext *ctx, arg_vmsif_m *a); +typedef arg_rmr arg_vmsof_m; +static bool trans_vmsof_m(DisasContext 
*ctx, arg_vmsof_m *a); +typedef arg_rmr arg_viota_m; +static bool trans_viota_m(DisasContext *ctx, arg_viota_m *a); +typedef arg_decode_insn3219 arg_vid_v; +static bool trans_vid_v(DisasContext *ctx, arg_vid_v *a); +typedef arg_r arg_vext_x_v; +static bool trans_vext_x_v(DisasContext *ctx, arg_vext_x_v *a); +typedef arg_decode_insn3218 arg_vmv_s_x; +static bool trans_vmv_s_x(DisasContext *ctx, arg_vmv_s_x *a); +typedef arg_decode_insn3220 arg_vfmv_f_s; +static bool trans_vfmv_f_s(DisasContext *ctx, arg_vfmv_f_s *a); +typedef arg_decode_insn3218 arg_vfmv_s_f; +static bool trans_vfmv_s_f(DisasContext *ctx, arg_vfmv_s_f *a); +typedef arg_rmrr arg_vslideup_vx; +static bool trans_vslideup_vx(DisasContext *ctx, arg_vslideup_vx *a); +typedef arg_rmrr arg_vslideup_vi; +static bool trans_vslideup_vi(DisasContext *ctx, arg_vslideup_vi *a); +typedef arg_rmrr arg_vslide1up_vx; +static bool trans_vslide1up_vx(DisasContext *ctx, arg_vslide1up_vx *a); +typedef arg_rmrr arg_vslidedown_vx; +static bool trans_vslidedown_vx(DisasContext *ctx, arg_vslidedown_vx *a); +typedef arg_rmrr arg_vslidedown_vi; +static bool trans_vslidedown_vi(DisasContext *ctx, arg_vslidedown_vi *a); +typedef arg_rmrr arg_vslide1down_vx; +static bool trans_vslide1down_vx(DisasContext *ctx, arg_vslide1down_vx *a); +typedef arg_rmrr arg_vrgather_vv; +static bool trans_vrgather_vv(DisasContext *ctx, arg_vrgather_vv *a); +typedef arg_rmrr arg_vrgather_vx; +static bool trans_vrgather_vx(DisasContext *ctx, arg_vrgather_vx *a); +typedef arg_rmrr arg_vrgather_vi; +static bool trans_vrgather_vi(DisasContext *ctx, arg_vrgather_vi *a); +typedef arg_r arg_vcompress_vm; +static bool trans_vcompress_vm(DisasContext *ctx, arg_vcompress_vm *a); +typedef arg_decode_insn3221 arg_vsetvli; +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a); +typedef arg_r arg_vsetvl; +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a); typedef arg_i arg_lwu; static bool trans_lwu(DisasContext *ctx, arg_lwu *a); typedef arg_i arg_ld; @@ -408,25 +1142,43 @@ typedef arg_atomic arg_amominu_d; static bool trans_amominu_d(DisasContext *ctx, arg_amominu_d *a); typedef arg_atomic arg_amomaxu_d; static bool trans_amomaxu_d(DisasContext *ctx, arg_amomaxu_d *a); -typedef arg_decode_insn3212 arg_fcvt_l_s; +typedef arg_rwdvm arg_vamoswapd_v; +static bool trans_vamoswapd_v(DisasContext *ctx, arg_vamoswapd_v *a); +typedef arg_rwdvm arg_vamoaddd_v; +static bool trans_vamoaddd_v(DisasContext *ctx, arg_vamoaddd_v *a); +typedef arg_rwdvm arg_vamoxord_v; +static bool trans_vamoxord_v(DisasContext *ctx, arg_vamoxord_v *a); +typedef arg_rwdvm arg_vamoandd_v; +static bool trans_vamoandd_v(DisasContext *ctx, arg_vamoandd_v *a); +typedef arg_rwdvm arg_vamoord_v; +static bool trans_vamoord_v(DisasContext *ctx, arg_vamoord_v *a); +typedef arg_rwdvm arg_vamomind_v; +static bool trans_vamomind_v(DisasContext *ctx, arg_vamomind_v *a); +typedef arg_rwdvm arg_vamomaxd_v; +static bool trans_vamomaxd_v(DisasContext *ctx, arg_vamomaxd_v *a); +typedef arg_rwdvm arg_vamominud_v; +static bool trans_vamominud_v(DisasContext *ctx, arg_vamominud_v *a); +typedef arg_rwdvm arg_vamomaxud_v; +static bool trans_vamomaxud_v(DisasContext *ctx, arg_vamomaxud_v *a); +typedef arg_decode_insn3217 arg_fcvt_l_s; static bool trans_fcvt_l_s(DisasContext *ctx, arg_fcvt_l_s *a); -typedef arg_decode_insn3212 arg_fcvt_lu_s; +typedef arg_decode_insn3217 arg_fcvt_lu_s; static bool trans_fcvt_lu_s(DisasContext *ctx, arg_fcvt_lu_s *a); -typedef arg_decode_insn3212 arg_fcvt_s_l; +typedef arg_decode_insn3217 
arg_fcvt_s_l; static bool trans_fcvt_s_l(DisasContext *ctx, arg_fcvt_s_l *a); -typedef arg_decode_insn3212 arg_fcvt_s_lu; +typedef arg_decode_insn3217 arg_fcvt_s_lu; static bool trans_fcvt_s_lu(DisasContext *ctx, arg_fcvt_s_lu *a); -typedef arg_decode_insn3212 arg_fcvt_l_d; +typedef arg_decode_insn3217 arg_fcvt_l_d; static bool trans_fcvt_l_d(DisasContext *ctx, arg_fcvt_l_d *a); -typedef arg_decode_insn3212 arg_fcvt_lu_d; +typedef arg_decode_insn3217 arg_fcvt_lu_d; static bool trans_fcvt_lu_d(DisasContext *ctx, arg_fcvt_lu_d *a); -typedef arg_decode_insn3213 arg_fmv_x_d; +typedef arg_decode_insn3218 arg_fmv_x_d; static bool trans_fmv_x_d(DisasContext *ctx, arg_fmv_x_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_l; +typedef arg_decode_insn3217 arg_fcvt_d_l; static bool trans_fcvt_d_l(DisasContext *ctx, arg_fcvt_d_l *a); -typedef arg_decode_insn3212 arg_fcvt_d_lu; +typedef arg_decode_insn3217 arg_fcvt_d_lu; static bool trans_fcvt_d_lu(DisasContext *ctx, arg_fcvt_d_lu *a); -typedef arg_decode_insn3213 arg_fmv_d_x; +typedef arg_decode_insn3218 arg_fmv_d_x; static bool trans_fmv_d_x(DisasContext *ctx, arg_fmv_d_x *a); static void decode_insn32_extract_atom_ld(DisasContext *ctx, arg_atomic *a, uint32_t insn) @@ -454,30 +1206,30 @@ static void decode_insn32_extract_b(DisasContext *ctx, arg_b *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn329 *a, uint32_t insn) +static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) { a->csr = extract32(insn, 20, 12); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_decode_insn32_Fmt_18(DisasContext *ctx, arg_empty *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_28(DisasContext *ctx, arg_empty *a, uint32_t insn) { } -static void decode_insn32_extract_decode_insn32_Fmt_19(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_29(DisasContext *ctx, arg_decode_insn3224 *a, uint32_t insn) { a->pred = extract32(insn, 24, 4); a->succ = extract32(insn, 20, 4); } -static void decode_insn32_extract_hfence_bvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_vvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -503,20 +1255,54 @@ static void decode_insn32_extract_r(DisasContext *ctx, arg_r *a, uint32_t insn) a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3213 *a, uint32_t insn) +static void decode_insn32_extract_r1_vm(DisasContext *ctx, arg_decode_insn3219 *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3218 *a, uint32_t insn) +{ + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_nfvm(DisasContext *ctx, arg_r2nfvm *a, uint32_t insn) { + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } 
-static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3212 *a, uint32_t insn) +static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3217 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); a->rm = extract32(insn, 12, 3); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 *a, uint32_t insn) +static void decode_insn32_extract_r2_vm(DisasContext *ctx, arg_rmr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_zimm(DisasContext *ctx, arg_decode_insn3221 *a, uint32_t insn) +{ + a->zimm = extract32(insn, 20, 11); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2rd(DisasContext *ctx, arg_decode_insn3220 *a, uint32_t insn) +{ + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) { a->rs3 = extract32(insn, 27, 5); a->rs2 = extract32(insn, 20, 5); @@ -525,7 +1311,16 @@ static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 * a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a, uint32_t insn) +static void decode_insn32_extract_r_nfvm(DisasContext *ctx, arg_rnfvm *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -533,6 +1328,39 @@ static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a a->rd = extract32(insn, 7, 5); } +static void decode_insn32_extract_r_vm(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_0(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 0; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_1(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 1; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_wdvm(DisasContext *ctx, arg_rwdvm *a, uint32_t insn) +{ + a->wd = extract32(insn, 26, 1); + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) { a->imm = deposit32(extract32(insn, 7, 5), 5, 27, sextract32(insn, 25, 7)); @@ -540,12 +1368,12 @@ static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) +static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3223 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void 
decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -576,18 +1404,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) union { arg_atomic f_atomic; arg_b f_b; - arg_decode_insn3210 f_decode_insn3210; - arg_decode_insn3211 f_decode_insn3211; - arg_decode_insn3212 f_decode_insn3212; - arg_decode_insn3213 f_decode_insn3213; arg_decode_insn3214 f_decode_insn3214; arg_decode_insn3215 f_decode_insn3215; arg_decode_insn3216 f_decode_insn3216; - arg_decode_insn329 f_decode_insn329; + arg_decode_insn3217 f_decode_insn3217; + arg_decode_insn3218 f_decode_insn3218; + arg_decode_insn3219 f_decode_insn3219; + arg_decode_insn3220 f_decode_insn3220; + arg_decode_insn3221 f_decode_insn3221; + arg_decode_insn3222 f_decode_insn3222; + arg_decode_insn3223 f_decode_insn3223; + arg_decode_insn3224 f_decode_insn3224; arg_empty f_empty; arg_i f_i; arg_j f_j; arg_r f_r; + arg_r2nfvm f_r2nfvm; + arg_rmr f_rmr; + arg_rmrr f_rmrr; + arg_rnfvm f_rnfvm; + arg_rwdvm f_rwdvm; arg_s f_s; arg_shift f_shift; arg_u f_u; @@ -600,55 +1436,235 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:96 */ if (trans_lb(ctx, &u.f_i)) return true; return false; case 0x1: /* ........ ........ .001.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:97 */ if (trans_lh(ctx, &u.f_i)) return true; return false; case 0x2: /* ........ ........ .010.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:98 */ if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:28 */ if (trans_ld(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:99 */ if (trans_lbu(ctx, &u.f_i)) return true; return false; case 0x5: /* ........ ........ .101.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:100 */ if (trans_lhu(ctx, &u.f_i)) return true; return false; case 0x6: /* ........ ........ .110.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:27 */ if (trans_lwu(ctx, &u.f_i)) return true; return false; } return false; case 0x00000007: /* ........ ........ ........ .0000111 */ - decode_insn32_extract_i(ctx, &u.f_i, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .000.... .0000111 */ + if (trans_vlbu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .000.... .0000111 */ + if (trans_vlbuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .000.... 
.0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .000.... .0000111 */ + if (trans_vlb_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .000.... .0000111 */ + if (trans_vlbff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsb_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:156 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:184 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .101.... .0000111 */ + if (trans_vlhu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .101.... .0000111 */ + if (trans_vlhuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlshu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxhu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .101.... .0000111 */ + if (trans_vlh_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .101.... .0000111 */ + if (trans_vlhff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsh_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .110.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .110.... .0000111 */ + if (trans_vlwu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .110.... 
.0000111 */ + if (trans_vlwuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlswu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxwu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .110.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .110.... .0000111 */ + if (trans_vlw_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .110.... .0000111 */ + if (trans_vlwff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsw_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .111.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .111.... .0000111 */ + if (trans_vle_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .111.... .0000111 */ + if (trans_vleff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlse_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000000f: @@ -656,14 +1672,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:123 */ - decode_insn32_extract_decode_insn32_Fmt_19(ctx, &u.f_decode_insn3216, insn); - if (trans_fence(ctx, &u.f_decode_insn3216)) return true; + decode_insn32_extract_decode_insn32_Fmt_29(ctx, &u.f_decode_insn3224, insn); + if (trans_fence(ctx, &u.f_decode_insn3224)) return true; return false; case 0x1: /* ........ ........ .001.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:124 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); if (trans_fence_i(ctx, &u.f_empty)) return true; return false; } @@ -673,7 +1687,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:104 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -683,26 +1696,22 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .001.... 
.0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:110 */ if (trans_slli(ctx, &u.f_shift)) return true; return false; } return false; case 0x2: /* ........ ........ .010.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:105 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_slti(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:106 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_sltiu(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:107 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_xori(ctx, &u.f_i)) return true; return false; @@ -712,25 +1721,21 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:111 */ if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 01...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:112 */ if (trans_srai(ctx, &u.f_shift)) return true; return false; } return false; case 0x6: /* ........ ........ .110.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:108 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_ori(ctx, &u.f_i)) return true; return false; case 0x7: /* ........ ........ .111.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:109 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -738,7 +1743,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x00000017: /* ........ ........ ........ .0010111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:87 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_auipc(ctx, &u.f_u)) return true; return false; @@ -747,7 +1751,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:30 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_addiw(ctx, &u.f_i)) return true; return false; @@ -757,7 +1760,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ .001.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:31 */ if (trans_slliw(ctx, &u.f_shift)) return true; return false; } @@ -768,12 +1770,10 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ .101.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:32 */ if (trans_srliw(ctx, &u.f_shift)) return true; return false; case 0x20: /* 0100000. ........ .101.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:33 */ if (trans_sraiw(ctx, &u.f_shift)) return true; return false; } @@ -786,40 +1786,155 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... 
.0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:101 */ if (trans_sb(ctx, &u.f_s)) return true; return false; case 0x1: /* ........ ........ .001.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:102 */ if (trans_sh(ctx, &u.f_s)) return true; return false; case 0x2: /* ........ ........ .010.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:103 */ if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x3: /* ........ ........ .011.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:29 */ if (trans_sd(ctx, &u.f_s)) return true; return false; } return false; case 0x00000027: /* ........ ........ ........ .0100111 */ - decode_insn32_extract_s(ctx, &u.f_s, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .000.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .000.... .0100111 */ + if (trans_vsb_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .000.... .0100111 */ + if (trans_vssb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:157 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; case 0x3: /* ........ ........ .011.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:185 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .101.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .101.... .0100111 */ + if (trans_vsh_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .101.... .0100111 */ + if (trans_vssh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .110.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .110.... .0100111 */ + if (trans_vsw_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .110.... 
.0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .110.... .0100111 */ + if (trans_vssw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .110.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .111.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .111.... .0100111 */ + if (trans_vse_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .111.... .0100111 */ + if (trans_vsse_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000002f: @@ -827,35 +1942,50 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xf8007000) { case 0x00002000: /* 00000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:146 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoadd_w(ctx, &u.f_atomic)) return true; return false; case 0x00003000: /* 00000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:51 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoadd_d(ctx, &u.f_atomic)) return true; return false; + case 0x00006000: + /* 00000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoaddw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x00007000: + /* 00000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoaddd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x08002000: /* 00001... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:145 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoswap_w(ctx, &u.f_atomic)) return true; return false; case 0x08003000: /* 00001... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:50 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoswap_d(ctx, &u.f_atomic)) return true; return false; + case 0x08006000: + /* 00001... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoswapw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x08007000: + /* 00001... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoswapd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x10002000: /* 00010... ........ .010.... .0101111 */ decode_insn32_extract_atom_ld(ctx, &u.f_atomic, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 00010..0 0000.... .010.... 
.0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:143 */ if (trans_lr_w(ctx, &u.f_atomic)) return true; return false; } @@ -866,107 +1996,160 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 20) & 0x1f) { case 0x0: /* 00010..0 0000.... .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:48 */ if (trans_lr_d(ctx, &u.f_atomic)) return true; return false; } return false; case 0x18002000: /* 00011... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:144 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_sc_w(ctx, &u.f_atomic)) return true; return false; case 0x18003000: /* 00011... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:49 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_sc_d(ctx, &u.f_atomic)) return true; return false; case 0x20002000: /* 00100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:147 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoxor_w(ctx, &u.f_atomic)) return true; return false; case 0x20003000: /* 00100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:52 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoxor_d(ctx, &u.f_atomic)) return true; return false; + case 0x20006000: + /* 00100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoxorw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x20007000: + /* 00100... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoxord_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x40002000: /* 01000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:149 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoor_w(ctx, &u.f_atomic)) return true; return false; case 0x40003000: /* 01000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:54 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoor_d(ctx, &u.f_atomic)) return true; return false; + case 0x40006000: + /* 01000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoorw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x40007000: + /* 01000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoord_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x60002000: /* 01100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:148 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoand_w(ctx, &u.f_atomic)) return true; return false; case 0x60003000: /* 01100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:53 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoand_d(ctx, &u.f_atomic)) return true; return false; + case 0x60006000: + /* 01100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoandw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x60007000: + /* 01100... 
........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoandd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x80002000: /* 10000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:150 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomin_w(ctx, &u.f_atomic)) return true; return false; case 0x80003000: /* 10000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:55 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomin_d(ctx, &u.f_atomic)) return true; return false; + case 0x80006000: + /* 10000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x80007000: + /* 10000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomind_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xa0002000: /* 10100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:151 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomax_w(ctx, &u.f_atomic)) return true; return false; case 0xa0003000: /* 10100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:56 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomax_d(ctx, &u.f_atomic)) return true; return false; + case 0xa0006000: + /* 10100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0xa0007000: + /* 10100... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xc0002000: /* 11000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:152 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amominu_w(ctx, &u.f_atomic)) return true; return false; case 0xc0003000: /* 11000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:57 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amominu_d(ctx, &u.f_atomic)) return true; return false; + case 0xc0006000: + /* 11000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominuw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0xc0007000: + /* 11000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominud_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xe0002000: /* 11100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:153 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomaxu_w(ctx, &u.f_atomic)) return true; return false; case 0xe0003000: /* 11100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:58 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomaxu_d(ctx, &u.f_atomic)) return true; return false; + case 0xe0006000: + /* 11100... ........ .110.... 
.0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxuw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0xe0007000: + /* 11100... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxud_v(ctx, &u.f_rwdvm)) return true; + return false; } return false; case 0x00000033: @@ -975,99 +2158,80 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe007000) { case 0x00000000: /* 0000000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:113 */ if (trans_add(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 0000000. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:115 */ if (trans_sll(ctx, &u.f_r)) return true; return false; case 0x00002000: /* 0000000. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:116 */ if (trans_slt(ctx, &u.f_r)) return true; return false; case 0x00003000: /* 0000000. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:117 */ if (trans_sltu(ctx, &u.f_r)) return true; return false; case 0x00004000: /* 0000000. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:118 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00005000: /* 0000000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:119 */ if (trans_srl(ctx, &u.f_r)) return true; return false; case 0x00006000: /* 0000000. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:121 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00007000: /* 0000000. ........ .111.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:122 */ if (trans_and(ctx, &u.f_r)) return true; return false; case 0x02000000: /* 0000001. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:133 */ if (trans_mul(ctx, &u.f_r)) return true; return false; case 0x02001000: /* 0000001. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:134 */ if (trans_mulh(ctx, &u.f_r)) return true; return false; case 0x02002000: /* 0000001. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:135 */ if (trans_mulhsu(ctx, &u.f_r)) return true; return false; case 0x02003000: /* 0000001. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:136 */ if (trans_mulhu(ctx, &u.f_r)) return true; return false; case 0x02004000: /* 0000001. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:137 */ if (trans_div(ctx, &u.f_r)) return true; return false; case 0x02005000: /* 0000001. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:138 */ if (trans_divu(ctx, &u.f_r)) return true; return false; case 0x02006000: /* 0000001. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:139 */ if (trans_rem(ctx, &u.f_r)) return true; return false; case 0x02007000: /* 0000001. ........ .111.... 
.0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:140 */ if (trans_remu(ctx, &u.f_r)) return true; return false; case 0x40000000: /* 0100000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:114 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x40005000: /* 0100000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:120 */ if (trans_sra(ctx, &u.f_r)) return true; return false; } return false; case 0x00000037: /* ........ ........ ........ .0110111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:86 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; @@ -1077,117 +2241,99 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe007000) { case 0x00000000: /* 0000000. ........ .000.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:34 */ if (trans_addw(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 0000000. ........ .001.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:36 */ if (trans_sllw(ctx, &u.f_r)) return true; return false; case 0x00005000: /* 0000000. ........ .101.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:37 */ if (trans_srlw(ctx, &u.f_r)) return true; return false; case 0x02000000: /* 0000001. ........ .000.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:41 */ if (trans_mulw(ctx, &u.f_r)) return true; return false; case 0x02004000: /* 0000001. ........ .100.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:42 */ if (trans_divw(ctx, &u.f_r)) return true; return false; case 0x02005000: /* 0000001. ........ .101.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:43 */ if (trans_divuw(ctx, &u.f_r)) return true; return false; case 0x02006000: /* 0000001. ........ .110.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:44 */ if (trans_remw(ctx, &u.f_r)) return true; return false; case 0x02007000: /* 0000001. ........ .111.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:45 */ if (trans_remuw(ctx, &u.f_r)) return true; return false; case 0x40000000: /* 0100000. ........ .000.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:35 */ if (trans_subw(ctx, &u.f_r)) return true; return false; case 0x40005000: /* 0100000. ........ .101.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:38 */ if (trans_sraw(ctx, &u.f_r)) return true; return false; } return false; case 0x00000043: /* ........ ........ ........ .1000011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:158 */ - if (trans_fmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ 
.1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:186 */ - if (trans_fmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x00000047: /* ........ ........ ........ .1000111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:159 */ - if (trans_fmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:187 */ - if (trans_fmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004b: /* ........ ........ ........ .1001011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:160 */ - if (trans_fnmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:188 */ - if (trans_fnmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004f: /* ........ ........ ........ .1001111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:161 */ - if (trans_fnmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:189 */ - if (trans_fnmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; @@ -1196,51 +2342,43 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:162 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x1: /* 0000001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:190 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x4: /* 0000100. ........ ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:163 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x5: /* 0000101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:191 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x8: /* 0001000. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:164 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x9: /* 0001001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:192 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0xc: /* 0001100. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:165 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0xd: /* 0001101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:193 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x10: /* 0010000. ........ ........ .1010011 */ @@ -1248,17 +2386,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010000. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:167 */ if (trans_fsgnj_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:168 */ if (trans_fsgnjn_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:169 */ if (trans_fsgnjx_s(ctx, &u.f_r)) return true; return false; } @@ -1269,17 +2404,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010001. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:195 */ if (trans_fsgnj_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010001. ........ .001.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:196 */ if (trans_fsgnjn_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:197 */ if (trans_fsgnjx_d(ctx, &u.f_r)) return true; return false; } @@ -1290,12 +2422,10 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010100. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:170 */ if (trans_fmin_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010100. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:171 */ if (trans_fmax_s(ctx, &u.f_r)) return true; return false; } @@ -1306,57 +2436,51 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010101. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:198 */ if (trans_fmin_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010101. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:199 */ if (trans_fmax_d(ctx, &u.f_r)) return true; return false; } return false; case 0x20: /* 0100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x1: /* 01000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:200 */ - if (trans_fcvt_s_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x21: /* 0100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:201 */ - if (trans_fcvt_d_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2c: /* 0101100. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:166 */ - if (trans_fsqrt_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2d: /* 0101101. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:194 */ - if (trans_fsqrt_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; @@ -1366,17 +2490,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010000. ........ .000.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:177 */ if (trans_fle_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:176 */ if (trans_flt_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:175 */ if (trans_feq_s(ctx, &u.f_r)) return true; return false; } @@ -1387,176 +2508,1772 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010001. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:204 */ if (trans_fle_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010001. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:203 */ if (trans_flt_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:202 */ if (trans_feq_d(ctx, &u.f_r)) return true; return false; } return false; case 0x60: /* 1100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:172 */ - if (trans_fcvt_w_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:173 */ - if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11000000 0010.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:61 */ - if (trans_fcvt_l_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_l_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11000000 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:62 */ - if (trans_fcvt_lu_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_lu_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x61: /* 1100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:206 */ - if (trans_fcvt_w_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000010 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:207 */ - if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11000010 0010.... ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:67 */ - if (trans_fcvt_l_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_l_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11000010 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:68 */ - if (trans_fcvt_lu_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_lu_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x68: /* 1101000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:179 */ - if (trans_fcvt_s_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:180 */ - if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11010000 0010.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:63 */ - if (trans_fcvt_s_l(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_l(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11010000 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:64 */ - if (trans_fcvt_s_lu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_lu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x69: /* 1101001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:208 */ - if (trans_fcvt_d_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010010 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:209 */ - if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11010010 0010.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:70 */ - if (trans_fcvt_d_l(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_l(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11010010 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:71 */ - if (trans_fcvt_d_lu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_lu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x70: /* 1110000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11100000 0000.... .000.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:174 */ - if (trans_fmv_x_w(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_x_w(ctx, &u.f_decode_insn3218)) return true; return false; case 0x00001000: /* 11100000 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:178 */ - if (trans_fclass_s(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_s(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x71: /* 1110001. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11100010 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:69 */ - if (trans_fmv_x_d(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_x_d(ctx, &u.f_decode_insn3218)) return true; return false; case 0x00001000: /* 11100010 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:205 */ - if (trans_fclass_d(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_d(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x78: /* 1111000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11110000 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:181 */ - if (trans_fmv_w_x(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_w_x(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x79: /* 1111001. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11110010 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:72 */ - if (trans_fmv_d_x(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_d_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x00000057: + /* ........ ........ ........ .1010111 */ + switch (insn & 0x80007000) { + case 0x00000000: + /* 0....... ........ .000.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .000.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .000.... .1010111 */ + if (trans_vadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .000.... .1010111 */ + if (trans_vmadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .000.... .1010111 */ + if (trans_vsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .000.... .1010111 */ + if (trans_vmsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .000.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vvm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .000.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .000.... .1010111 */ + if (trans_vmv_v_v(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .000.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00001000: + /* 0....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 0000.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000000.. ........ .001.... .1010111 */ + if (trans_vfadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000010.. ........ .001.... .1010111 */ + if (trans_vfsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x04000000: + /* 0000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x10000000: + /* 0001.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000100.. ........ .001.... .1010111 */ + if (trans_vfmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000110.. ........ .001.... .1010111 */ + if (trans_vfmax_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x14000000: + /* 0001.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000101.. ........ .001.... .1010111 */ + if (trans_vfredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000111.. ........ .001.... .1010111 */ + if (trans_vfredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 0010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001000.. ........ .001.... .1010111 */ + if (trans_vfsgnj_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 001010.. ........ .001.... .1010111 */ + if (trans_vfsgnjx_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 0010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001001.. ........ .001.... .1010111 */ + if (trans_vfsgnjn_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 0011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r2rd(ctx, &u.f_decode_insn3220, insn); + switch (insn & 0x0a0f8000) { + case 0x02000000: + /* 0011001. ....0000 0001.... .1010111 */ + if (trans_vfmv_f_s(ctx, &u.f_decode_insn3220)) return true; + return false; + } + return false; + case 0x60000000: + /* 0110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011000.. ........ .001.... .1010111 */ + if (trans_vmfeq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011010.. ........ .001.... .1010111 */ + if (trans_vmford_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x64000000: + /* 0110.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011001.. ........ .001.... .1010111 */ + if (trans_vmfle_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011011.. ........ .001.... 
.1010111 */ + if (trans_vmflt_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 0111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011100.. ........ .001.... .1010111 */ + if (trans_vmfne_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x00002000: + /* 0....... ........ .010.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000001.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredand_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredxor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredminu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmaxu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0011001. ........ .010.... .1010111 */ + if (trans_vext_x_v(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x14: + /* 010100.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmpopc_m(ctx, &u.f_rmr)) return true; + return false; + case 0x15: + /* 010101.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmfirst_m(ctx, &u.f_rmr)) return true; + return false; + case 0x16: + /* 010110.. ........ .010.... .1010111 */ + switch ((insn >> 15) & 0x1f) { + case 0x1: + /* 010110.. ....0000 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsbf_m(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 010110.. ....0001 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsof_m(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 010110.. ....0001 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsif_m(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 010110.. ....1000 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_viota_m(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 010110.. ....1000 1010.... .1010111 */ + decode_insn32_extract_r1_vm(ctx, &u.f_decode_insn3219, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 010110.0 00001000 1010.... 
.1010111 */ + if (trans_vid_v(ctx, &u.f_decode_insn3219)) return true; + return false; + } + return false; + } + return false; + case 0x17: + /* 010111.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vcompress_vm(ctx, &u.f_r)) return true; + return false; + case 0x18: + /* 011000.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmandnot_mm(ctx, &u.f_r)) return true; + return false; + case 0x19: + /* 011001.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1a: + /* 011010.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1b: + /* 011011.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1c: + /* 011100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmornot_mm(ctx, &u.f_r)) return true; + return false; + case 0x1d: + /* 011101.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1e: + /* 011110.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1f: + /* 011111.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxnor_mm(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x00003000: + /* 0....... ........ .011.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .011.... .1010111 */ + if (trans_vadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .011.... 
.1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .011.... .1010111 */ + if (trans_vmadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .011.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vim(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .011.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .011.... .1010111 */ + if (trans_vmv_v_i(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00004000: + /* 0....... ........ .100.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .100.... .1010111 */ + if (trans_vadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .100.... .1010111 */ + if (trans_vmadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .100.... .1010111 */ + if (trans_vsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .100.... .1010111 */ + if (trans_vmsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .100.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vxm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .100.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .100.... .1010111 */ + if (trans_vmv_v_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00005000: + /* 0....... ........ .101.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmin_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmax_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 001000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnj_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjn_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjx_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 001101.. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .101.... .1010111 */ + if (trans_vfmv_s_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .101.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vfmerge_vfm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .101.... .1010111 */ + if (trans_vfmv_v_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfeq_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfle_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmford_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmflt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .101.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfne_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfgt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfge_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00006000: + /* 0....... ........ .110.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0xd: + /* 001101.. ........ .110.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .110.... .1010111 */ + if (trans_vmv_s_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0xe: + /* 001110.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1up_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1down_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00007000: + /* 0....... ........ .111.... .1010111 */ + decode_insn32_extract_r2_zimm(ctx, &u.f_decode_insn3221, insn); + if (trans_vsetvli(ctx, &u.f_decode_insn3221)) return true; + return false; + case 0x80000000: + /* 1....... ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .000.... .1010111 */ + if (trans_vsaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .000.... .1010111 */ + if (trans_vsadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .000.... .1010111 */ + if (trans_vssubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .000.... .1010111 */ + if (trans_vssub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .000.... .1010111 */ + if (trans_vaadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .000.... .1010111 */ + if (trans_vsll_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .000.... .1010111 */ + if (trans_vasub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .000.... .1010111 */ + if (trans_vsmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .000.... .1010111 */ + if (trans_vsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .000.... .1010111 */ + if (trans_vsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .000.... .1010111 */ + if (trans_vssrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .000.... .1010111 */ + if (trans_vssra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .000.... .1010111 */ + if (trans_vnsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .000.... .1010111 */ + if (trans_vnsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .000.... .1010111 */ + if (trans_vnclipu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .000.... 
.1010111 */ + if (trans_vnclip_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .000.... .1010111 */ + if (trans_vwredsumu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .000.... .1010111 */ + if (trans_vwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .000.... .1010111 */ + if (trans_vwsmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .000.... .1010111 */ + if (trans_vwsmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .000.... .1010111 */ + if (trans_vwsmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80001000: + /* 1....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 1000.0.. ........ .001.... .1010111 */ + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100000.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100010.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch ((insn >> 15) & 0x1f) { + case 0x0: + /* 100010.. ....0000 0001.... .1010111 */ + if (trans_vfcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x1: + /* 100010.. ....0000 1001.... .1010111 */ + if (trans_vfcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 100010.. ....0001 0001.... .1010111 */ + if (trans_vfcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 100010.. ....0001 1001.... .1010111 */ + if (trans_vfcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x8: + /* 100010.. ....0100 0001.... .1010111 */ + if (trans_vfwcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x9: + /* 100010.. ....0100 1001.... .1010111 */ + if (trans_vfwcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0xa: + /* 100010.. ....0101 0001.... .1010111 */ + if (trans_vfwcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0xb: + /* 100010.. ....0101 1001.... .1010111 */ + if (trans_vfwcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0xc: + /* 100010.. ....0110 0001.... .1010111 */ + if (trans_vfwcvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 100010.. ....1000 0001.... .1010111 */ + if (trans_vfncvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 100010.. ....1000 1001.... .1010111 */ + if (trans_vfncvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x12: + /* 100010.. ....1001 0001.... .1010111 */ + if (trans_vfncvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x13: + /* 100010.. ....1001 1001.... .1010111 */ + if (trans_vfncvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x14: + /* 100010.. ....1010 0001.... .1010111 */ + if (trans_vfncvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + } + return false; + case 0x04000000: + /* 1000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch (insn & 0x080f8000) { + case 0x08000000: + /* 100011.. ....0000 0001.... .1010111 */ + if (trans_vfsqrt_v(ctx, &u.f_rmr)) return true; + return false; + case 0x08080000: + /* 100011.. ....1000 0001.... .1010111 */ + if (trans_vfclass_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + case 0x10000000: + /* 1001.0.. ........ .001.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100100.. ........ .001.... .1010111 */ + if (trans_vfmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 1010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101000.. ........ .001.... .1010111 */ + if (trans_vfmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101010.. ........ .001.... .1010111 */ + if (trans_vfmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 1010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101001.. ........ .001.... .1010111 */ + if (trans_vfnmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101011.. ........ .001.... .1010111 */ + if (trans_vfnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 1011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101100.. ........ .001.... .1010111 */ + if (trans_vfmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101110.. ........ .001.... .1010111 */ + if (trans_vfmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x34000000: + /* 1011.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101101.. ........ .001.... .1010111 */ + if (trans_vfnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101111.. ........ .001.... .1010111 */ + if (trans_vfnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x40000000: + /* 1100.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110000.. ........ .001.... .1010111 */ + if (trans_vfwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110010.. ........ .001.... .1010111 */ + if (trans_vfwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x44000000: + /* 1100.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x50000000: + /* 1101.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110100.. ........ .001.... .1010111 */ + if (trans_vfwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110110.. ........ .001.... .1010111 */ + if (trans_vfwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x60000000: + /* 1110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111000.. ........ .001.... .1010111 */ + if (trans_vfwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 1111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111100.. ........ .001.... .1010111 */ + if (trans_vfwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111110.. ........ .001.... 
.1010111 */ + if (trans_vfwmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x74000000: + /* 1111.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111101.. ........ .001.... .1010111 */ + if (trans_vfwnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111111.. ........ .001.... .1010111 */ + if (trans_vfwnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x80002000: + /* 1....... ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .010.... .1010111 */ + if (trans_vdivu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .010.... .1010111 */ + if (trans_vdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .010.... .1010111 */ + if (trans_vremu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .010.... .1010111 */ + if (trans_vrem_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .010.... .1010111 */ + if (trans_vmulhu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .010.... .1010111 */ + if (trans_vmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .010.... .1010111 */ + if (trans_vmulhsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .010.... .1010111 */ + if (trans_vmulh_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .010.... .1010111 */ + if (trans_vmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .010.... .1010111 */ + if (trans_vnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .010.... .1010111 */ + if (trans_vmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .010.... .1010111 */ + if (trans_vnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .010.... .1010111 */ + if (trans_vwaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .010.... .1010111 */ + if (trans_vwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .010.... .1010111 */ + if (trans_vwsubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .010.... .1010111 */ + if (trans_vwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .010.... .1010111 */ + if (trans_vwaddu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .010.... .1010111 */ + if (trans_vwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .010.... .1010111 */ + if (trans_vwsubu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .010.... .1010111 */ + if (trans_vwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .010.... .1010111 */ + if (trans_vwmulu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .010.... .1010111 */ + if (trans_vwmulsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .010.... 
.1010111 */ + if (trans_vwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .010.... .1010111 */ + if (trans_vwmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .010.... .1010111 */ + if (trans_vwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .010.... .1010111 */ + if (trans_vwmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80003000: + /* 1....... ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .011.... .1010111 */ + if (trans_vsaddu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .011.... .1010111 */ + if (trans_vsadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .011.... .1010111 */ + if (trans_vaadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .011.... .1010111 */ + if (trans_vsll_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .011.... .1010111 */ + if (trans_vsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .011.... .1010111 */ + if (trans_vsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .011.... .1010111 */ + if (trans_vssrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .011.... .1010111 */ + if (trans_vssra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .011.... .1010111 */ + if (trans_vnsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .011.... .1010111 */ + if (trans_vnsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .011.... .1010111 */ + if (trans_vnclipu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .011.... .1010111 */ + if (trans_vnclip_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80004000: + /* 1....... ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .100.... .1010111 */ + if (trans_vsaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .100.... .1010111 */ + if (trans_vsadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .100.... .1010111 */ + if (trans_vssubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .100.... .1010111 */ + if (trans_vssub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .100.... .1010111 */ + if (trans_vaadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .100.... .1010111 */ + if (trans_vsll_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .100.... .1010111 */ + if (trans_vasub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .100.... .1010111 */ + if (trans_vsmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .100.... .1010111 */ + if (trans_vsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .100.... .1010111 */ + if (trans_vsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .100.... 
.1010111 */ + if (trans_vssrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .100.... .1010111 */ + if (trans_vssra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .100.... .1010111 */ + if (trans_vnsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .100.... .1010111 */ + if (trans_vnsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .100.... .1010111 */ + if (trans_vnclipu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .100.... .1010111 */ + if (trans_vnclip_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .100.... .1010111 */ + if (trans_vwsmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .100.... .1010111 */ + if (trans_vwsmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .100.... .1010111 */ + if (trans_vwsmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .100.... .1010111 */ + if (trans_vwsmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80005000: + /* 1....... ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .101.... .1010111 */ + if (trans_vfdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .101.... .1010111 */ + if (trans_vfrdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .101.... .1010111 */ + if (trans_vfmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .101.... .1010111 */ + if (trans_vfrsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .101.... .1010111 */ + if (trans_vfmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .101.... .1010111 */ + if (trans_vfnmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .101.... .1010111 */ + if (trans_vfmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .101.... .1010111 */ + if (trans_vfnmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .101.... .1010111 */ + if (trans_vfmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .101.... .1010111 */ + if (trans_vfnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .101.... .1010111 */ + if (trans_vfmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .101.... .1010111 */ + if (trans_vfnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .101.... .1010111 */ + if (trans_vfwadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .101.... .1010111 */ + if (trans_vfwsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .101.... .1010111 */ + if (trans_vfwadd_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .101.... .1010111 */ + if (trans_vfwsub_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .101.... .1010111 */ + if (trans_vfwmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .101.... 
.1010111 */ + if (trans_vfwmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .101.... .1010111 */ + if (trans_vfwnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .101.... .1010111 */ + if (trans_vfwmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .101.... .1010111 */ + if (trans_vfwnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80006000: + /* 1....... ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .110.... .1010111 */ + if (trans_vdivu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .110.... .1010111 */ + if (trans_vdiv_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .110.... .1010111 */ + if (trans_vremu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .110.... .1010111 */ + if (trans_vrem_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .110.... .1010111 */ + if (trans_vmulhu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .110.... .1010111 */ + if (trans_vmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .110.... .1010111 */ + if (trans_vmulhsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .110.... .1010111 */ + if (trans_vmulh_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .110.... .1010111 */ + if (trans_vmadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .110.... .1010111 */ + if (trans_vnmsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .110.... .1010111 */ + if (trans_vmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .110.... .1010111 */ + if (trans_vnmsac_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .110.... .1010111 */ + if (trans_vwaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .110.... .1010111 */ + if (trans_vwadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .110.... .1010111 */ + if (trans_vwsubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .110.... .1010111 */ + if (trans_vwsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .110.... .1010111 */ + if (trans_vwaddu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .110.... .1010111 */ + if (trans_vwadd_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .110.... .1010111 */ + if (trans_vwsubu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .110.... .1010111 */ + if (trans_vwsub_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .110.... .1010111 */ + if (trans_vwmulu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .110.... .1010111 */ + if (trans_vwmulsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .110.... .1010111 */ + if (trans_vwmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .110.... 
.1010111 */ + if (trans_vwmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .110.... .1010111 */ + if (trans_vwmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .110.... .1010111 */ + if (trans_vwmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .110.... .1010111 */ + if (trans_vwmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80007000: + /* 1....... ........ .111.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x3f) { + case 0x0: + /* 1000000. ........ .111.... .1010111 */ + if (trans_vsetvl(ctx, &u.f_r)) return true; return false; } return false; @@ -1568,32 +4285,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:90 */ if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x1: /* ........ ........ .001.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:91 */ if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x4: /* ........ ........ .100.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:92 */ if (trans_blt(ctx, &u.f_b)) return true; return false; case 0x5: /* ........ ........ .101.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:93 */ if (trans_bge(ctx, &u.f_b)) return true; return false; case 0x6: /* ........ ........ .110.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:94 */ if (trans_bltu(ctx, &u.f_b)) return true; return false; case 0x7: /* ........ ........ .111.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:95 */ if (trans_bgeu(ctx, &u.f_b)) return true; return false; } @@ -1604,14 +4315,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:89 */ if (trans_jalr(ctx, &u.f_i)) return true; return false; } return false; case 0x0000006f: /* ........ ........ ........ .1101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:88 */ decode_insn32_extract_j(ctx, &u.f_j, insn); if (trans_jal(ctx, &u.f_j)) return true; return false; @@ -1623,21 +4332,18 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe000f80) { case 0x00000000: /* 0000000. ........ 
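
The case values above suggest the enclosing switch masks bit 31 together with the funct3 field (insn & 0x80007000) after the OP-V major opcode has already been matched, and the innermost switch then keys on bits 31:25. A minimal standalone sketch, assuming that masking scheme and using vsetvl x1, x2, x3 as the test word (the encoding is computed here, not taken from the generated file):

    /* Standalone sketch, not part of the generated decoder: how an OP-V word
     * is narrowed step by step.  vsetvl x1, x2, x3 is built from its fields:
     * funct7=1000000, rs2=3, rs1=2, funct3=111, rd=1, opcode=1010111. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t insn = (0x40u << 25) | (3u << 20) | (2u << 15) |
                        (7u << 12) | (1u << 7) | 0x57u;

        printf("insn                = 0x%08" PRIx32 "\n", insn);        /* 0x803170d7 */
        printf("opcode              = 0x%02" PRIx32 "\n", insn & 0x7f); /* 0x57 (OP-V) */
        printf("insn & 0x80007000   = 0x%08" PRIx32 "\n",
               insn & 0x80007000);                      /* 0x80007000 case above */
        printf("(insn >> 25) & 0x3f = 0x%" PRIx32 "\n",
               (insn >> 25) & 0x3f);                    /* 0x0 -> trans_vsetvl */
        return 0;
    }
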
.0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x0: /* 00000000 00000000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:74 */ if (trans_ecall(ctx, &u.f_empty)) return true; return false; case 0x20: /* 00000000 00010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:75 */ if (trans_ebreak(ctx, &u.f_empty)) return true; return false; case 0x40: /* 00000000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:76 */ if (trans_uret(ctx, &u.f_empty)) return true; return false; } @@ -1647,28 +4353,25 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 20) & 0x1f) { case 0x2: /* 00010000 0010.... .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:77 */ if (trans_sret(ctx, &u.f_empty)) return true; return false; } return false; case 0x4: /* 00010000 0100.... .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:83 */ - decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3215, insn); - if (trans_sfence_vm(ctx, &u.f_decode_insn3215)) return true; + decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3223, insn); + if (trans_sfence_vm(ctx, &u.f_decode_insn3223)) return true; return false; case 0x5: /* 00010000 0101.... .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 01010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:79 */ if (trans_wfi(ctx, &u.f_empty)) return true; return false; } @@ -1677,70 +4380,60 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x12000000: /* 0001001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:82 */ - decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3214, insn); - if (trans_sfence_vma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3222, insn); + if (trans_sfence_vma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x22000000: /* 0010001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:81 */ - decode_insn32_extract_hfence_bvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_bvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_vvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_vvma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x30000000: /* 0011000. ........ 
.0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x40: /* 00110000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:78 */ if (trans_mret(ctx, &u.f_empty)) return true; return false; } return false; case 0x62000000: /* 0110001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:80 */ - decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_gvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_gvma(ctx, &u.f_decode_insn3222)) return true; return false; } return false; case 0x1: /* ........ ........ .001.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:125 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrw(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrw(ctx, &u.f_decode_insn3214)) return true; return false; case 0x2: /* ........ ........ .010.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:126 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrs(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrs(ctx, &u.f_decode_insn3214)) return true; return false; case 0x3: /* ........ ........ .011.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:127 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrc(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrc(ctx, &u.f_decode_insn3214)) return true; return false; case 0x5: /* ........ ........ .101.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:128 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrwi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrwi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x6: /* ........ ........ .110.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:129 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrsi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrsi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x7: /* ........ ........ .111.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:130 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrci(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrci(ctx, &u.f_decode_insn3214)) return true; return false; } return false; diff --git a/qemu/target/riscv/translate.c b/qemu/target/riscv/translate.c index 792bc12fd0..37f11cc481 100644 --- a/qemu/target/riscv/translate.c +++ b/qemu/target/riscv/translate.c @@ -56,6 +56,13 @@ typedef struct DisasContext { to reset this known value. 
*/ int frm; bool ext_ifencei; + /* vector extension */ + bool vill; + uint8_t lmul; + uint8_t sew; + uint16_t vlen; + uint16_t mlen; + bool vl_eq_vlmax; // Unicorn struct uc_struct *uc; @@ -557,6 +564,11 @@ static void decode_RV32_64C(DisasContext *ctx, uint16_t opcode) } } +static int ex_plus_1(DisasContext *ctx, int nf) +{ + return nf + 1; +} + #define EX_SH(amount) \ static int ex_shift_##amount(DisasContext *ctx, int imm) \ { \ @@ -733,6 +745,8 @@ static bool gen_shift(DisasContext *ctx, arg_r *a, #include "insn_trans/trans_rva.inc.c" #include "insn_trans/trans_rvf.inc.c" #include "insn_trans/trans_rvd.inc.c" +#include "insn_trans/trans_rvh.inc.c" +#include "insn_trans/trans_rvv.inc.c" #include "insn_trans/trans_privileged.inc.c" /* Include the auto-generated decoder for 16 bit insn */ @@ -779,13 +793,14 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) DisasContext *ctx = container_of(dcbase, DisasContext, base); CPURISCVState *env = cs->env_ptr; RISCVCPU *cpu = RISCV_CPU(cs); + uint32_t tb_flags = ctx->base.tb->flags; // unicorn setup ctx->uc = cs->uc; ctx->pc_succ_insn = ctx->base.pc_first; - ctx->mem_idx = ctx->base.tb->flags & TB_FLAGS_MMU_MASK; - ctx->mstatus_fs = ctx->base.tb->flags & TB_FLAGS_MSTATUS_FS; + ctx->mem_idx = tb_flags & TB_FLAGS_MMU_MASK; + ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; ctx->priv_ver = env->priv_ver; if (riscv_has_ext(env, RVH)) { @@ -807,6 +822,12 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->misa = env->misa; ctx->frm = -1; /* unknown rounding mode */ ctx->ext_ifencei = cpu->cfg.ext_ifencei; + ctx->vlen = cpu->cfg.vlen; + ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); + ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); + ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL); + ctx->mlen = 1 << (ctx->sew + 3 - ctx->lmul); + ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); } static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -965,6 +986,7 @@ void riscv_translate_init(struct uc_struct *uc) } tcg_ctx->cpu_pc = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, pc), "pc"); + tcg_ctx->cpu_vl = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, vl), "vl"); tcg_ctx->load_res = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, load_res), "load_res"); tcg_ctx->load_val = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, load_val), diff --git a/qemu/target/riscv/vector_helper.c b/qemu/target/riscv/vector_helper.c new file mode 100644 index 0000000000..1c726edf0a --- /dev/null +++ b/qemu/target/riscv/vector_helper.c @@ -0,0 +1,4913 @@ +/* + * RISC-V Vector Extension Helpers for QEMU. + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
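
The new DisasContext fields cache VILL/SEW/LMUL from tb->flags and derive mlen as 1 << (sew + 3 - lmul). Since the encoded fields mean SEW = 8 << sew and LMUL = 1 << lmul, that expression is just SEW/LMUL, the mask-element width from the v0.7.1 vector spec. A small standalone sketch of the arithmetic (illustrative only, no QEMU types):

    /* Standalone sketch of the MLEN derivation used above: with SEW = 8 << sew
     * and LMUL = 1 << lmul, mlen = 1 << (sew + 3 - lmul) is exactly SEW/LMUL. */
    #include <stdio.h>

    int main(void)
    {
        for (unsigned sew = 0; sew <= 3; sew++) {        /* 8..64-bit elements */
            for (unsigned lmul = 0; lmul <= 3; lmul++) { /* LMUL 1..8 */
                unsigned mlen = 1u << (sew + 3 - lmul);
                printf("SEW=%2u LMUL=%u -> MLEN=%2u\n",
                       8u << sew, 1u << lmul, mlen);
            }
        }
        return 0;
    }
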
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/memop.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" +#include "tcg/tcg-gvec-desc.h" +#include "internals.h" +#include + +target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, + target_ulong s2) +{ + int vlmax, vl; + RISCVCPU *cpu = env_archcpu(env); + uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); + uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); + bool vill = FIELD_EX64(s2, VTYPE, VILL); + target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED); + + if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { + /* only set vill bit. */ + FIELD_DP64(0, VTYPE, VILL, 1, env->vtype); + env->vl = 0; + env->vstart = 0; + return 0; + } + + vlmax = vext_get_vlmax(cpu, s2); + if (s1 <= vlmax) { + vl = s1; + } else { + vl = vlmax; + } + env->vl = vl; + env->vtype = s2; + env->vstart = 0; + return vl; +} + +/* + * Note that vector data is stored in host-endian 64-bit chunks, + * so addressing units smaller than that needs a host-endian fixup. + */ +#ifdef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H1_2(x) ((x) ^ 6) +#define H1_4(x) ((x) ^ 4) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#define H8(x) ((x)) +#else +#define H1(x) (x) +#define H1_2(x) (x) +#define H1_4(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#define H8(x) (x) +#endif + +static inline uint32_t vext_nf(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, NF); +} + +static inline uint32_t vext_mlen(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, MLEN); +} + +static inline uint32_t vext_vm(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VM); +} + +static inline uint32_t vext_lmul(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, LMUL); +} + +static uint32_t vext_wd(uint32_t desc) +{ + return (simd_data(desc) >> 11) & 0x1; +} + +/* + * Get vector group length in bytes. Its range is [64, 2048]. + * + * As simd_desc support at most 256, the max vlen is 512 bits. + * So vlen in bytes is encoded as maxsz. + */ +static inline uint32_t vext_maxsz(uint32_t desc) +{ + return simd_maxsz(desc) << vext_lmul(desc); +} + +/* + * This function checks watchpoint before real load operation. + * + * In softmmu mode, the TLB API probe_access is enough for watchpoint check. + * In user mode, there is no watchpoint support now. + * + * It will trigger an exception if there is no mapping in TLB + * and page table walk can't fill the TLB entry. Then the guest + * software can return here after process the exception or never return. + */ +static void probe_pages(CPURISCVState *env, target_ulong addr, + target_ulong len, uintptr_t ra, + MMUAccessType access_type) +{ + target_ulong pagelen = -(addr | TARGET_PAGE_MASK); + target_ulong curlen = MIN(pagelen, len); + + probe_access(env, addr, curlen, access_type, + cpu_mmu_index(env, false), ra); + if (len > curlen) { + addr += curlen; + curlen = len - curlen; + probe_access(env, addr, curlen, access_type, + cpu_mmu_index(env, false), ra); + } +} + +#ifdef HOST_WORDS_BIGENDIAN +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) +{ + /* + * Split the remaining range to two parts. + * The first part is in the last uint64_t unit. + * The second part start from the next uint64_t unit. 
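
The H1/H2/H4/H8 macros above implement the host-endian fixup described in the comment: vector registers are stored as host-endian uint64_t chunks, and XOR-ing the index on a big-endian host makes sub-64-bit elements land where a little-endian layout would put them. A standalone sketch of the effect, restating only the H1 definition (the HOST_WORDS_BIGENDIAN convention is the patch's; the rest is illustrative):

    /* Standalone sketch of the host-endian fixup for byte-sized elements. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #ifdef HOST_WORDS_BIGENDIAN
    #define H1(x) ((x) ^ 7)
    #else
    #define H1(x) (x)
    #endif

    int main(void)
    {
        uint64_t chunk = 0;
        uint8_t *bytes = (uint8_t *)&chunk;

        /* store byte element 0 through the fixed-up index ... */
        bytes[H1(0)] = 0xAA;

        /* ... and it lands in the least-significant byte of the 64-bit chunk
         * on either host endianness. */
        printf("chunk = 0x%016" PRIx64 "\n", chunk);   /* 0x00000000000000aa */
        return 0;
    }
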
+ */ + int part1 = 0, part2 = tot - cnt; + if (cnt % 8) { + part1 = 8 - (cnt % 8); + part2 = tot - cnt - part1; + memset(QEMU_ALIGN_PTR_DOWN(tail, 8), 0, part1); + memset(QEMU_ALIGN_PTR_UP(tail, 8), 0, part2); + } else { + memset(tail, 0, part2); + } +} +#else +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) +{ + memset(tail, 0, tot - cnt); +} +#endif + +static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int8_t *cur = ((int8_t *)vd + H1(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int16_t *cur = ((int16_t *)vd + H2(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int32_t *cur = ((int32_t *)vd + H4(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int64_t *cur = (int64_t *)vd + idx; + vext_clear(cur, cnt, tot); +} + +static inline void vext_set_elem_mask(void *v0, int mlen, int index, + uint8_t value) +{ + int idx = (index * mlen) / 64; + int pos = (index * mlen) % 64; + uint64_t old = ((uint64_t *)v0)[idx]; + ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value); +} + +static inline int vext_elem_mask(void *v0, int mlen, int index) +{ + int idx = (index * mlen) / 64; + int pos = (index * mlen) % 64; + return (((uint64_t *)v0)[idx] >> pos) & 1; +} + +/* elements operations for load and store */ +typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, + uint32_t idx, void *vd, uintptr_t retaddr); +typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot); + +#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF) \ +static void NAME(CPURISCVState *env, abi_ptr addr, \ + uint32_t idx, void *vd, uintptr_t retaddr)\ +{ \ + MTYPE data; \ + ETYPE *cur = ((ETYPE *)vd + H(idx)); \ + data = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ + *cur = data; \ +} \ + +GEN_VEXT_LD_ELEM(ldb_b, int8_t, int8_t, H1, ldsb) +GEN_VEXT_LD_ELEM(ldb_h, int8_t, int16_t, H2, ldsb) +GEN_VEXT_LD_ELEM(ldb_w, int8_t, int32_t, H4, ldsb) +GEN_VEXT_LD_ELEM(ldb_d, int8_t, int64_t, H8, ldsb) +GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw) +GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw) +GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw) +GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl) +GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl) +GEN_VEXT_LD_ELEM(lde_b, int8_t, int8_t, H1, ldsb) +GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw) +GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl) +GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq) +GEN_VEXT_LD_ELEM(ldbu_b, uint8_t, uint8_t, H1, ldub) +GEN_VEXT_LD_ELEM(ldbu_h, uint8_t, uint16_t, H2, ldub) +GEN_VEXT_LD_ELEM(ldbu_w, uint8_t, uint32_t, H4, ldub) +GEN_VEXT_LD_ELEM(ldbu_d, uint8_t, uint64_t, H8, ldub) +GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw) +GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw) +GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw) +GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl) +GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl) + +#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ +static void NAME(CPURISCVState *env, abi_ptr addr, \ + uint32_t idx, void *vd, uintptr_t retaddr)\ +{ \ + ETYPE data = *((ETYPE *)vd + H(idx)); \ + cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ +} + +GEN_VEXT_ST_ELEM(stb_b, int8_t, H1, stb) +GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb) +GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb) +GEN_VEXT_ST_ELEM(stb_d, int64_t, 
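
vext_set_elem_mask()/vext_elem_mask() pack one mlen-bit mask element per lane into an array of uint64_t words: element i lives at bit (i*mlen) % 64 of word (i*mlen) / 64. A standalone sketch of that indexing, with a hand-rolled stand-in for QEMU's deposit64() (deposit64_sketch is an illustrative name, not from the patch):

    /* Standalone sketch of the mask-element packing used above. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t deposit64_sketch(uint64_t old, int pos, int len, uint64_t val)
    {
        uint64_t mask = (len == 64 ? ~0ull : ((1ull << len) - 1)) << pos;
        return (old & ~mask) | ((val << pos) & mask);
    }

    int main(void)
    {
        uint64_t v0[4] = { 0 };          /* a small mask register */
        int mlen = 4;                    /* e.g. SEW=32, LMUL=8 -> MLEN=4 */

        /* set mask element 20: bit (20*4)%64 = 16 of word (20*4)/64 = 1 */
        int index = 20;
        int idx = (index * mlen) / 64;
        int pos = (index * mlen) % 64;
        v0[idx] = deposit64_sketch(v0[idx], pos, mlen, 1);

        /* read it back the way vext_elem_mask() does */
        int active = (v0[idx] >> pos) & 1;
        printf("word %d = 0x%016llx, element %d active = %d\n",
               idx, (unsigned long long)v0[idx], index, active);
        return 0;
    }
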
H8, stb) +GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw) +GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw) +GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw) +GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl) +GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl) +GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) +GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) +GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) +GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) + +/* + *** stride: access vector element from strided memory + */ +static void +vext_ldst_stride(void *vd, void *v0, target_ulong base, + target_ulong stride, CPURISCVState *env, + uint32_t desc, uint32_t vm, + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, base + stride * i, nf * msz, ra, access_type); + } + /* do real access */ + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + target_ulong addr = base + stride * i + k * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ + target_ulong stride, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vm = vext_vm(desc); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_STRIDE(vlsb_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LD_STRIDE(vlsb_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LD_STRIDE(vlsb_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LD_STRIDE(vlsb_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LD_STRIDE(vlsh_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LD_STRIDE(vlsh_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LD_STRIDE(vlsh_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LD_STRIDE(vlsw_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LD_STRIDE(vlsw_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LD_STRIDE(vlse_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LD_STRIDE(vlse_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LD_STRIDE(vlse_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LD_STRIDE(vlse_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + target_ulong stride, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vm = vext_vm(desc); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ + 
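
For the strided segment loads and stores above, vext_ldst_stride() probes every active element's pages first and then fetches field k of element i from base + stride*i + k*msz, placing it at register-file slot i + k*vlmax. A standalone sketch of that address and slot arithmetic (all values illustrative):

    /* Standalone sketch of the addressing in vext_ldst_stride(). */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t base = 0x1000;
        uint64_t stride = 32;     /* bytes between consecutive elements */
        uint32_t msz = 4;         /* 32-bit memory elements */
        uint32_t nf = 2;          /* two fields per segment */
        uint32_t vl = 3;          /* only the first three elements are active */
        uint32_t vlmax = 8;       /* illustrative group length in elements */

        for (uint32_t i = 0; i < vl; i++) {
            for (uint32_t k = 0; k < nf; k++) {
                uint64_t addr = base + stride * i + k * msz;
                uint32_t slot = i + k * vlmax;
                printf("element %u field %u: mem 0x%04llx -> slot %u\n",
                       i, k, (unsigned long long)addr, slot);
            }
        }
        return 0;
    }
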
NULL, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_STORE); \ +} + +GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t, int8_t, stb_b) +GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t, int16_t, stb_h) +GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t, int32_t, stb_w) +GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t, int64_t, stb_d) +GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h) +GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w) +GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d) +GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w) +GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d) +GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t, int8_t, ste_b) +GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h) +GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w) +GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d) + +/* + *** unit-stride: access elements stored contiguously in memory + */ + +/* unmasked unit-stride load and store operation*/ +static void +vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access */ + probe_pages(env, base, env->vl * nf * msz, ra, access_type); + /* load bytes from guest memory */ + for (i = 0; i < env->vl; i++) { + k = 0; + while (k < nf) { + target_ulong addr = base + (i * nf + k) * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +/* + * masked unit-stride load and store operation will be a special case of stride, + * stride = NF * sizeof (MTYPE) + */ + +#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} \ + \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_US(vlb_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LD_US(vlb_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LD_US(vlb_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LD_US(vlb_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LD_US(vlh_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LD_US(vlh_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LD_US(vlh_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LD_US(vlw_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LD_US(vlw_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LD_US(vle_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LD_US(vle_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LD_US(vle_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LD_US(vle_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LD_US(vlbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LD_US(vlbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LD_US(vlbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LD_US(vlbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LD_US(vlhu_v_d, 
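
The masked unit-stride helpers simply call vext_ldst_stride() with stride = nf * sizeof(MTYPE), while the unmasked fast path in vext_ldst_us() walks base + (i*nf + k)*msz directly; both produce the same address sequence, since base + (i*nf + k)*msz == base + (nf*msz)*i + k*msz. A tiny standalone check of that identity (values illustrative):

    /* Standalone sketch: the unit-stride walk and the strided walk with
     * stride = nf * msz visit exactly the same addresses. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t base = 0x2000;
        uint32_t nf = 3, msz = 2;              /* 3 fields of 16-bit elements */

        for (uint32_t i = 0; i < 16; i++) {
            for (uint32_t k = 0; k < nf; k++) {
                uint64_t unit    = base + (i * nf + k) * msz;
                uint64_t strided = base + (uint64_t)nf * msz * i + k * msz;
                assert(unit == strided);
            }
        }
        printf("address sequences match\n");
        return 0;
    }
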
uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN) \ +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ + NULL, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_STORE); \ +} \ + \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_us(vd, base, env, desc, STORE_FN, NULL, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\ +} + +GEN_VEXT_ST_US(vsb_v_b, int8_t, int8_t , stb_b) +GEN_VEXT_ST_US(vsb_v_h, int8_t, int16_t, stb_h) +GEN_VEXT_ST_US(vsb_v_w, int8_t, int32_t, stb_w) +GEN_VEXT_ST_US(vsb_v_d, int8_t, int64_t, stb_d) +GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h) +GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w) +GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d) +GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w) +GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d) +GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b) +GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h) +GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w) +GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d) + +/* + *** index: access vector element from indexed memory + */ +typedef target_ulong vext_get_index_addr(target_ulong base, + uint32_t idx, void *vs2); + +#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ +static target_ulong NAME(target_ulong base, \ + uint32_t idx, void *vs2) \ +{ \ + return (base + *((ETYPE *)vs2 + H(idx))); \ +} + +GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t, H1) +GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2) +GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4) +GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8) + +static inline void +vext_ldst_index(void *vd, void *v0, target_ulong base, + void *vs2, CPURISCVState *env, uint32_t desc, + vext_get_index_addr get_index_addr, + vext_ldst_elem_fn *ldst_elem, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t vm = vext_vm(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra, + access_type); + } + /* load bytes from guest memory */ + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + abi_ptr addr = get_index_addr(base, i, vs2) + k * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ + LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_INDEX(vlxb_v_b, int8_t, int8_t, idx_b, ldb_b, clearb) +GEN_VEXT_LD_INDEX(vlxb_v_h, int8_t, int16_t, idx_h, ldb_h, clearh) +GEN_VEXT_LD_INDEX(vlxb_v_w, int8_t, int32_t, idx_w, ldb_w, 
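
For the indexed (gather/scatter) forms, GEN_VEXT_GET_INDEX_ADDR builds the effective address of element i as base plus the i-th element of the index vector vs2, read at its natural width and sign-extended. A standalone sketch for an int16_t index vector (idx_h_sketch is an illustrative name; a little-endian host is assumed, so the H2() fixup is the identity):

    /* Standalone sketch of the indexed address computation. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t idx_h_sketch(uint64_t base, uint32_t i, const int16_t *vs2)
    {
        return base + vs2[i];          /* int16_t index, sign-extended */
    }

    int main(void)
    {
        const int16_t vs2[4] = { 0, 64, -32, 1024 };   /* index vector */
        uint64_t base = 0x4000;

        for (uint32_t i = 0; i < 4; i++) {
            printf("element %u -> 0x%llx\n",
                   i, (unsigned long long)idx_h_sketch(base, i, vs2));
        }
        return 0;
    }
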
clearl) +GEN_VEXT_LD_INDEX(vlxb_v_d, int8_t, int64_t, idx_d, ldb_d, clearq) +GEN_VEXT_LD_INDEX(vlxh_v_h, int16_t, int16_t, idx_h, ldh_h, clearh) +GEN_VEXT_LD_INDEX(vlxh_v_w, int16_t, int32_t, idx_w, ldh_w, clearl) +GEN_VEXT_LD_INDEX(vlxh_v_d, int16_t, int64_t, idx_d, ldh_d, clearq) +GEN_VEXT_LD_INDEX(vlxw_v_w, int32_t, int32_t, idx_w, ldw_w, clearl) +GEN_VEXT_LD_INDEX(vlxw_v_d, int32_t, int64_t, idx_d, ldw_d, clearq) +GEN_VEXT_LD_INDEX(vlxe_v_b, int8_t, int8_t, idx_b, lde_b, clearb) +GEN_VEXT_LD_INDEX(vlxe_v_h, int16_t, int16_t, idx_h, lde_h, clearh) +GEN_VEXT_LD_INDEX(vlxe_v_w, int32_t, int32_t, idx_w, lde_w, clearl) +GEN_VEXT_LD_INDEX(vlxe_v_d, int64_t, int64_t, idx_d, lde_d, clearq) +GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t, uint8_t, idx_b, ldbu_b, clearb) +GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t, uint16_t, idx_h, ldbu_h, clearh) +GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t, uint32_t, idx_w, ldbu_w, clearl) +GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t, uint64_t, idx_d, ldbu_d, clearq) +GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh) +GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl) +GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq) +GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl) +GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq) + +#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ + STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\ + GETPC(), MMU_DATA_STORE); \ +} + +GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t, int8_t, idx_b, stb_b) +GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t, int16_t, idx_h, stb_h) +GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t, int32_t, idx_w, stb_w) +GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t, int64_t, idx_d, stb_d) +GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h) +GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w) +GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d) +GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w) +GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d) +GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b) +GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h) +GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w) +GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d) + +/* + *** unit-stride fault-only-fisrt load instructions + */ +static inline void +vext_ldff(void *vd, void *v0, target_ulong base, + CPURISCVState *env, uint32_t desc, + vext_ldst_elem_fn *ldst_elem, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra) +{ + void *host; + uint32_t i, k, vl = 0; + uint32_t mlen = vext_mlen(desc); + uint32_t nf = vext_nf(desc); + uint32_t vm = vext_vm(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + target_ulong addr, offset, remain; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + addr = base + nf * i * msz; + if (i == 0) { + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); + } else { + /* if it triggers an exception, no need to check watchpoint */ + remain = nf * msz; + while (remain > 0) { + offset = -(addr | TARGET_PAGE_MASK); + host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, + cpu_mmu_index(env, false)); + if (host) { +#ifdef CONFIG_USER_ONLY + if (page_check_range(addr, nf * msz, PAGE_READ) < 0) { + vl = i; + goto ProbeSuccess; + } 
+#else + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); +#endif + } else { + vl = i; + goto ProbeSuccess; + } + if (remain <= offset) { + break; + } + remain -= offset; + addr += offset; + } + } + } +ProbeSuccess: + /* load bytes from guest memory */ + if (vl != 0) { + env->vl = vl; + } + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + target_ulong addr = base + (i * nf + k) * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (vl != 0) { + return; + } + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } +} + +#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC()); \ +} + +GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +/* + *** Vector AMO Operations (Zvamo) + */ +typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr, + uint32_t wd, uint32_t idx, CPURISCVState *env, + uintptr_t retaddr); + +/* no atomic opreation for vector atomic insructions */ +#define DO_SWAP(N, M) (M) +#define DO_AND(N, M) (N & M) +#define DO_XOR(N, M) (N ^ M) +#define DO_OR(N, M) (N | M) +#define DO_ADD(N, M) (N + M) + +#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \ +static void \ +vext_##NAME##_noatomic_op(void *vs3, target_ulong addr, \ + uint32_t wd, uint32_t idx, \ + CPURISCVState *env, uintptr_t retaddr)\ +{ \ + typedef int##ESZ##_t ETYPE; \ + typedef int##MSZ##_t MTYPE; \ + typedef uint##MSZ##_t UMTYPE UNICORN_UNUSED; \ + ETYPE *pe3 = (ETYPE *)vs3 + H(idx); \ + MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3; \ + \ + cpu_st##SUF##_data(env, addr, DO_OP(a, b)); \ + if (wd) { \ + *pe3 = a; \ + } \ +} + +/* Signed min/max */ +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? 
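
vext_ldff() implements the fault-only-first policy: element 0 is probed normally and may trap, but if a later element's page turns out to be unmapped the helper just truncates vl to that element index and returns what it has loaded so far. A behavioural sketch of that truncation (page_ok() is a toy stand-in for the page probe, not the patch's API):

    /* Standalone sketch of the fault-only-first truncation in vext_ldff(). */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* toy stand-in for the page probe: pretend addresses >= 0x3000 are unmapped */
    static bool page_ok(uint64_t addr) { return addr < 0x3000; }

    int main(void)
    {
        uint64_t base = 0x2FF0;
        uint32_t msz = 8, vl = 8;

        for (uint32_t i = 1; i < vl; i++) {        /* element 0 checked separately */
            if (!page_ok(base + (uint64_t)i * msz)) {
                vl = i;                            /* truncate instead of trapping */
                break;
            }
        }
        printf("vl truncated to %u\n", vl);        /* 2: elements 0 and 1 survive */
        return 0;
    }
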
(M) : (N)) + +/* Unsigned min/max */ +#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) +#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) + +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w, 32, 32, H4, DO_XOR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w, 32, 32, H4, DO_AND, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w, 32, 32, H4, DO_OR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w, 32, 32, H4, DO_MIN, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w, 32, 32, H4, DO_MAX, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l) +#ifdef TARGET_RISCV64 +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d, 64, 32, H8, DO_ADD, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d, 64, 64, H8, DO_ADD, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d, 64, 32, H8, DO_XOR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d, 64, 64, H8, DO_XOR, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d, 64, 32, H8, DO_AND, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d, 64, 64, H8, DO_AND, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d, 64, 32, H8, DO_OR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d, 64, 64, H8, DO_OR, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d, 64, 32, H8, DO_MIN, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d, 64, 64, H8, DO_MIN, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d, 64, 32, H8, DO_MAX, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d, 64, 64, H8, DO_MAX, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q) +#endif + +static inline void +vext_amo_noatomic(void *vs3, void *v0, target_ulong base, + void *vs2, CPURISCVState *env, uint32_t desc, + vext_get_index_addr get_index_addr, + vext_amo_noatomic_fn *noatomic_op, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra) +{ + uint32_t i; + target_long addr; + uint32_t wd = vext_wd(desc); + uint32_t vm = vext_vm(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD); + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE); + } + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + addr = get_index_addr(base, i, vs2); + noatomic_op(vs3, addr, wd, i, env, ra); + } + clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz); +} + +#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vs3, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_amo_noatomic(vs3, v0, base, vs2, env, desc, \ + INDEX_FN, vext_##NAME##_noatomic_op, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC()); \ +} + +#ifdef TARGET_RISCV64 +GEN_VEXT_AMO(vamoswapw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoswapd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoaddw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoaddd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoxorw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoxord_v_d, int64_t, int64_t, idx_d, clearq) 
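
Each vector AMO element in the *_noatomic_op helpers is a plain load/compute/store: the old memory value is read, old OP vs3[i] is stored back, and when the instruction's wd bit is set the old value is also written into the destination element. A standalone sketch of one vamoadd.w lane (illustrative values only):

    /* Standalone sketch of one non-atomic vector AMO element. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int32_t mem = 100;         /* the addressed memory word */
        int32_t vs3 = 7;           /* source (and optional destination) element */
        int wd = 1;                /* write-back requested */

        int32_t old = mem;         /* cpu_ldl_data() */
        mem = old + vs3;           /* DO_ADD, then cpu_stl_data() */
        if (wd) {
            vs3 = old;             /* element receives the original memory value */
        }
        printf("mem=%d vd=%d\n", mem, vs3);   /* mem=107 vd=100 */
        return 0;
    }
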
+GEN_VEXT_AMO(vamoandw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoandd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoorw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoord_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomind_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq) +#endif +GEN_VEXT_AMO(vamoswapw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoaddw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoxorw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoandw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoorw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) +GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) + +/* + *** Vector Integer Arithmetic Instructions + */ + +/* expand macro args before macro */ +#define RVVCALL(macro, ...) macro(__VA_ARGS__) + +/* (TD, T1, T2, TX1, TX2) */ +#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t +#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t +#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t +#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t +#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t +#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t +#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t +#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t +#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t +#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t +#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t +#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t +#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t +#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t +#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t +#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t +#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t +#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t +#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t +#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t +#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t +#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t +#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t +#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t +#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t + +/* operation of two vector elements */ +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); + +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, 
HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1); \ +} +#define DO_SUB(N, M) (N - M) +#define DO_RSUB(N, M) (M - N) + +RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) +RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) + +static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivv2_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, vs1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVV */ +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) + +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); + +/* + * (T1)s1 gives the real operator type. + * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 
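
RVVCALL() exists only to force the type-tuple macros (OP_SSS_B and friends) to expand before OPIVV2 pastes its arguments together. A standalone sketch of what one instantiation, RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD), boils down to after expansion (do_vadd_vv_b_sketch is an illustrative name; the little-endian case with H1(i) == i is assumed):

    /* Standalone sketch: hand-expanded form of one OPIVV2 instantiation,
     * applied per lane the way do_vext_vv() does. */
    #include <stdint.h>
    #include <stdio.h>

    static void do_vadd_vv_b_sketch(void *vd, void *vs1, void *vs2, int i)
    {
        int8_t s1 = *((int8_t *)vs1 + i);
        int8_t s2 = *((int8_t *)vs2 + i);
        *((int8_t *)vd + i) = s2 + s1;         /* DO_ADD(s2, s1) */
    }

    int main(void)
    {
        int8_t a[4] = { 1, 2, 3, 4 }, b[4] = { 10, 20, 30, 40 }, d[4];

        for (int i = 0; i < 4; i++) {
            do_vadd_vv_b_sketch(d, a, b, i);
        }
        printf("%d %d %d %d\n", d[0], d[1], d[2], d[3]);   /* 11 22 33 44 */
        return 0;
    }
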
+ */ +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ +} + +RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) +RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) +RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) + +static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivx2_fn fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, s1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVX */ +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq) + +void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + *(uint8_t *)((char*)d + i) = (uint8_t)b - *(uint8_t *)((char*)a + i); + } +} + +void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + *(uint16_t *)((char*)d + i) = (uint16_t)b - *(uint16_t *)((char*)a + i); + } +} + +void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + *(uint32_t *)((char*)d + i) = (uint32_t)b - *(uint32_t *)((char*)a + i); + } +} + +void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + *(uint64_t *)((char*)d + i) = b - *(uint64_t *)((char*)a + i); + } +} + +/* Vector Widening Integer Add/Subtract */ +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t +#define WOP_SSS_W int64_t, 
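
In OPIVX2 the scalar rs1 operand arrives as a target_long and the (TX1)(T1)s1 cast narrows it to the element type, so byte-wide ops only see the low 8 bits; DO_RSUB flips the operand order so that vrsub.vx computes scalar minus vector. A standalone sketch of both points (values illustrative):

    /* Standalone sketch of the scalar-operand narrowing and vrsub ordering. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t s1 = 0x1234;            /* scalar register value (target_long) */
        int8_t  s2 = 10;                /* one byte element of vs2 */

        int8_t add  = (int8_t)(s2 + (int8_t)s1);   /* vadd.vx:  s2 + (T1)s1 */
        int8_t rsub = (int8_t)((int8_t)s1 - s2);   /* vrsub.vx: (T1)s1 - s2 */

        printf("low byte of s1 = %d, vadd -> %d, vrsub -> %d\n",
               (int8_t)s1, add, rsub);             /* 52, 62, 42 */
        return 0;
    }
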
int32_t, int32_t, int64_t, int64_t +#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t +#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t +#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t +#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t +#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t +#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t +RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) +GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq) + +RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwaddu_wx_b, 
WOP_WUUU_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) +GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq) + +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ +#define DO_VADC(N, M, C) (N + M + C) +#define DO_VSBC(N, M, C) (N - M - C) + +#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC, clearb) +GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh) +GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl) +GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq) + +GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC, clearb) +GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh) +GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl) +GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq) + +#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC, clearb) +GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, 
DO_VADC, clearh) +GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl) +GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq) + +GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC, clearb) +GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh) +GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl) +GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq) + +#ifdef _MSC_VER + #define DO_MADC(N, M, C) (C ? ((N) + (M) + 1) <= (N) : \ + ((N) + (M)) < (N)) +#else + #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ + (__typeof(N))(N + M) < N) +#endif +#define DO_MSBC(N, M, C) (C ? N <= M : N < M) + +#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) + +GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) + +#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) + +GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) + +/* Vector Bitwise Logical Instructions */ +RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) +RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) +RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) +RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) +RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) +RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) +RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) +RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) +RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) +GEN_VEXT_VV(vand_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vand_vv_h, 2, 2, 
clearh) +GEN_VEXT_VV(vand_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vand_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vor_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vor_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vor_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vor_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) +RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) +RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) +RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) +RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) +RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) +RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) +RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) +RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) +GEN_VEXT_VX(vand_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vand_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vand_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vand_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vor_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vor_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vor_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vor_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq) + +/* Vector Single-Width Bit Shift Instructions */ +#define DO_SLL(N, M) (N << (M)) +#define DO_SRL(N, M) (N >> (M)) + +/* generate the helpers for shift instructions with two vector operators */ +#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TS1); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) + +/* generate the helpers for shift instructions with one vector and one scalar */ +#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t 
mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) + +/* Vector Narrowing Integer Right Shift Instructions */ +GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) + +/* Vector Integer Comparison Instructions */ +#define DO_MSEQ(N, M) (N == M) +#define DO_MSNE(N, M) (N != M) +#define DO_MSLT(N, M) (N < M) +#define DO_MSLE(N, M) (N <= M) +#define DO_MSGT(N, M) (N > M) + +#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) + +GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) +GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 
+GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) +GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) + +GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) + +#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)(target_long)s1)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) + +GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) + +GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) + +GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) + +/* Vector Integer Min/Max Instructions */ +RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_b, 
OP_SSS_B, H1, H1, H1, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) +RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) +GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) +RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) +GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq) + +/* Vector Single-Width Integer Multiply Instructions */ +#define DO_MUL(N, M) (N * M) +RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) +GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq) + +static int8_t do_mulh_b(int8_t s2, int8_t s1) +{ + return (int16_t)s2 * (int16_t)s1 >> 8; +} + +static int16_t do_mulh_h(int16_t s2, int16_t s1) +{ + return (int32_t)s2 * (int32_t)s1 >> 16; +} + +static int32_t do_mulh_w(int32_t s2, int32_t s1) +{ + return (int64_t)s2 * (int64_t)s1 >> 32; +} + 
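(Editor's note — illustrative sketch, not part of the upstream diff: the do_mulh_* helpers above recover the high half of a signed product by widening both operands to the next element size and shifting right by the element width. The standalone C example below, with mulh_b as a hypothetical stand-in for do_mulh_b, checks the trick on one byte-sized case; it relies on the same arithmetic right shift of a negative product that the helpers do.)

#include <stdint.h>
#include <stdio.h>

/* Same widen-multiply-shift pattern as do_mulh_b above. */
static int8_t mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

int main(void)
{
    /* -128 * 127 = -16256 = 0xC080 as 16 bits; the high byte 0xC0 is -64. */
    printf("%d\n", mulh_b(-128, 127));   /* prints -64 */
    return 0;
}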
+static int64_t do_mulh_d(int64_t s2, int64_t s1) +{ + uint64_t hi_64, lo_64; + + muls64(&lo_64, &hi_64, s1, s2); + return hi_64; +} + +static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) +{ + return (uint16_t)s2 * (uint16_t)s1 >> 8; +} + +static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) +{ + return (uint32_t)s2 * (uint32_t)s1 >> 16; +} + +static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) +{ + return (uint64_t)s2 * (uint64_t)s1 >> 32; +} + +static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) +{ + uint64_t hi_64, lo_64; + + mulu64(&lo_64, &hi_64, s2, s1); + return hi_64; +} + +static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) +{ + return (int16_t)s2 * (uint16_t)s1 >> 8; +} + +static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) +{ + return (int32_t)s2 * (uint32_t)s1 >> 16; +} + +static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) +{ + return (int64_t)s2 * (uint64_t)s1 >> 32; +} + +/* + * Let A = signed operand, + * B = unsigned operand + * P = mulu64(A, B), unsigned product + * + * LET X = 2 ** 64 - A, 2's complement of A + * SP = signed product + * THEN + * IF A < 0 + * SP = -X * B + * = -(2 ** 64 - A) * B + * = A * B - 2 ** 64 * B + * = P - 2 ** 64 * B + * ELSE + * SP = P + * THEN + * HI_P -= (A < 0 ? B : 0) + */ + +static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) +{ + uint64_t hi_64, lo_64; + + mulu64(&lo_64, &hi_64, s2, s1); + + hi_64 -= s2 < 0 ? s1 : 0; + return hi_64; +} + +RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) +RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) +RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) +RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) +RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) +RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) +RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) +RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) +RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) +RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) +RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) +RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) +GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) +RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) +RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) +RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) +RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) +RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) +RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) +RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) +RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) +RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) +RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) +RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) +RVVCALL(OPIVX2, 
vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) +GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq) + +/* Vector Integer Divide Instructions */ +#ifdef _MSC_VER + #define DO_DIVU(N, M) (unlikely(M == 0) ? (UINT64_MAX) : (N) / (M)) + #define DO_REMU(N, M) (unlikely(M == 0) ? (N) : (N) % (M)) + #define DO_DIV(N, M) (unlikely(M == 0) ? (-1) :\ + unlikely((N == -(N)) && (M == -1)) ? (N) : (N) / (M)) + #define DO_REM(N, M) (unlikely(M == 0) ? (N) :\ + unlikely((N == -(N)) && (M == -1)) ? 0 : (N) % (M)) +#else + #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) + #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) + #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) + #define DO_REM(N, M) (unlikely(M == 0) ? N :\ + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) +#endif + +RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) +RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) +RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) +RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) +RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) +RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) +RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) +GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) +RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) +RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_h, 
OP_UUU_H, H2, H2, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) +RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) +RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) +RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) +RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) +GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vremu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq) + +/* Vector Widening Integer Multiply Instructions */ +RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) +GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq) + +RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) +GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq) + +/* Vector Single-Width Integer Multiply-Add Instructions */ +#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, d); \ +} + +#define DO_MACC(N, M, D) (M * N + D) +#define DO_NMSAC(N, M, D) (-(M * N) + D) +#define DO_MADD(N, M, D) (M * D + N) +#define DO_NMSUB(N, M, D) (-(M * D) + N) +RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, 
DO_MACC) +RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) +RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) +RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) +GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq) + +#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ +} + +RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) +RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) +RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) +RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) +GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq) + +/* Vector Widening Integer Multiply-Add Instructions */ +RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 
+RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) +GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq) + +RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) +GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq) + +/* Vector Integer Merge and Move Instructions */ +#define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + *((ETYPE *)vd + H(i)) = s1; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1, clearb) +GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh) +GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl) +GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1, clearb) +GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh) +GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl) +GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE *vt = (!vext_elem_mask(v0, mlen, i) ? 
vs2 : vs1); \ + *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 : \ + (ETYPE)(target_long)s1); \ + *((ETYPE *)vd + H(i)) = d; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq) + +/* + *** Vector Fixed-Point Arithmetic Instructions + */ + +/* Vector Single-Width Saturating Add and Subtract */ + +/* + * As fixed point instructions probably have round mode and saturation, + * define common macros for fixed point here. + */ +typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, + CPURISCVState *env, int vxrm); + +#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ +} + +static inline void +vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, + opivv2_rm_fn *fn) +{ + for (uint32_t i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, vs1, vs2, i, env, vxrm); + } +} + +static inline void +vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, + uint32_t desc, uint32_t esz, uint32_t dsz, + opivv2_rm_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + + switch (env->vxrm) { + case 0: /* rnu */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 0, fn); + break; + case 1: /* rne */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 1, fn); + break; + case 2: /* rdn */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 2, fn); + break; + default: /* rod */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 3, fn); + break; + } + + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate helpers for fixed point instructions with OPIVV format */ +#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t res = a + b; + if (res < a) { + res = UINT8_MAX; + env->vxsat = 0x1; + } + return res; +} + +static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, + uint16_t b) +{ + uint16_t res = a + b; + if (res < a) { + res = UINT16_MAX; + env->vxsat = 0x1; + } + return res; +} + 
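(Editor's note — illustrative sketch, not part of the upstream diff: saddu8/saddu16 above detect unsigned overflow with the wraparound test "result < operand", which for unsigned addition holds exactly when the sum did not fit in the element type, and then saturate to the type maximum while setting the sticky vxsat flag. The standalone C example below uses sat_addu8 as a hypothetical stand-in, replacing the env->vxsat plumbing with a plain int flag.)

#include <stdint.h>
#include <stdio.h>

static uint8_t sat_addu8(uint8_t a, uint8_t b, int *sat)
{
    uint8_t res = (uint8_t)(a + b);
    if (res < a) {        /* sum wrapped past UINT8_MAX */
        *sat = 1;         /* mirrors env->vxsat = 0x1 in the helpers above */
        return UINT8_MAX;
    }
    return res;
}

int main(void)
{
    int sat = 0;
    printf("%d %d\n", sat_addu8(200, 100, &sat), sat);  /* 255 1 */
    printf("%d %d\n", sat_addu8(100, 100, &sat), sat);  /* 200 1: the flag is sticky */
    return 0;
}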
+static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, + uint32_t b) +{ + uint32_t res = a + b; + if (res < a) { + res = UINT32_MAX; + env->vxsat = 0x1; + } + return res; +} + +static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, + uint64_t b) +{ + uint64_t res = a + b; + if (res < a) { + res = UINT64_MAX; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) +RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) +RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) +RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) +GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq) + +typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, + CPURISCVState *env, int vxrm); + +#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ +} + +static inline void +vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, + opivx2_rm_fn *fn) +{ + for (uint32_t i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, s1, vs2, i, env, vxrm); + } +} + +static inline void +vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, + uint32_t desc, uint32_t esz, uint32_t dsz, + opivx2_rm_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + + switch (env->vxrm) { + case 0: /* rnu */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 0, fn); + break; + case 1: /* rne */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 1, fn); + break; + case 2: /* rdn */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 2, fn); + break; + default: /* rod */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 3, fn); + break; + } + + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate helpers for fixed point instructions with OPIVX format */ +#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) +RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) +RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) +RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) +GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq) + +static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + int8_t res = a + b; + if ((res ^ a) & (res ^ b) & INT8_MIN) { + res = a > 0 ? INT8_MAX : INT8_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + int16_t res = a + b; + if ((res ^ a) & (res ^ b) & INT16_MIN) { + res = a > 0 ? 
INT16_MAX : INT16_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int32_t res = a + b; + if ((res ^ a) & (res ^ b) & INT32_MIN) { + res = a > 0 ? INT32_MAX : INT32_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a + b; + if ((res ^ a) & (res ^ b) & INT64_MIN) { + res = a > 0 ? INT64_MAX : INT64_MIN; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) +RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) +RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) +RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) +GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) +RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) +RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) +RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) +GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq) + +static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, + uint16_t b) +{ + uint16_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, + uint32_t b) +{ + uint32_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, + uint64_t b) +{ + uint64_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) +RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) +RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) +RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) +GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) +RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) +RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) +RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) +GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq) + +static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + int8_t res = a - b; + if ((res ^ a) & (a ^ b) & INT8_MIN) { + res = a > 0 ? INT8_MAX : INT8_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + int16_t res = a - b; + if ((res ^ a) & (a ^ b) & INT16_MIN) { + res = a > 0 ? INT16_MAX : INT16_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int32_t res = a - b; + if ((res ^ a) & (a ^ b) & INT32_MIN) { + res = a > 0 ? 
INT32_MAX : INT32_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a - b; + if ((res ^ a) & (a ^ b) & INT64_MIN) { + res = a > 0 ? INT64_MAX : INT64_MIN; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) +RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) +RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) +RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) +GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) +RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) +RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) +RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) +GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq) + +/* Vector Single-Width Averaging Add and Subtract */ +static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) +{ + uint8_t d = extract64(v, shift, 1); + uint8_t d1; + uint64_t D1, D2; + + if (shift == 0 || shift > 64) { + return 0; + } + + d1 = extract64(v, shift - 1, 1); + D1 = extract64(v, 0, shift); + if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ + return d1; + } else if (vxrm == 1) { /* round-to-nearest-even */ + if (shift > 1) { + D2 = extract64(v, 0, shift - 1); + return d1 & ((D2 != 0) | d); + } else { + return d1 & d; + } + } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ + return !d & (D1 != 0); + } + return 0; /* round-down (truncate) */ +} + +static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int64_t res = (int64_t)a + b; + uint8_t round = get_round(vxrm, res, 1); + + return (res >> 1) + round; +} + +static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a + b; + uint8_t round = get_round(vxrm, res, 1); + int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. 
*/ + return ((res >> 1) ^ over) + round; +} + +RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) +GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) +GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq) + +static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int64_t res = (int64_t)a - b; + uint8_t round = get_round(vxrm, res, 1); + + return (res >> 1) + round; +} + +static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = (int64_t)a - b; + uint8_t round = get_round(vxrm, res, 1); + int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. */ + return ((res >> 1) ^ over) + round; +} + +RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) +GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) +GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq) + +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ +static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + uint8_t round; + int16_t res; + + res = (int16_t)a * (int16_t)b; + round = get_round(vxrm, res, 7); + res = (res >> 7) + round; + + if (res > INT8_MAX) { + env->vxsat = 0x1; + return INT8_MAX; + } else if (res < INT8_MIN) { + env->vxsat = 0x1; + return INT8_MIN; + } else { + return res; + } +} + +static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + uint8_t round; + int32_t res; + + res = (int32_t)a * (int32_t)b; + round = get_round(vxrm, res, 15); + res = (res >> 15) + round; + + if (res > INT16_MAX) { + env->vxsat = 0x1; + return INT16_MAX; + } else if (res < INT16_MIN) { + env->vxsat = 0x1; + return INT16_MIN; + } else { + return res; + } +} + +static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + uint8_t round; + int64_t res; + + res = (int64_t)a * (int64_t)b; + round = get_round(vxrm, res, 31); + res = (res >> 31) + round; + + if (res > INT32_MAX) { + env->vxsat = 0x1; + return INT32_MAX; + } else if (res < INT32_MIN) { + env->vxsat = 0x1; + return INT32_MIN; + } else { + return res; + } +} + +static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + uint8_t round; + uint64_t hi_64, lo_64; + int64_t 
res; + + if (a == INT64_MIN && b == INT64_MIN) { + env->vxsat = 1; + return INT64_MAX; + } + + muls64(&lo_64, &hi_64, a, b); + round = get_round(vxrm, lo_64, 63); + /* + * Cannot overflow, as there are always + * 2 sign bits after multiply. + */ + res = (hi_64 << 1) | (lo_64 >> 63); + if (round) { + if (res == INT64_MAX) { + env->vxsat = 1; + } else { + res += 1; + } + } + return res; +} + +RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) +RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) +RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) +RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) +GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) +RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) +RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) +RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) +GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) + +/* Vector Widening Saturating Scaled Multiply-Add */ +static inline uint16_t +vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, + uint16_t c) +{ + uint8_t round; + uint16_t res = (uint16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return saddu16(env, vxrm, c, res); +} + +static inline uint32_t +vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, + uint32_t c) +{ + uint8_t round; + uint32_t res = (uint32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return saddu32(env, vxrm, c, res); +} + +static inline uint64_t +vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, + uint64_t c) +{ + uint8_t round; + uint64_t res = (uint64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return saddu64(env, vxrm, c, res); +} + +#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ +} + +RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) +GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq) + +#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ +} + +RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) +GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + 
round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return sadd16(env, vxrm, c, res); +} + +static inline int32_t +vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return sadd32(env, vxrm, c, res); + +} + +static inline int64_t +vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return sadd64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) +RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) +RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) +GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) +RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) +RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) +GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = a * (int16_t)b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) +{ + uint8_t round; + int32_t res = a * (int32_t)b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = a * (int64_t)b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) +GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) +GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, 
WOP_SUS_B, H2, H1, vwsmaccus8) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) +GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq) + +/* Vector Single-Width Scaling Shift Instructions */ +static inline uint8_t +vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t round, shift = b & 0x7; + uint8_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint16_t +vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) +{ + uint8_t round, shift = b & 0xf; + uint16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint32_t +vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) +{ + uint8_t round, shift = b & 0x1f; + uint32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint64_t +vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) +{ + uint8_t round, shift = b & 0x3f; + uint64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) +RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) +RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) +RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) +GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) +RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) +RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) +RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) +GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq) + +static inline int8_t +vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + uint8_t round, shift = b & 0x7; + int8_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int16_t +vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + uint8_t round, shift = b & 0xf; + int16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int32_t +vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + uint8_t round, shift = b & 0x1f; + int32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int64_t +vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} + +RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) +RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) +RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) +RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) +GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) +RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) +RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, 
H4, H4, vssra32) +RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) +GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq) + +/* Vector Narrowing Fixed-Point Clip Instructions */ +static inline int8_t +vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) +{ + uint8_t round, shift = b & 0xf; + int16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT8_MAX) { + env->vxsat = 0x1; + return INT8_MAX; + } else if (res < INT8_MIN) { + env->vxsat = 0x1; + return INT8_MIN; + } else { + return res; + } +} + +static inline int16_t +vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) +{ + uint8_t round, shift = b & 0x1f; + int32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT16_MAX) { + env->vxsat = 0x1; + return INT16_MAX; + } else if (res < INT16_MIN) { + env->vxsat = 0x1; + return INT16_MIN; + } else { + return res; + } +} + +static inline int32_t +vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT32_MAX) { + env->vxsat = 0x1; + return INT32_MAX; + } else if (res < INT32_MIN) { + env->vxsat = 0x1; + return INT32_MIN; + } else { + return res; + } +} + +RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) +RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) +RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) +GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl) + +RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) +RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) +RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) +GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl) + +static inline uint8_t +vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) +{ + uint8_t round, shift = b & 0xf; + uint16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT8_MAX) { + env->vxsat = 0x1; + return UINT8_MAX; + } else { + return res; + } +} + +static inline uint16_t +vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) +{ + uint8_t round, shift = b & 0x1f; + uint32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT16_MAX) { + env->vxsat = 0x1; + return UINT16_MAX; + } else { + return res; + } +} + +static inline uint32_t +vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT32_MAX) { + env->vxsat = 0x1; + return UINT32_MAX; + } else { + return res; + } +} + +RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) +RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) +RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) +GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl) + +RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) +RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) +RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 
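+/*
+ * Editorial note, not part of the upstream patch: the narrowing clips above
+ * shift the double-width source right by the masked shift amount, add the
+ * vxrm rounding increment from get_round() (assumed to be the fixed-point
+ * rounding helper defined earlier in this file), and saturate into the
+ * single-width destination, setting vxsat on overflow.  For example, with
+ * round-to-nearest-up, vnclipu8 with a = 0x01FF and a shift of 4 yields
+ * (0x01FF >> 4) + 1 = 0x20, which fits in a byte, so vxsat is untouched.
+ */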
+GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl) + +/* + *** Vector Float Point Arithmetic Instructions + */ +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ +#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ +} + +#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs1, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) +RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) +RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) +GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq) + +#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ +} + +#define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, s1, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) +RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) +RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) +GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) +RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) +RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) +GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) +RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) +RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) +GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq) + +static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) +{ + return float16_sub(b, a, s); +} + +static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) +{ + return float32_sub(b, a, s); +} + +static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) +{ + return float64_sub(b, a, s); +} + +RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) +RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) +RVVCALL(OPFVF2, 
vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) +GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Add/Subtract Instructions */ +static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_add(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_add(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} + +RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) +RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) +GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) +RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) +GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq) + +static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_sub(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_sub(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} + +RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) +RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) +GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) +RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) +GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq) + +static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_add(a, float16_to_float32(b, true, s), s); +} + +static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) +{ + return float64_add(a, float32_to_float64(b, s), s); +} + +RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) +RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) +GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) +RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) +GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq) + +static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_sub(a, float16_to_float32(b, true, s), s); +} + +static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) +{ + return float64_sub(a, float32_to_float64(b, s), s); +} + +RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) +RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) +GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) +RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) +GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq) + +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ +RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) +RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) +RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) +GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) +RVVCALL(OPFVF2, 
vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) +RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) +GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) +RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) +RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) +GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) +RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) +RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) +GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq) + +static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) +{ + return float16_div(b, a, s); +} + +static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) +{ + return float32_div(b, a, s); +} + +static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) +{ + return float64_div(b, a, s); +} + +RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) +RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) +RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) +GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Multiply */ +static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_mul(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_mul(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} +RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) +RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) +GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) +RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) +GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq) + +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ +#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ +} + +static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, 0, s); +} + +static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, 0, s); +} + +static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, 0, s); +} + +RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) +RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) +RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) +GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq) + +#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = 
*((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ +} + +RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) +RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) +RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) +GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq) + +static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) +RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) +RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) +GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) +RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) +RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) +GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq) + +static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, float_muladd_negate_c, s); +} + +static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, float_muladd_negate_c, s); +} + +static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) +RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) +RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) +GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) +RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) +RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) +GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq) + +static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, float_muladd_negate_product, s); +} + +static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, float_muladd_negate_product, s); +} + +static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) +RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) +RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) +GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) +RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) +RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, 
fnmsac64) +GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq) + +static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, 0, s); +} + +static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, 0, s); +} + +static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, 0, s); +} + +RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) +RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) +RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) +GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) +RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) +RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) +GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq) + +static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) +RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) +RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) +GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) +RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) +RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) +GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq) + +static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, float_muladd_negate_c, s); +} + +static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, float_muladd_negate_c, s); +} + +static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) +RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) +RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) +GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) +RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) +RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) +GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq) + +static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, float_muladd_negate_product, s); +} + +static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, 
float_status *s) +{ + return float32_muladd(d, b, a, float_muladd_negate_product, s); +} + +static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) +RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) +RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) +GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) +RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) +RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) +GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ +static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, 0, s); +} + +static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, 0, s); +} + +RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) +RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) +GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) +RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) +GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq) + +static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) +RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) +RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) +GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq) + +static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_c, s); +} + +static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) +RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) +GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) +RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) +GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq) + +static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return 
float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_product, s); +} + +static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) +RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) +RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) +GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq) + +/* Vector Floating-Point Square-Root Instruction */ +/* (TD, T2, TX2) */ +#define OP_UU_H uint16_t, uint16_t, uint16_t +#define OP_UU_W uint32_t, uint32_t, uint32_t +#define OP_UU_D uint64_t, uint64_t, uint64_t + +#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ +} + +#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + if (vl == 0) { \ + return; \ + } \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) +RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) +RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) +GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq) + +/* Vector Floating-Point MIN/MAX Instructions */ +RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) +RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) +RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) +GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) +RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) +RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) +GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) +RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) +RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) +GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) +RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) +RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) +GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq) + +/* Vector Floating-Point Sign-Injection Instructions */ +static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(b, 0, 15, a); +} + +static uint32_t 
fsgnj32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(b, 0, 31, a); +} + +static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(b, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) +RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) +RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) +RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) +RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) +GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq) + +static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(~b, 0, 15, a); +} + +static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(~b, 0, 31, a); +} + +static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(~b, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) +RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) +RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) +RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) +RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) +GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq) + +static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 15, a); +} + +static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 31, a); +} + +static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) +RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) +RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) +RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) +RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) +GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq) + +/* Vector Floating-Point Compare Instructions */ +#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, s1, &env->fp_status)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = 
float16_compare_quiet(a, b, s); + return compare == float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) + +#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) +GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) +GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) + +static bool vmfne16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +static bool vmfne32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +static bool vmfne64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) +GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) +GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) +GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) +GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) +GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) + +static bool float16_lt(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_less; +} + +GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) +GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) +GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) +GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) +GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) +GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) + +static bool float16_le(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_less || + compare == float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) +GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) +GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) +GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) +GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) +GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) + +static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_greater; +} + +static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare(a, b, s); + return compare == float_relation_greater; +} + +static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare(a, b, s); + return compare == float_relation_greater; +} + +GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 
+GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) +GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) + +static bool vmfge16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +static bool vmfge32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +static bool vmfge64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) +GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) +GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) + +static bool float16_unordered_quiet(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare_quiet(a, b, s); + return compare == float_relation_unordered; +} + +GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) +GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) +GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) + +/* Vector Floating-Point Classify Instruction */ +#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2); \ +} + +#define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs2, i); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +target_ulong fclass_h(uint64_t frs1) +{ + float16 f = frs1; + bool sign = float16_is_neg(f); + + if (float16_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float16_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float16_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float16_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +target_ulong fclass_s(uint64_t frs1) +{ + float32 f = frs1; + bool sign = float32_is_neg(f); + + if (float32_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float32_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float32_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float32_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +target_ulong fclass_d(uint64_t frs1) +{ + float64 f = frs1; + bool sign = float64_is_neg(f); + + if (float64_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float64_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float64_is_zero_or_denormal(f)) { + return sign ? 
1 << 2 : 1 << 5; + } else if (float64_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) +RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) +RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) +GEN_VEXT_V(vfclass_v_h, 2, 2, clearh) +GEN_VEXT_V(vfclass_v_w, 4, 4, clearl) +GEN_VEXT_V(vfclass_v_d, 8, 8, clearq) + +/* Vector Floating-Point Merge Instruction */ +#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + *((ETYPE *)vd + H(i)) \ + = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh) +GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl) +GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq) + +/* Single-Width Floating-Point/Integer Type-Convert Instructions */ +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ +RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) +RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) +RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq) + +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ +RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) +RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) +RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq) + +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ +RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) +RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) +RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq) + +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ +RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) +RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) +RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq) + +/* Widening Floating-Point/Integer Type-Convert Instructions */ +/* (TD, T2, TX2) */ +#define WOP_UU_H uint32_t, uint16_t, uint16_t +#define WOP_UU_W uint64_t, uint32_t, uint32_t +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ +RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) +RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq) + +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. 
*/ +RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) +RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq) + +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ +RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) +RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq) + +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ +RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) +RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq) + +/* + * vfwcvt.f.f.v vd, vs2, vm # + * Convert single-width float to double-width float. + */ +static uint32_t vfwcvtffv16(uint16_t a, float_status *s) +{ + return float16_to_float32(a, true, s); +} + +RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) +RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq) + +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ +/* (TD, T2, TX2) */ +#define NOP_UU_H uint16_t, uint32_t, uint32_t +#define NOP_UU_W uint32_t, uint64_t, uint64_t +/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ +RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16) +RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32) +GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl) + +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ +RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16) +RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32) +GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl) + +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ +RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16) +RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl) + +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ +RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16) +RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl) + +/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ +static uint16_t vfncvtffv16(uint32_t a, float_status *s) +{ + return float32_to_float16(a, true, s); +} + +RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) +RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl) + +/* + *** Vector Reduction Operations + */ +/* Vector Single-Width Integer Reduction Instructions */ +#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ + TD s1 = *((TD *)vs1 + HD(0)); \ + \ + for (i = 0; i < vl; i++) { \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + s1 = OP(s1, (TD)s2); \ + } \ + *((TD *)vd + HD(0)) = s1; \ + CLEAR_FN(vd, 1, sizeof(TD), tot); \ +} + +/* vd[0] = sum(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb) +GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh) +GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl) +GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq) + +/* vd[0] = maxu(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb) +GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh) +GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl) +GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq) + +/* vd[0] = max(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb) +GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh) +GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl) +GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq) + +/* vd[0] = minu(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb) +GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh) +GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl) +GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq) + +/* vd[0] = min(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb) +GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh) +GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl) +GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq) + +/* vd[0] = and(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb) +GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh) +GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl) +GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq) + +/* vd[0] = or(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb) +GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh) +GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl) +GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq) + +/* vd[0] = xor(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb) +GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh) +GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl) +GEN_VEXT_RED(vredxor_vs_d, int64_t, 
int64_t, H8, H8, DO_XOR, clearq) + +/* Vector Widening Integer Reduction Instructions */ +/* signed sum reduction into double-width accumulator */ +GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh) +GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl) +GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq) + +/* Unsigned sum reduction into double-width accumulator */ +GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh) +GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl) +GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq) + +/* Vector Single-Width Floating-Point Reduction Instructions */ +#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ + TD s1 = *((TD *)vs1 + HD(0)); \ + \ + for (i = 0; i < vl; i++) { \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + s1 = OP(s1, (TD)s2, &env->fp_status); \ + } \ + *((TD *)vd + HD(0)) = s1; \ + CLEAR_FN(vd, 1, sizeof(TD), tot); \ +} + +/* Unordered sum */ +GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh) +GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl) +GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq) + +/* Maximum value */ +GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh) +GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl) +GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq) + +/* Minimum value */ +GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh) +GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl) +GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq) + +/* Vector Widening Floating-Point Reduction Instructions */ +/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ +void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, + void *vs2, CPURISCVState *env, uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; + uint32_t s1 = *((uint32_t *)vs1 + H4(0)); + + for (i = 0; i < vl; i++) { + uint16_t s2 = *((uint16_t *)vs2 + H2(i)); + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), + &env->fp_status); + } + *((uint32_t *)vd + H4(0)) = s1; + clearl(vd, 1, sizeof(uint32_t), tot); +} + +void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, + void *vs2, CPURISCVState *env, uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; + uint64_t s1 = *((uint64_t *)vs1); + + for (i = 0; i < vl; i++) { + uint32_t s2 = *((uint32_t *)vs2 + H4(i)); + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), + &env->fp_status); + } + *((uint64_t *)vd) = s1; + clearq(vd, 1, sizeof(uint64_t), tot); +} + +/* + *** Vector Mask Operations + */ +/* Vector Mask-Register Logical Instructions 
*/ +#define GEN_VEXT_MASK_VV(NAME, OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vl = env->vl; \ + uint32_t i; \ + int a, b; \ + \ + for (i = 0; i < vl; i++) { \ + a = vext_elem_mask(vs1, mlen, i); \ + b = vext_elem_mask(vs2, mlen, i); \ + vext_set_elem_mask(vd, mlen, i, OP(b, a)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +#define DO_NAND(N, M) (!(N & M)) +#define DO_ANDNOT(N, M) (N & !M) +#define DO_NOR(N, M) (!(N | M)) +#define DO_ORNOT(N, M) (N | !M) +#define DO_XNOR(N, M) (!(N ^ M)) + +GEN_VEXT_MASK_VV(vmand_mm, DO_AND) +GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) +GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) +GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) +GEN_VEXT_MASK_VV(vmor_mm, DO_OR) +GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) +GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) +GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) + +/* Vector mask population count vmpopc */ +target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + target_ulong cnt = 0; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + + for (i = 0; i < vl; i++) { + if (vm || vext_elem_mask(v0, mlen, i)) { + if (vext_elem_mask(vs2, mlen, i)) { + cnt++; + } + } + } + return cnt; +} + +/* vmfirst find-first-set mask bit*/ +target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + + for (i = 0; i < vl; i++) { + if (vm || vext_elem_mask(v0, mlen, i)) { + if (vext_elem_mask(vs2, mlen, i)) { + return i; + } + } + } + return -1LL; +} + +enum set_mask_type { + ONLY_FIRST = 1, + INCLUDE_FIRST, + BEFORE_FIRST, +}; + +static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc, enum set_mask_type type) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + bool first_mask_bit = false; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + /* write a zero to all following active elements */ + if (first_mask_bit) { + vext_set_elem_mask(vd, mlen, i, 0); + continue; + } + if (vext_elem_mask(vs2, mlen, i)) { + first_mask_bit = true; + if (type == BEFORE_FIRST) { + vext_set_elem_mask(vd, mlen, i, 0); + } else { + vext_set_elem_mask(vd, mlen, i, 1); + } + } else { + if (type == ONLY_FIRST) { + vext_set_elem_mask(vd, mlen, i, 0); + } else { + vext_set_elem_mask(vd, mlen, i, 1); + } + } + } + for (; i < vlmax; i++) { + vext_set_elem_mask(vd, mlen, i, 0); + } +} + +void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); +} + +void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); +} + +void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); +} + +/* Vector Iota Instruction */ +#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = 
vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t sum = 0; \ + int i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = sum; \ + if (vext_elem_mask(vs2, mlen, i)) { \ + sum++; \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb) +GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh) +GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl) +GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq) + +/* Vector Element Index Instruction */ +#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + int i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = i; \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb) +GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh) +GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl) +GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq) + +/* + *** Vector Permutation Instructions + */ + +/* Vector Slide Instructions */ +#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + target_ulong offset = s1, i; \ + \ + for (i = offset; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + target_ulong offset = s1, i; \ + \ + for (i = 0; i < vl; ++i) { \ + target_ulong j = i + offset; \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = j >= vlmax ? 
0 : *((ETYPE *)vs2 + H(j)); \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (i == 0) { \ + *((ETYPE *)vd + H(i)) = s1; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (i == vl - 1) { \ + *((ETYPE *)vd + H(i)) = s1; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq) + +/* Vector Register Gather Instruction */ +#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t index, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + index = *((ETYPE *)vs1 + H(i)); \ + if (index >= vlmax) { \ + *((ETYPE *)vd + H(i)) = 0; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; */ +GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb) +GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh) +GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl) +GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t index = s1, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (index >= vlmax) { \ + *((ETYPE *)vd + H(i)) = 0; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ +GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq) + +/* Vector Compress Instruction */ +#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vl = env->vl; \ + uint32_t num = 0, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vext_elem_mask(vs1, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ + num++; \ + } \ + CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* Compress into vd elements of vs2 where vs1 is enabled */ +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq) diff --git a/qemu/target/s390x/cpu_features_def.inc.h b/qemu/target/s390x/cpu_features_def.inc.h index 31dff0d84e..5942f81f16 100644 --- a/qemu/target/s390x/cpu_features_def.inc.h +++ b/qemu/target/s390x/cpu_features_def.inc.h @@ -107,6 +107,7 @@ DEF_FEAT(DEFLATE_BASE, "deflate-base", STFL, 151, "Deflate-conversion facility ( DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, "Vector-Packed-Decimal-Enhancement Facility") DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, "Message-security-assist-extension-9 facility (excluding subfunctions)") DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility") +DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility") /* Features exposed via SCLP SCCB Byte 80 - 98 (bit numbers relative to byte-80) */ DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility") @@ -310,7 +311,7 @@ DEF_FEAT(PCC_CMAC_ETDEA_192, "pcc-cmac-etdea-128", PCC, 10, "PCC Compute-Last-Bl DEF_FEAT(PCC_CMAC_TDEA, "pcc-cmac-etdea-192", PCC, 11, "PCC Compute-Last-Block-CMAC-Using-EncryptedTDEA-192") DEF_FEAT(PCC_CMAC_AES_128, "pcc-cmac-aes-128", PCC, 18, "PCC Compute-Last-Block-CMAC-Using-AES-128") DEF_FEAT(PCC_CMAC_AES_192, "pcc-cmac-aes-192", PCC, 19, "PCC Compute-Last-Block-CMAC-Using-AES-192") -DEF_FEAT(PCC_CMAC_AES_256, "pcc-cmac-eaes-256", PCC, 20, "PCC Compute-Last-Block-CMAC-Using-AES-256") +DEF_FEAT(PCC_CMAC_AES_256, "pcc-cmac-aes-256", PCC, 20, "PCC Compute-Last-Block-CMAC-Using-AES-256") 
DEF_FEAT(PCC_CMAC_EAES_128, "pcc-cmac-eaes-128", PCC, 26, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-128") DEF_FEAT(PCC_CMAC_EAES_192, "pcc-cmac-eaes-192", PCC, 27, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-192") DEF_FEAT(PCC_CMAC_EAES_256, "pcc-cmac-eaes-256", PCC, 28, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-256") diff --git a/qemu/target/s390x/fpu_helper.c b/qemu/target/s390x/fpu_helper.c index 0fc39d7138..241260f605 100644 --- a/qemu/target/s390x/fpu_helper.c +++ b/qemu/target/s390x/fpu_helper.c @@ -112,7 +112,7 @@ static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr) } } -int float_comp_to_cc(CPUS390XState *env, int float_compare) +int float_comp_to_cc(CPUS390XState *env, FloatRelation float_compare) { switch (float_compare) { case float_relation_equal: @@ -368,7 +368,7 @@ uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al, /* 32-bit FP compare */ uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float32_compare_quiet(f1, f2, &env->fpu_status); + FloatRelation cmp = float32_compare_quiet(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -376,7 +376,7 @@ uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2) /* 64-bit FP compare */ uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float64_compare_quiet(f1, f2, &env->fpu_status); + FloatRelation cmp = float64_compare_quiet(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -385,9 +385,9 @@ uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) uint32_t HELPER(cxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { - int cmp = float128_compare_quiet(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + FloatRelation cmp = float128_compare_quiet(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -675,7 +675,7 @@ uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, /* 32-bit FP compare and signal */ uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float32_compare(f1, f2, &env->fpu_status); + FloatRelation cmp = float32_compare(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -683,7 +683,7 @@ uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2) /* 64-bit FP compare and signal */ uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float64_compare(f1, f2, &env->fpu_status); + FloatRelation cmp = float64_compare(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -692,9 +692,9 @@ uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { - int cmp = float128_compare(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + FloatRelation cmp = float128_compare(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } diff --git a/qemu/target/s390x/gen-features.c b/qemu/target/s390x/gen-features.c index 6278845b12..8ddeebc544 100644 --- a/qemu/target/s390x/gen-features.c +++ b/qemu/target/s390x/gen-features.c @@ -562,6 +562,7 @@ static uint16_t 
full_GEN15_GA1[] = { S390_FEAT_GROUP_MSA_EXT_9, S390_FEAT_GROUP_MSA_EXT_9_PCKMO, S390_FEAT_ETOKEN, + S390_FEAT_UNPACK, }; /* Default features (in order of release) diff --git a/qemu/target/s390x/helper.h b/qemu/target/s390x/helper.h index abd8dd2a97..ddcce6de88 100644 --- a/qemu/target/s390x/helper.h +++ b/qemu/target/s390x/helper.h @@ -202,10 +202,6 @@ DEF_HELPER_FLAGS_4(gvec_vmlo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vmlo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_3(gvec_vpopct8, TCG_CALL_NO_RWG, void, ptr, cptr, i32) DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32) -DEF_HELPER_FLAGS_4(gvec_verllv8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) -DEF_HELPER_FLAGS_4(gvec_verllv16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) -DEF_HELPER_FLAGS_4(gvec_verll8, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) -DEF_HELPER_FLAGS_4(gvec_verll16, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) diff --git a/qemu/target/s390x/insn-data.def b/qemu/target/s390x/insn-data.def index 1660c4d1f8..5ff795fa13 100644 --- a/qemu/target/s390x/insn-data.def +++ b/qemu/target/s390x/insn-data.def @@ -798,7 +798,7 @@ /* SQUARE ROOT */ F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP) F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP) - F(0xb316, SQXBR, RRE, Z, x2h, x2l, new, x1, sqxb, 0, IF_BFP) + F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_P, x1, sqxb, 0, IF_BFP) F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP) F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP) @@ -1147,8 +1147,8 @@ /* VECTOR POPULATION COUNT */ F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC) /* VECTOR ELEMENT ROTATE LEFT LOGICAL */ - F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, verllv, 0, IF_VEC) - F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, verll, 0, IF_VEC) + F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) /* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */ F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC) /* VECTOR ELEMENT SHIFT LEFT */ diff --git a/qemu/target/s390x/internal.h b/qemu/target/s390x/internal.h index 82cf8726be..cec0957fb4 100644 --- a/qemu/target/s390x/internal.h +++ b/qemu/target/s390x/internal.h @@ -11,6 +11,7 @@ #define S390X_INTERNAL_H #include "cpu.h" +#include "fpu/softfloat.h" #ifndef CONFIG_USER_ONLY QEMU_PACK(typedef struct LowCore { @@ -268,7 +269,7 @@ uint32_t set_cc_nz_f128(float128 v); uint8_t s390_softfloat_exc_to_ieee(unsigned int exc); int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3); void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode); -int float_comp_to_cc(CPUS390XState *env, int float_compare); +int float_comp_to_cc(CPUS390XState *env, FloatRelation float_compare); uint16_t float32_dcmask(CPUS390XState *env, float32 f1); uint16_t float64_dcmask(CPUS390XState *env, float64 f1); uint16_t float128_dcmask(CPUS390XState *env, float128 f1); diff --git a/qemu/target/s390x/translate.c b/qemu/target/s390x/translate.c index e41a3b73b0..dec5f4139c 100644 --- a/qemu/target/s390x/translate.c +++ b/qemu/target/s390x/translate.c @@ -3936,8 +3936,7 @@ static DisasJumpType op_risbg(DisasContext *s, DisasOps *o) pmask = 0x00000000ffffffffull; break; default: - // g_assert_not_reached(); - break; + 
g_assert_not_reached(); } /* MASK is the set of bits to be inserted from R2. diff --git a/qemu/target/s390x/translate_vx.inc.c b/qemu/target/s390x/translate_vx.inc.c index 568b6a2acb..bdf0aecf34 100644 --- a/qemu/target/s390x/translate_vx.inc.c +++ b/qemu/target/s390x/translate_vx.inc.c @@ -233,8 +233,8 @@ static void get_vec_element_ptr_i64(TCGContext *tcg_ctx, TCGv_ptr ptr, uint8_t r #define gen_gvec_mov(tcg_ctx, v1, v2) \ tcg_gen_gvec_mov(tcg_ctx, 0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \ 16) -#define gen_gvec_dup64i(tcg_ctx, v1, c) \ - tcg_gen_gvec_dup64i(tcg_ctx, vec_full_reg_offset(v1), 16, 16, c) +#define gen_gvec_dup_imm(tcg_ctx, es, v1, c) \ + tcg_gen_gvec_dup_imm(tcg_ctx, es, vec_full_reg_offset(v1), 16, 16, c); #define gen_gvec_fn_2(tcg_ctx, fn, es, v1, v2) \ tcg_gen_gvec_##fn(tcg_ctx, es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ 16, 16) @@ -318,31 +318,6 @@ static void gen_gvec128_4_i64(TCGContext *tcg_ctx, gen_gvec128_4_i64_fn fn, uint tcg_temp_free_i64(tcg_ctx, cl); } -static void gen_gvec_dupi(TCGContext *tcg_ctx, uint8_t es, uint8_t reg, uint64_t c) -{ - switch (es) { - case ES_8: - tcg_gen_gvec_dup8i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, c); - break; - case ES_16: - tcg_gen_gvec_dup16i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, c); - break; - case ES_32: - tcg_gen_gvec_dup32i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, c); - break; - case ES_64: - gen_gvec_dup64i(tcg_ctx, reg, c); - break; - default: - g_assert_not_reached(); - } -} - -static void zero_vec(TCGContext *tcg_ctx, uint8_t reg) -{ - tcg_gen_gvec_dup8i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, 0); -} - static void gen_addi2_i64(TCGContext *tcg_ctx, TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, uint64_t b) { @@ -400,8 +375,8 @@ static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o) * Masks for both 64 bit elements of the vector are the same. * Trust tcg to produce a good constant loading. 
*/ - gen_gvec_dup64i(tcg_ctx, get_field(s, v1), - generate_byte_mask(i2 & 0xff)); + gen_gvec_dup_imm(tcg_ctx, ES_64, get_field(s, v1), + generate_byte_mask(i2 & 0xff)); } else { TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); @@ -437,7 +412,7 @@ static DisasJumpType op_vgm(DisasContext *s, DisasOps *o) } } - gen_gvec_dupi(tcg_ctx, es, get_field(s, v1), mask); + gen_gvec_dup_imm(tcg_ctx, es, get_field(s, v1), mask); return DISAS_NEXT; } @@ -598,7 +573,7 @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o) t = tcg_temp_new_i64(tcg_ctx); tcg_gen_qemu_ld_i64(tcg_ctx, t, o->addr1, get_mem_index(s), MO_TE | es); - zero_vec(tcg_ctx, get_field(s, v1)); + gen_gvec_dup_imm(tcg_ctx, es, get_field(s, v1), 0); write_vec_element_i64(tcg_ctx, t, get_field(s, v1), enr, es); tcg_temp_free_i64(tcg_ctx, t); return DISAS_NEXT; @@ -917,7 +892,7 @@ static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o) return DISAS_NORETURN; } - gen_gvec_dupi(tcg_ctx, es, get_field(s, v1), data); + gen_gvec_dup_imm(tcg_ctx, es, get_field(s, v1), data); return DISAS_NEXT; } @@ -1414,7 +1389,7 @@ static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o) read_vec_element_i32(tcg_ctx, tmp, get_field(s, v2), i, ES_32); tcg_gen_add2_i32(tcg_ctx, tmp, sum, sum, sum, tmp, tmp); } - zero_vec(tcg_ctx, get_field(s, v1)); + gen_gvec_dup_imm(tcg_ctx, ES_32, get_field(s, v1), 0); write_vec_element_i32(tcg_ctx, sum, get_field(s, v1), 1, ES_32); tcg_temp_free_i32(tcg_ctx, tmp); @@ -1910,65 +1885,6 @@ static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o) return DISAS_NEXT; } -static void gen_rll_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); - - tcg_gen_andi_i32(tcg_ctx, t0, b, 31); - tcg_gen_rotl_i32(tcg_ctx, d, a, t0); - tcg_temp_free_i32(tcg_ctx, t0); -} - -static void gen_rll_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t0 = tcg_temp_new_i64(tcg_ctx); - - tcg_gen_andi_i64(tcg_ctx, t0, b, 63); - tcg_gen_rotl_i64(tcg_ctx, d, a, t0); - tcg_temp_free_i64(tcg_ctx, t0); -} - -static DisasJumpType op_verllv(DisasContext *s, DisasOps *o) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - const uint8_t es = get_field(s, m4); - static const GVecGen3 g[4] = { - { .fno = gen_helper_gvec_verllv8, }, - { .fno = gen_helper_gvec_verllv16, }, - { .fni4 = gen_rll_i32, }, - { .fni8 = gen_rll_i64, }, - }; - - if (es > ES_64) { - gen_program_exception(s, PGM_SPECIFICATION); - return DISAS_NORETURN; - } - - gen_gvec_3(tcg_ctx, get_field(s, v1), get_field(s, v2), - get_field(s, v3), &g[es]); - return DISAS_NEXT; -} - -static DisasJumpType op_verll(DisasContext *s, DisasOps *o) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - const uint8_t es = get_field(s, m4); - static const GVecGen2s g[4] = { - { .fno = gen_helper_gvec_verll8, }, - { .fno = gen_helper_gvec_verll16, }, - { .fni4 = gen_rll_i32, }, - { .fni8 = gen_rll_i64, }, - }; - - if (es > ES_64) { - gen_program_exception(s, PGM_SPECIFICATION); - return DISAS_NORETURN; - } - gen_gvec_2s(tcg_ctx, get_field(s, v1), get_field(s, v3), o->addr1, - &g[es]); - return DISAS_NEXT; -} - static void gen_rim_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c) { TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); @@ -2035,6 +1951,9 @@ static DisasJumpType op_vesv(DisasContext *s, DisasOps *o) case 0x70: gen_gvec_fn_3(tcg_ctx, shlv, es, v1, v2, v3); break; + case 0x73: + gen_gvec_fn_3(tcg_ctx, rotlv, es, v1, v2, v3); + break; case 0x7a: gen_gvec_fn_3(tcg_ctx, sarv, es, v1, v2, v3); break; @@ -2067,6 +1986,9 @@ static 
DisasJumpType op_ves(DisasContext *s, DisasOps *o) case 0x30: gen_gvec_fn_2i(tcg_ctx, shli, es, v1, v3, d2); break; + case 0x33: + gen_gvec_fn_2i(tcg_ctx, rotli, es, v1, v3, d2); + break; case 0x3a: gen_gvec_fn_2i(tcg_ctx, sari, es, v1, v3, d2); break; @@ -2084,6 +2006,9 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o) case 0x30: gen_gvec_fn_2s(tcg_ctx, shls, es, v1, v3, shift); break; + case 0x33: + gen_gvec_fn_2s(tcg_ctx, rotls, es, v1, v3, shift); + break; case 0x3a: gen_gvec_fn_2s(tcg_ctx, sars, es, v1, v3, shift); break; diff --git a/qemu/target/s390x/vec_fpu_helper.c b/qemu/target/s390x/vec_fpu_helper.c index e87ef56f04..09cb61fbeb 100644 --- a/qemu/target/s390x/vec_fpu_helper.c +++ b/qemu/target/s390x/vec_fpu_helper.c @@ -174,7 +174,7 @@ void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env, env->cc_op = wfc64(v1, v2, env, true, GETPC()); } -typedef int (*vfc64_fn)(float64 a, float64 b, float_status *status); +typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) { diff --git a/qemu/target/s390x/vec_int_helper.c b/qemu/target/s390x/vec_int_helper.c index b81441395c..a4e486a8b8 100644 --- a/qemu/target/s390x/vec_int_helper.c +++ b/qemu/target/s390x/vec_int_helper.c @@ -515,37 +515,6 @@ void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ DEF_VPOPCT(8) DEF_VPOPCT(16) -#define DEF_VERLLV(BITS) \ -void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \ - uint32_t desc) \ -{ \ - int i; \ - \ - for (i = 0; i < (128 / BITS); i++) { \ - const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ - const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ - \ - s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \ - } \ -} -DEF_VERLLV(8) -DEF_VERLLV(16) - -#define DEF_VERLL(BITS) \ -void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \ - uint32_t desc) \ -{ \ - int i; \ - \ - for (i = 0; i < (128 / BITS); i++) { \ - const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ - \ - s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \ - } \ -} -DEF_VERLL(8) -DEF_VERLL(16) - #define DEF_VERIM(BITS) \ void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ uint32_t desc) \ diff --git a/qemu/target/sparc/fop_helper.c b/qemu/target/sparc/fop_helper.c index 9eb9b75718..e6dd3fc313 100644 --- a/qemu/target/sparc/fop_helper.c +++ b/qemu/target/sparc/fop_helper.c @@ -264,7 +264,7 @@ void helper_fsqrtq(CPUSPARCState *env) #define GEN_FCMP(name, size, reg1, reg2, FS, E) \ target_ulong glue(helper_, name) (CPUSPARCState *env) \ { \ - int ret; \ + FloatRelation ret; \ target_ulong fsr; \ if (E) { \ ret = glue(size, _compare)(reg1, reg2, &env->fp_status); \ @@ -295,7 +295,7 @@ void helper_fsqrtq(CPUSPARCState *env) #define GEN_FCMP_T(name, size, FS, E) \ target_ulong glue(helper_, name)(CPUSPARCState *env, size src1, size src2)\ { \ - int ret; \ + FloatRelation ret; \ target_ulong fsr; \ if (E) { \ ret = glue(size, _compare)(src1, src2, &env->fp_status); \ diff --git a/qemu/target/tricore/translate.c b/qemu/target/tricore/translate.c index 75188b8be6..d8d9584787 100644 --- a/qemu/target/tricore/translate.c +++ b/qemu/target/tricore/translate.c @@ -52,7 +52,6 @@ static const char *regnames_d[] = { typedef struct DisasContext { DisasContextBase base; - CPUTriCoreState *env; target_ulong pc; // CCOp cc_op; /* Current CC operation 
*/ target_ulong pc_succ_insn; diff --git a/qemu/tcg/README b/qemu/tcg/README index bfa2e4ed24..a64f67809b 100644 --- a/qemu/tcg/README +++ b/qemu/tcg/README @@ -605,10 +605,11 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. * shri_vec v0, v1, i2 * sari_vec v0, v1, i2 +* rotli_vec v0, v1, i2 * shrs_vec v0, v1, s2 * sars_vec v0, v1, s2 - Similarly for logical and arithmetic right shift. + Similarly for logical and arithmetic right shift, and left rotate. * shlv_vec v0, v1, v2 @@ -620,8 +621,10 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. * shrv_vec v0, v1, v2 * sarv_vec v0, v1, v2 +* rotlv_vec v0, v1, v2 +* rotrv_vec v0, v1, v2 - Similarly for logical and arithmetic right shift. + Similarly for logical and arithmetic right shift, and rotates. * cmp_vec v0, v1, v2, cond diff --git a/qemu/tcg/aarch64/tcg-target.h b/qemu/tcg/aarch64/tcg-target.h index 13993a70e5..e7673bb032 100644 --- a/qemu/tcg/aarch64/tcg-target.h +++ b/qemu/tcg/aarch64/tcg-target.h @@ -137,6 +137,9 @@ typedef enum { #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec 1 #define TCG_TARGET_HAS_abs_vec 1 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 1 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 1 diff --git a/qemu/tcg/aarch64/tcg-target.inc.c b/qemu/tcg/aarch64/tcg-target.inc.c index 50c9e595bb..c1f5483651 100644 --- a/qemu/tcg/aarch64/tcg-target.inc.c +++ b/qemu/tcg/aarch64/tcg-target.inc.c @@ -557,6 +557,7 @@ typedef enum { I3614_SSHR = 0x0f000400, I3614_SSRA = 0x0f001400, I3614_SHL = 0x0f005400, + I3614_SLI = 0x2f005400, I3614_USHR = 0x2f000400, I3614_USRA = 0x2f001400, @@ -1504,11 +1505,21 @@ static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, static inline void tcg_out_mb(TCGContext *s, TCGArg a0) { static const uint32_t sync[] = { - [0 ... 
TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, - [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, - [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, - [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, - [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [0] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_LD | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, + [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [TCG_MO_LD_ST | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_ST | TCG_MO_LD_LD | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_ST_LD | TCG_MO_LD_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_ST] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_ST | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, }; tcg_out32(s, sync[a0 & TCG_MO_ALL]); } @@ -1659,9 +1670,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, tcg_insn_unit **label_ptr, int mem_index, bool is_read) { -#ifdef TARGET_ARM - struct uc_struct *uc = s->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = s->uc; unsigned a_bits = get_alignment_bits(opc); unsigned s_bits = opc & MO_SIZE; unsigned a_mask = (1u << a_bits) - 1; @@ -2422,6 +2431,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sari_vec: tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); break; + case INDEX_op_aa64_sli_vec: + tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); + break; case INDEX_op_shlv_vec: tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); break; @@ -2509,8 +2521,11 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_shlv_vec: case INDEX_op_bitsel_vec: return 1; + case INDEX_op_rotli_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: return -1; case INDEX_op_mul_vec: case INDEX_op_smax_vec: @@ -2528,14 +2543,24 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne TCGArg a0, ...) { va_list va; - TCGv_vec v0, v1, v2, t1; + TCGv_vec v0, v1, v2, t1, t2; + TCGArg a2; va_start(va, a0); v0 = temp_tcgv_vec(tcg_ctx, arg_temp(a0)); v1 = temp_tcgv_vec(tcg_ctx, arg_temp(va_arg(va, TCGArg))); - v2 = temp_tcgv_vec(tcg_ctx, arg_temp(va_arg(va, TCGArg))); + a2 = va_arg(va, TCGArg); + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); switch (opc) { + case INDEX_op_rotli_vec: + t1 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_shri_vec(tcg_ctx, vece, t1, v1, -a2 & ((8 << vece) - 1)); + vec_gen_4(tcg_ctx, INDEX_op_aa64_sli_vec, type, vece, + tcgv_vec_arg(tcg_ctx, v0), tcgv_vec_arg(tcg_ctx, t1), tcgv_vec_arg(tcg_ctx, v1), a2); + tcg_temp_free_vec(tcg_ctx, t1); + break; + case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: /* Right shifts are negative left shifts for AArch64. */ @@ -2548,6 +2573,35 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne tcg_temp_free_vec(tcg_ctx, t1); break; + case INDEX_op_rotlv_vec: + t1 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_dupi_vec(tcg_ctx, vece, t1, 8 << vece); + tcg_gen_sub_vec(tcg_ctx, vece, t1, v2, t1); + /* Right shifts are negative left shifts for AArch64. 
*/ + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, t1), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, t1)); + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, v0), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v2)); + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t1); + tcg_temp_free_vec(tcg_ctx, t1); + break; + + case INDEX_op_rotrv_vec: + t1 = tcg_temp_new_vec(tcg_ctx, type); + t2 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_neg_vec(tcg_ctx, vece, t1, v2); + tcg_gen_dupi_vec(tcg_ctx, vece, t2, 8 << vece); + tcg_gen_add_vec(tcg_ctx, vece, t2, t1, t2); + /* Right shifts are negative left shifts for AArch64. */ + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, t1), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, t1)); + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, t2), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, t2)); + tcg_gen_or_vec(tcg_ctx, vece, v0, t1, t2); + tcg_temp_free_vec(tcg_ctx, t1); + tcg_temp_free_vec(tcg_ctx, t2); + break; + default: g_assert_not_reached(); } @@ -2568,6 +2622,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; + static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } }; static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } }; static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } }; static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } }; @@ -2762,6 +2817,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) return &w_w_wZ; case INDEX_op_bitsel_vec: return &w_w_w_w; + case INDEX_op_aa64_sli_vec: + return &w_0_w; default: return NULL; diff --git a/qemu/tcg/aarch64/tcg-target.opc.h b/qemu/tcg/aarch64/tcg-target.opc.h index 26bfd9c460..bce30accd9 100644 --- a/qemu/tcg/aarch64/tcg-target.opc.h +++ b/qemu/tcg/aarch64/tcg-target.opc.h @@ -12,3 +12,4 @@ */ DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC) +DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC) diff --git a/qemu/tcg/arm/tcg-target.inc.c b/qemu/tcg/arm/tcg-target.inc.c index 467d063690..8884968fb3 100644 --- a/qemu/tcg/arm/tcg-target.inc.c +++ b/qemu/tcg/arm/tcg-target.inc.c @@ -1235,9 +1235,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4); static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, MemOp opc, int mem_index, bool is_load) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif int cmp_off = (is_load ? 
offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); int fast_off = TLB_MASK_TABLE_OFS(mem_index); diff --git a/qemu/tcg/i386/tcg-target.h b/qemu/tcg/i386/tcg-target.h index 24ba5d19be..8508e68e7c 100644 --- a/qemu/tcg/i386/tcg-target.h +++ b/qemu/tcg/i386/tcg-target.h @@ -183,6 +183,9 @@ extern bool have_avx2; #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_neg_vec 0 #define TCG_TARGET_HAS_abs_vec 1 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 1 #define TCG_TARGET_HAS_shs_vec 1 #define TCG_TARGET_HAS_shv_vec have_avx2 diff --git a/qemu/tcg/i386/tcg-target.inc.c b/qemu/tcg/i386/tcg-target.inc.c index 15cc1c05d9..9cb46fe1be 100644 --- a/qemu/tcg/i386/tcg-target.inc.c +++ b/qemu/tcg/i386/tcg-target.inc.c @@ -1704,9 +1704,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, int mem_index, MemOp opc, tcg_insn_unit **label_ptr, int which) { -#ifdef TARGET_ARM - struct uc_struct *uc = s->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = s->uc; const TCGReg r0 = TCG_REG_L0; const TCGReg r1 = TCG_REG_L1; TCGType ttype = TCG_TYPE_I32; @@ -3195,6 +3193,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_shls_vec: case INDEX_op_shrs_vec: case INDEX_op_sars_vec: + case INDEX_op_rotls_vec: case INDEX_op_cmp_vec: case INDEX_op_x86_shufps_vec: case INDEX_op_x86_blend_vec: @@ -3233,6 +3232,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_xor_vec: case INDEX_op_andc_vec: return 1; + case INDEX_op_rotli_vec: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: return -1; @@ -3259,12 +3259,17 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign return vece >= MO_16; case INDEX_op_sars_vec: return vece >= MO_16 && vece <= MO_32; + case INDEX_op_rotls_vec: + return vece >= MO_16 ? -1 : 0; case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: return have_avx2 && vece >= MO_32; case INDEX_op_sarv_vec: return have_avx2 && vece == MO_32; + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: + return have_avx2 && vece >= MO_32 ? -1 : 0; case INDEX_op_mul_vec: if (vece == MO_8) { @@ -3293,7 +3298,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign } } -static void expand_vec_shi(TCGContext *tcg_ctx, TCGType type, unsigned vece, bool shr, +static void expand_vec_shi(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCGOpcode opc, TCGv_vec v0, TCGv_vec v1, TCGArg imm) { TCGv_vec t1, t2; @@ -3303,26 +3308,31 @@ static void expand_vec_shi(TCGContext *tcg_ctx, TCGType type, unsigned vece, boo t1 = tcg_temp_new_vec(tcg_ctx, type); t2 = tcg_temp_new_vec(tcg_ctx, type); - /* Unpack to W, shift, and repack. Tricky bits: - (1) Use punpck*bw x,x to produce DDCCBBAA, - i.e. duplicate in other half of the 16-bit lane. - (2) For right-shift, add 8 so that the high half of - the lane becomes zero. For left-shift, we must - shift up and down again. - (3) Step 2 leaves high half zero such that PACKUSWB - (pack with unsigned saturation) does not modify - the quantity. */ + /* + * Unpack to W, shift, and repack. Tricky bits: + * (1) Use punpck*bw x,x to produce DDCCBBAA, + * i.e. duplicate in other half of the 16-bit lane. + * (2) For right-shift, add 8 so that the high half of the lane + * becomes zero. For left-shift, and left-rotate, we must + * shift up and down again. 
+ * (3) Step 2 leaves high half zero such that PACKUSWB + * (pack with unsigned saturation) does not modify + * the quantity. + */ vec_gen_3(tcg_ctx, INDEX_op_x86_punpckl_vec, type, MO_8, tcgv_vec_arg(tcg_ctx, t1), tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v1)); vec_gen_3(tcg_ctx, INDEX_op_x86_punpckh_vec, type, MO_8, tcgv_vec_arg(tcg_ctx, t2), tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v1)); - if (shr) { - tcg_gen_shri_vec(tcg_ctx, MO_16, t1, t1, imm + 8); - tcg_gen_shri_vec(tcg_ctx, MO_16, t2, t2, imm + 8); + if (opc != INDEX_op_rotli_vec) { + imm += 8; + } + if (opc == INDEX_op_shri_vec) { + tcg_gen_shri_vec(tcg_ctx, MO_16, t1, t1, imm); + tcg_gen_shri_vec(tcg_ctx, MO_16, t2, t2, imm); } else { - tcg_gen_shli_vec(tcg_ctx, MO_16, t1, t1, imm + 8); - tcg_gen_shli_vec(tcg_ctx, MO_16, t2, t2, imm + 8); + tcg_gen_shli_vec(tcg_ctx, MO_16, t1, t1, imm); + tcg_gen_shli_vec(tcg_ctx, MO_16, t2, t2, imm); tcg_gen_shri_vec(tcg_ctx, MO_16, t1, t1, 8); tcg_gen_shri_vec(tcg_ctx, MO_16, t2, t2, 8); } @@ -3389,6 +3399,61 @@ static void expand_vec_sari(TCGContext *tcg_ctx, TCGType type, unsigned vece, } } +static void expand_vec_rotli(TCGContext *tcg_ctx, TCGType type, unsigned vece, + TCGv_vec v0, TCGv_vec v1, TCGArg imm) +{ + TCGv_vec t; + + if (vece == MO_8) { + expand_vec_shi(tcg_ctx, type, vece, INDEX_op_rotli_vec, v0, v1, imm); + return; + } + + t = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_shli_vec(tcg_ctx, vece, t, v1, imm); + tcg_gen_shri_vec(tcg_ctx, vece, v0, v1, (8 << vece) - imm); + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t); + tcg_temp_free_vec(tcg_ctx, t); +} + +static void expand_vec_rotls(TCGContext *tcg_ctx, TCGType type, unsigned vece, + TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh) +{ + TCGv_i32 rsh; + TCGv_vec t; + + tcg_debug_assert(vece != MO_8); + + t = tcg_temp_new_vec(tcg_ctx, type); + rsh = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_neg_i32(tcg_ctx, rsh, lsh); + tcg_gen_andi_i32(tcg_ctx, rsh, rsh, (8 << vece) - 1); + tcg_gen_shls_vec(tcg_ctx, vece, t, v1, lsh); + tcg_gen_shrs_vec(tcg_ctx, vece, v0, v1, rsh); + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t); + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_i32(tcg_ctx, rsh); +} + +static void expand_vec_rotv(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec v1, TCGv_vec sh, bool right) +{ + TCGv_vec t = tcg_temp_new_vec(tcg_ctx, type); + + tcg_gen_dupi_vec(tcg_ctx, vece, t, 8 << vece); + tcg_gen_sub_vec(tcg_ctx, vece, t, t, sh); + if (right) { + tcg_gen_shlv_vec(tcg_ctx, vece, t, v1, t); + tcg_gen_shrv_vec(tcg_ctx, vece, v0, v1, sh); + } else { + tcg_gen_shrv_vec(tcg_ctx, vece, t, v1, t); + tcg_gen_shlv_vec(tcg_ctx, vece, v0, v1, sh); + } + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t); + tcg_temp_free_vec(tcg_ctx, t); +} + static void expand_vec_mul(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGv_vec v2) { @@ -3598,13 +3663,30 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne switch (opc) { case INDEX_op_shli_vec: case INDEX_op_shri_vec: - expand_vec_shi(tcg_ctx, type, vece, opc == INDEX_op_shri_vec, v0, v1, a2); + expand_vec_shi(tcg_ctx, type, vece, opc, v0, v1, a2); break; case INDEX_op_sari_vec: expand_vec_sari(tcg_ctx, type, vece, v0, v1, a2); break; + case INDEX_op_rotli_vec: + expand_vec_rotli(tcg_ctx, type, vece, v0, v1, a2); + break; + + case INDEX_op_rotls_vec: + expand_vec_rotls(tcg_ctx, type, vece, v0, v1, temp_tcgv_i32(tcg_ctx, arg_temp(a2))); + break; + + case INDEX_op_rotlv_vec: + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); + 
expand_vec_rotv(tcg_ctx, type, vece, v0, v1, v2, false); + break; + case INDEX_op_rotrv_vec: + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); + expand_vec_rotv(tcg_ctx, type, vece, v0, v1, v2, true); + break; + case INDEX_op_mul_vec: v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); expand_vec_mul(tcg_ctx, type, vece, v0, v1, v2); diff --git a/qemu/tcg/mips/tcg-target.inc.c b/qemu/tcg/mips/tcg-target.inc.c index ed5a9356c3..addf4c661d 100644 --- a/qemu/tcg/mips/tcg-target.inc.c +++ b/qemu/tcg/mips/tcg-target.inc.c @@ -1215,9 +1215,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, TCGReg addrh, TCGMemOpIdx oi, tcg_insn_unit *label_ptr[2], bool is_load) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif MemOp opc = get_memop(oi); unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); diff --git a/qemu/tcg/ppc/tcg-target.h b/qemu/tcg/ppc/tcg-target.h index 4fa21f0e71..be5b2901c3 100644 --- a/qemu/tcg/ppc/tcg-target.h +++ b/qemu/tcg/ppc/tcg-target.h @@ -161,6 +161,9 @@ extern bool have_vsx; #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec have_isa_3_00 #define TCG_TARGET_HAS_abs_vec 0 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 1 #define TCG_TARGET_HAS_shi_vec 0 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 1 diff --git a/qemu/tcg/ppc/tcg-target.inc.c b/qemu/tcg/ppc/tcg-target.inc.c index 00b7942901..a74e02c9d8 100644 --- a/qemu/tcg/ppc/tcg-target.inc.c +++ b/qemu/tcg/ppc/tcg-target.inc.c @@ -1885,9 +1885,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc, TCGReg addrlo, TCGReg addrhi, int mem_index, bool is_read) { -#ifdef TARGET_ARM - struct uc_struct *uc = s->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = s->uc; int cmp_off = (is_read ? offsetof(CPUTLBEntry, addr_read) @@ -2623,21 +2621,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_shl_i32: if (const_args[2]) { - tcg_out_shli32(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_shli32(s, args[0], args[1], args[2] & 31); } else { tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_shr_i32: if (const_args[2]) { - tcg_out_shri32(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_shri32(s, args[0], args[1], args[2] & 31); } else { tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_sar_i32: if (const_args[2]) { - tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31)); } else { tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); } @@ -2709,14 +2710,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_shl_i64: if (const_args[2]) { - tcg_out_shli64(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_shli64(s, args[0], args[1], args[2] & 63); } else { tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); } break; case INDEX_op_shr_i64: if (const_args[2]) { - tcg_out_shri64(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. 
*/ + tcg_out_shri64(s, args[0], args[1], args[2] & 63); } else { tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); } @@ -3008,6 +3011,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: return vece <= MO_32 || have_isa_2_07; case INDEX_op_ssadd_vec: case INDEX_op_sssub_vec: @@ -3018,6 +3022,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_shli_vec: case INDEX_op_shri_vec: case INDEX_op_sari_vec: + case INDEX_op_rotli_vec: return vece <= MO_32 || have_isa_2_07 ? -1 : 0; case INDEX_op_neg_vec: return vece >= MO_32 && have_isa_3_00; @@ -3032,6 +3037,8 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign return 0; case INDEX_op_bitsel_vec: return have_vsx; + case INDEX_op_rotrv_vec: + return -1; default: return 0; } @@ -3314,7 +3321,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ppc_pkum_vec: insn = pkum_op[vece]; break; - case INDEX_op_ppc_rotl_vec: + case INDEX_op_rotlv_vec: insn = rotl_op[vece]; break; case INDEX_op_ppc_msum_vec: @@ -3422,7 +3429,7 @@ static void expand_vec_mul(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCG t3 = tcg_temp_new_vec(tcg_ctx, type); t4 = tcg_temp_new_vec(tcg_ctx, type); tcg_gen_dupi_vec(tcg_ctx, MO_8, t4, -16); - vec_gen_3(tcg_ctx, INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(tcg_ctx, t1), + vec_gen_3(tcg_ctx, INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(tcg_ctx, t1), tcgv_vec_arg(tcg_ctx, v2), tcgv_vec_arg(tcg_ctx, t4)); vec_gen_3(tcg_ctx, INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(tcg_ctx, t2), tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v2)); @@ -3447,7 +3454,7 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne TCGArg a0, ...) 
{ va_list va; - TCGv_vec v0, v1, v2; + TCGv_vec v0, v1, v2, t0; TCGArg a2; va_start(va, a0); @@ -3465,6 +3472,9 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne case INDEX_op_sari_vec: expand_vec_shi(tcg_ctx, type, vece, v0, v1, a2, INDEX_op_sarv_vec); break; + case INDEX_op_rotli_vec: + expand_vec_shi(tcg_ctx, type, vece, v0, v1, a2, INDEX_op_rotlv_vec); + break; case INDEX_op_cmp_vec: v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); expand_vec_cmp(tcg_ctx, type, vece, v0, v1, v2, va_arg(va, TCGArg)); @@ -3473,6 +3483,13 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); expand_vec_mul(tcg_ctx, type, vece, v0, v1, v2); break; + case INDEX_op_rotlv_vec: + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); + t0 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_neg_vec(tcg_ctx, vece, t0, v2); + tcg_gen_rotlv_vec(tcg_ctx, vece, v0, v1, t0); + tcg_temp_free_vec(tcg_ctx, t0); + break; default: g_assert_not_reached(); } @@ -3677,12 +3694,13 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: case INDEX_op_ppc_mrgh_vec: case INDEX_op_ppc_mrgl_vec: case INDEX_op_ppc_muleu_vec: case INDEX_op_ppc_mulou_vec: case INDEX_op_ppc_pkum_vec: - case INDEX_op_ppc_rotl_vec: case INDEX_op_dup2_vec: return &v_v_v; case INDEX_op_not_vec: diff --git a/qemu/tcg/ppc/tcg-target.opc.h b/qemu/tcg/ppc/tcg-target.opc.h index 1373f77e82..db514403c3 100644 --- a/qemu/tcg/ppc/tcg-target.opc.h +++ b/qemu/tcg/ppc/tcg-target.opc.h @@ -30,4 +30,3 @@ DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC) DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC) DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC) DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC) -DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC) diff --git a/qemu/tcg/riscv/tcg-target.inc.c b/qemu/tcg/riscv/tcg-target.inc.c index 2a5d3347d3..3d34141092 100644 --- a/qemu/tcg/riscv/tcg-target.inc.c +++ b/qemu/tcg/riscv/tcg-target.inc.c @@ -502,10 +502,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, break; case R_RISCV_JAL: return reloc_jimm20(code_ptr, (tcg_insn_unit *)value); - break; case R_RISCV_CALL: return reloc_call(code_ptr, (tcg_insn_unit *)value); - break; default: tcg_abort(); } @@ -970,9 +968,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, TCGReg addrh, TCGMemOpIdx oi, tcg_insn_unit **label_ptr, bool is_load) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif MemOp opc = get_memop(oi); unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); diff --git a/qemu/tcg/s390/tcg-target.inc.c b/qemu/tcg/s390/tcg-target.inc.c index c8fa20046f..3d64a675eb 100644 --- a/qemu/tcg/s390/tcg-target.inc.c +++ b/qemu/tcg/s390/tcg-target.inc.c @@ -1547,9 +1547,7 @@ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, int mem_index, bool is_ld) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); unsigned s_mask = (1 << s_bits) - 1; diff --git a/qemu/tcg/sparc/tcg-target.inc.c b/qemu/tcg/sparc/tcg-target.inc.c index d4bc69d3b5..cf5533e8f4 100644 --- a/qemu/tcg/sparc/tcg-target.inc.c +++ b/qemu/tcg/sparc/tcg-target.inc.c @@ -1083,9 +1083,7 @@ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12)); static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, MemOp opc, int which) { -#ifdef TARGET_ARM struct 
uc_struct *uc = s->uc; -#endif int fast_off = TLB_MASK_TABLE_OFS(mem_index); int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); int table_off = fast_off + offsetof(CPUTLBDescFast, table); diff --git a/qemu/tcg/tcg-op-gvec.c b/qemu/tcg/tcg-op-gvec.c index cab429c44a..13e4b287b8 100644 --- a/qemu/tcg/tcg-op-gvec.c +++ b/qemu/tcg/tcg-op-gvec.c @@ -325,11 +325,35 @@ void tcg_gen_gvec_5_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint3 in units of LNSZ. This limits the expansion of inline code. */ static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz) { - if (oprsz % lnsz == 0) { - uint32_t lnct = oprsz / lnsz; - return lnct >= 1 && lnct <= MAX_UNROLL; + uint32_t q, r; + + if (oprsz < lnsz) { + return false; } - return false; + + q = oprsz / lnsz; + r = oprsz % lnsz; + tcg_debug_assert((r & 7) == 0); + + if (lnsz < 16) { + /* For sizes below 16, accept no remainder. */ + if (r != 0) { + return false; + } + } else { + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + * In addition, expand_clr needs to handle a multiple of 8. + * Thus we can handle the tail with one more operation per + * diminishing power of 2. + */ + q += ctpop32(r); + } + + return q <= MAX_UNROLL; + } static void expand_clr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t maxsz); @@ -404,22 +428,31 @@ static void gen_dup_i64(TCGContext *tcg_ctx, unsigned vece, TCGv_i64 out, TCGv_i static TCGType choose_vector_type(TCGContext *tcg_ctx, const TCGOpcode *list, unsigned vece, uint32_t size, bool prefer_i64) { - if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) { - /* - * Recall that ARM SVE allows vector sizes that are not a - * power of 2, but always a multiple of 16. The intent is - * that e.g. size == 80 would be expanded with 2x32 + 1x16. - * It is hard to imagine a case in which v256 is supported - * but v128 is not, but check anyway. - */ - if (tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V256, vece) - && (size % 32 == 0 - || tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece))) { - return TCG_TYPE_V256; - } - } - if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16) - && tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece)) { + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + * It is hard to imagine a case in which v256 is supported + * but v128 is not, but check anyway. + * In addition, expand_clr needs to handle a multiple of 8. 
+ */ + if (TCG_TARGET_HAS_v256 && + check_size_impl(size, 32) && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V256, vece) && + (!(size & 16) || + (TCG_TARGET_HAS_v128 && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece))) && + (!(size & 8) || + (TCG_TARGET_HAS_v64 && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V64, vece)))) { + return TCG_TYPE_V256; + } + if (TCG_TARGET_HAS_v128 && + check_size_impl(size, 16) && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece) && + (!(size & 8) || + (TCG_TARGET_HAS_v64 && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V64, vece)))) { return TCG_TYPE_V128; } if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8) @@ -434,6 +467,18 @@ static void do_dup_store(TCGContext *tcg_ctx, TCGType type, uint32_t dofs, uint3 { uint32_t i = 0; + tcg_debug_assert(oprsz >= 8); + + /* + * This may be expand_clr for the tail of an operation, e.g. + * oprsz == 8 && maxsz == 64. The first 8 bytes of this store + * are misaligned wrt the maximum vector size, so do that first. + */ + if (dofs & 8) { + tcg_gen_stl_vec(tcg_ctx, t_vec, tcg_ctx->cpu_env, dofs + i, TCG_TYPE_V64); + i += 8; + } + switch (type) { case TCG_TYPE_V256: /* @@ -621,17 +666,22 @@ static void expand_clr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t maxsz) /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */ static void expand_2_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - void (*fni)(TCGContext *, TCGv_i32, TCGv_i32)) + bool load_dest, void (*fni)(TCGContext *, TCGv_i32, TCGv_i32)) { TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 t1 = tcg_temp_new_i32(tcg_ctx); uint32_t i; for (i = 0; i < oprsz; i += 4) { tcg_gen_ld_i32(tcg_ctx, t0, tcg_ctx->cpu_env, aofs + i); - fni(tcg_ctx, t0, t0); - tcg_gen_st_i32(tcg_ctx, t0, tcg_ctx->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_i32(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); + } + fni(tcg_ctx, t1, t0); + tcg_gen_st_i32(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); } tcg_temp_free_i32(tcg_ctx, t0); + tcg_temp_free_i32(tcg_ctx, t1); } static void expand_2i_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, @@ -751,17 +801,22 @@ static void expand_4_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */ static void expand_2_i64(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - void (*fni)(TCGContext *, TCGv_i64, TCGv_i64)) + bool load_dest, void (*fni)(TCGContext *, TCGv_i64, TCGv_i64)) { TCGv_i64 t0 = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 t1 = tcg_temp_new_i64(tcg_ctx); uint32_t i; for (i = 0; i < oprsz; i += 8) { tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, aofs + i); - fni(tcg_ctx, t0, t0); - tcg_gen_st_i64(tcg_ctx, t0, tcg_ctx->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_i64(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); + } + fni(tcg_ctx, t1, t0); + tcg_gen_st_i64(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); } tcg_temp_free_i64(tcg_ctx, t0); + tcg_temp_free_i64(tcg_ctx, t1); } static void expand_2i_i64(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, @@ -882,17 +937,23 @@ static void expand_4_i64(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint /* Expand OPSZ bytes worth of two-operand operations using host vectors. 
*/ static void expand_2_vec(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, + bool load_dest, void (*fni)(TCGContext *, unsigned, TCGv_vec, TCGv_vec)) { TCGv_vec t0 = tcg_temp_new_vec(tcg_ctx, type); + TCGv_vec t1 = tcg_temp_new_vec(tcg_ctx, type); uint32_t i; for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(tcg_ctx, t0, tcg_ctx->cpu_env, aofs + i); - fni(tcg_ctx, vece, t0, t0); - tcg_gen_st_vec(tcg_ctx, t0, tcg_ctx->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_vec(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); + } + fni(tcg_ctx, vece, t1, t0); + tcg_gen_st_vec(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); } tcg_temp_free_vec(tcg_ctx, t0); + tcg_temp_free_vec(tcg_ctx, t1); } /* Expand OPSZ bytes worth of two-vector operands and an immediate operand @@ -1046,7 +1107,8 @@ void tcg_gen_gvec_2(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, * that e.g. size == 80 would be expanded with 2x32 + 1x16. */ some = QEMU_ALIGN_DOWN(oprsz, 32); - expand_2_vec(tcg_ctx, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv); + expand_2_vec(tcg_ctx, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, + g->load_dest, g->fniv); if (some == oprsz) { break; } @@ -1056,17 +1118,19 @@ void tcg_gen_gvec_2(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, maxsz -= some; /* fallthru */ case TCG_TYPE_V128: - expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv); + expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, + g->load_dest, g->fniv); break; case TCG_TYPE_V64: - expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv); + expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, + g->load_dest, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_2_i64(tcg_ctx, dofs, aofs, oprsz, g->fni8); + expand_2_i64(tcg_ctx, dofs, aofs, oprsz, g->load_dest, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_2_i32(tcg_ctx, dofs, aofs, oprsz, g->fni4); + expand_2_i32(tcg_ctx, dofs, aofs, oprsz, g->load_dest, g->fni4); } else { assert(g->fno != NULL); tcg_gen_gvec_2_ool(tcg_ctx, dofs, aofs, oprsz, maxsz, g->data, g->fno); @@ -1543,32 +1607,11 @@ void tcg_gen_gvec_dup_mem(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uin } } -void tcg_gen_gvec_dup64i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint64_t x) -{ - check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_64, dofs, oprsz, maxsz, NULL, NULL, x); -} - -void tcg_gen_gvec_dup32i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint32_t x) -{ - check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_32, dofs, oprsz, maxsz, NULL, NULL, x); -} - -void tcg_gen_gvec_dup16i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint16_t x) -{ - check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_16, dofs, oprsz, maxsz, NULL, NULL, x); -} - -void tcg_gen_gvec_dup8i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint8_t x) +void tcg_gen_gvec_dup_imm(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t oprsz, + uint32_t maxsz, uint64_t x) { check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_8, dofs, oprsz, maxsz, NULL, NULL, x); + do_dup(tcg_ctx, vece, dofs, oprsz, maxsz, NULL, NULL, x); } void tcg_gen_gvec_not(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, @@ -2321,7 +2364,7 @@ void tcg_gen_gvec_xor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_ }; if (aofs == 
bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, 0); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, 0); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2338,7 +2381,7 @@ void tcg_gen_gvec_andc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 }; if (aofs == bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, 0); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, 0); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2355,7 +2398,7 @@ void tcg_gen_gvec_orc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_ }; if (aofs == bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, -1); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, -1); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2406,7 +2449,7 @@ void tcg_gen_gvec_eqv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_ }; if (aofs == bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, -1); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, -1); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2654,6 +2697,74 @@ void tcg_gen_gvec_sari(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 } } +void tcg_gen_vec_rotl8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c) +{ + uint64_t mask = dup_const(MO_8, 0xff << c); + + tcg_gen_shli_i64(tcg_ctx, d, a, c); + tcg_gen_shri_i64(tcg_ctx, a, a, 8 - c); + tcg_gen_andi_i64(tcg_ctx, d, d, mask); + tcg_gen_andi_i64(tcg_ctx, a, a, ~mask); + tcg_gen_or_i64(tcg_ctx, d, d, a); +} + +void tcg_gen_vec_rotl16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c) +{ + uint64_t mask = dup_const(MO_16, 0xffff << c); + + tcg_gen_shli_i64(tcg_ctx, d, a, c); + tcg_gen_shri_i64(tcg_ctx, a, a, 16 - c); + tcg_gen_andi_i64(tcg_ctx, d, d, mask); + tcg_gen_andi_i64(tcg_ctx, a, a, ~mask); + tcg_gen_or_i64(tcg_ctx, d, d, a); +} + +void tcg_gen_gvec_rotli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen2i g[4] = { + { .fni8 = tcg_gen_vec_rotl8i_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl8i, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = tcg_gen_vec_rotl16i_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl16i, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotli_i32, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl32i, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotli_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl64i, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(shift >= 0 && shift < (8 << vece)); + if (shift == 0) { + tcg_gen_gvec_mov(tcg_ctx, vece, dofs, aofs, oprsz, maxsz); + } else { + tcg_gen_gvec_2i(tcg_ctx, dofs, aofs, oprsz, maxsz, shift, &g[vece]); + } +} + +void tcg_gen_gvec_rotri(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(shift >= 0 && shift < (8 << vece)); + tcg_gen_gvec_rotli(tcg_ctx, vece, dofs, aofs, -shift & ((8 << vece) - 1), + oprsz, maxsz); +} + /* * Specialized generation vector shifts by a non-constant scalar. 
*/ @@ -2868,6 +2979,28 @@ void tcg_gen_gvec_sars(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 do_gvec_shifts(tcg_ctx, vece, dofs, aofs, shift, oprsz, maxsz, &g); } +void tcg_gen_gvec_rotls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen2sh g = { + .fni4 = tcg_gen_rotl_i32, + .fni8 = tcg_gen_rotl_i64, + .fniv_s = tcg_gen_rotls_vec, + .fniv_v = tcg_gen_rotlv_vec, + .fno = { + gen_helper_gvec_rotl8i, + gen_helper_gvec_rotl16i, + gen_helper_gvec_rotl32i, + gen_helper_gvec_rotl64i, + }, + .s_list = { INDEX_op_rotls_vec, 0 }, + .v_list = { INDEX_op_rotlv_vec, 0 }, + }; + + tcg_debug_assert(vece <= MO_64); + do_gvec_shifts(tcg_ctx, vece, dofs, aofs, shift, oprsz, maxsz, &g); +} + /* * Expand D = A << (B % element bits) * @@ -3063,6 +3196,128 @@ void tcg_gen_gvec_sarv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g[vece]); } +/* + * Similarly for rotates. + */ + +static void tcg_gen_rotlv_mod_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_dupi_vec(tcg_ctx, vece, t, (8 << vece) - 1); + tcg_gen_and_vec(tcg_ctx, vece, t, t, b); + tcg_gen_rotlv_vec(tcg_ctx, vece, d, a, t); + tcg_temp_free_vec(tcg_ctx, t); +} + +static void tcg_gen_rotl_mod_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, t, b, 31); + tcg_gen_rotl_i32(tcg_ctx, d, a, t); + tcg_temp_free_i32(tcg_ctx, t); +} + +static void tcg_gen_rotl_mod_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_andi_i64(tcg_ctx, t, b, 63); + tcg_gen_rotl_i64(tcg_ctx, d, a, t); + tcg_temp_free_i64(tcg_ctx, t); +} + +void tcg_gen_gvec_rotlv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotl_mod_i32, + .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotl_mod_i64, + .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + +static void tcg_gen_rotrv_mod_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_dupi_vec(tcg_ctx, vece, t, (8 << vece) - 1); + tcg_gen_and_vec(tcg_ctx, vece, t, t, b); + tcg_gen_rotrv_vec(tcg_ctx, vece, d, a, t); + tcg_temp_free_vec(tcg_ctx, t); +} + +static void tcg_gen_rotr_mod_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, t, b, 31); + tcg_gen_rotr_i32(tcg_ctx, d, a, t); + tcg_temp_free_i32(tcg_ctx, t); +} + +static void tcg_gen_rotr_mod_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + 
+ tcg_gen_andi_i64(tcg_ctx, t, b, 63); + tcg_gen_rotr_i64(tcg_ctx, d, a, t); + tcg_temp_free_i64(tcg_ctx, t); +} + +void tcg_gen_gvec_rotrv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotr_mod_i32, + .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotr_mod_i64, + .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */ static void expand_cmp_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, TCGCond cond) diff --git a/qemu/tcg/tcg-op-vec.c b/qemu/tcg/tcg-op-vec.c index 99343962ac..02d3e22564 100644 --- a/qemu/tcg/tcg-op-vec.c +++ b/qemu/tcg/tcg-op-vec.c @@ -547,6 +547,18 @@ void tcg_gen_sari_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a do_shifti(tcg_ctx, INDEX_op_sari_vec, vece, r, a, i); } +void tcg_gen_rotli_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) +{ + do_shifti(tcg_ctx, INDEX_op_rotli_vec, vece, r, a, i); +} + +void tcg_gen_rotri_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) +{ + int bits = 8 << vece; + tcg_debug_assert(i >= 0 && i < bits); + do_shifti(tcg_ctx, INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1)); +} + void tcg_gen_cmp_vec(TCGContext *tcg_ctx, TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { @@ -647,7 +659,9 @@ static void do_minmax(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a TCGv_vec b, TCGOpcode opc, TCGCond cond) { if (!do_op3(tcg_ctx, vece, r, a, b, opc)) { + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); tcg_gen_cmpsel_vec(tcg_ctx, cond, vece, r, a, b, a, b); + tcg_swap_vecop_list(hold_list); } } @@ -686,8 +700,18 @@ void tcg_gen_sarv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a do_op3_nofail(tcg_ctx, vece, r, a, b, INDEX_op_sarv_vec); } +void tcg_gen_rotlv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3_nofail(tcg_ctx, vece, r, a, b, INDEX_op_rotlv_vec); +} + +void tcg_gen_rotrv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3_nofail(tcg_ctx, vece, r, a, b, INDEX_op_rotrv_vec); +} + static void do_shifts(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, - TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v) + TCGv_i32 s, TCGOpcode opc) { TCGTemp *rt = tcgv_vec_temp(tcg_ctx, r); TCGTemp *at = tcgv_vec_temp(tcg_ctx, a); @@ -696,48 +720,41 @@ static void do_shifts(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a TCGArg ai = temp_arg(at); TCGArg si = temp_arg(st); TCGType type = rt->base_type; - const TCGOpcode *hold_list; int can; tcg_debug_assert(at->base_type >= type); - tcg_assert_listed_vecop(opc_s); - hold_list = tcg_swap_vecop_list(NULL); - can = tcg_can_emit_vec_op(tcg_ctx, opc_s, type, vece); + tcg_assert_listed_vecop(opc); + 
can = tcg_can_emit_vec_op(tcg_ctx, opc, type, vece); if (can > 0) { - vec_gen_3(tcg_ctx, opc_s, type, vece, ri, ai, si); + vec_gen_3(tcg_ctx, opc, type, vece, ri, ai, si); } else if (can < 0) { - tcg_expand_vec_op(tcg_ctx, opc_s, type, vece, ri, ai, si); + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); + tcg_expand_vec_op(tcg_ctx, opc, type, vece, ri, ai, si); + tcg_swap_vecop_list(hold_list); } else { - TCGv_vec vec_s = tcg_temp_new_vec(tcg_ctx, type); - - if (vece == MO_64) { - TCGv_i64 s64 = tcg_temp_new_i64(tcg_ctx); - tcg_gen_extu_i32_i64(tcg_ctx, s64, s); - tcg_gen_dup_i64_vec(tcg_ctx, MO_64, vec_s, s64); - tcg_temp_free_i64(tcg_ctx, s64); - } else { - tcg_gen_dup_i32_vec(tcg_ctx, vece, vec_s, s); - } - do_op3_nofail(tcg_ctx, vece, r, a, vec_s, opc_v); - tcg_temp_free_vec(tcg_ctx, vec_s); + g_assert_not_reached(); } - tcg_swap_vecop_list(hold_list); } void tcg_gen_shls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { - do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec); + do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shls_vec); } void tcg_gen_shrs_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { - do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec); + do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shrs_vec); } void tcg_gen_sars_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { - do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec); + do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_sars_vec); +} + +void tcg_gen_rotls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s) +{ + do_shifts(tcg_ctx, vece, r, a, s, INDEX_op_rotls_vec); } void tcg_gen_bitsel_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, diff --git a/qemu/tcg/tcg-op.c b/qemu/tcg/tcg-op.c index 8a5865dfe8..d2d44666cb 100644 --- a/qemu/tcg/tcg-op.c +++ b/qemu/tcg/tcg-op.c @@ -538,9 +538,9 @@ void tcg_gen_rotl_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 } } -void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { - tcg_debug_assert(arg2 < 32); + tcg_debug_assert(arg2 >= 0 && arg2 < 32); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(tcg_ctx, ret, arg1); @@ -578,9 +578,9 @@ void tcg_gen_rotr_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 } } -void tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +void tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { - tcg_debug_assert(arg2 < 32); + tcg_debug_assert(arg2 >= 0 && arg2 < 32); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(tcg_ctx, ret, arg1); @@ -2000,9 +2000,9 @@ void tcg_gen_rotl_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 #endif } -void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { - tcg_debug_assert(arg2 < 64); + tcg_debug_assert(arg2 >= 0 && arg2 < 64); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(tcg_ctx, ret, arg1); @@ -2041,9 +2041,9 @@ void tcg_gen_rotr_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 #endif } -void tcg_gen_rotri_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +void tcg_gen_rotri_i64(TCGContext *tcg_ctx, 
TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { - tcg_debug_assert(arg2 < 64); + tcg_debug_assert(arg2 >= 0 && arg2 < 64); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(tcg_ctx, ret, arg1); diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index 1c9353032e..3d23487176 100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -1411,6 +1411,13 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: return have_vec && TCG_TARGET_HAS_shv_vec; + case INDEX_op_rotli_vec: + return have_vec && TCG_TARGET_HAS_roti_vec; + case INDEX_op_rotls_vec: + return have_vec && TCG_TARGET_HAS_rots_vec; + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: + return have_vec && TCG_TARGET_HAS_rotv_vec; case INDEX_op_ssadd_vec: case INDEX_op_usadd_vec: case INDEX_op_sssub_vec: @@ -2779,34 +2786,68 @@ static bool liveness_pass_2(TCGContext *s) } /* Outputs become available. */ - for (i = 0; i < nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); + if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { + arg_ts = arg_temp(op->args[0]); dir_ts = arg_ts->state_ptr; - if (!dir_ts) { - continue; + if (dir_ts) { + op->args[0] = temp_arg(dir_ts); + changes = true; + + /* The output is now live and modified. */ + arg_ts->state = 0; + + if (NEED_SYNC_ARG(0)) { + TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 + ? INDEX_op_st_i32 + : INDEX_op_st_i64); + TCGOp *sop = tcg_op_insert_after(s, op, sopc); + TCGTemp *out_ts = dir_ts; + + if (IS_DEAD_ARG(0)) { + out_ts = arg_temp(op->args[1]); + arg_ts->state = TS_DEAD; + tcg_op_remove(s, op); + } else { + arg_ts->state = TS_MEM; + } + + sop->args[0] = temp_arg(out_ts); + sop->args[1] = temp_arg(arg_ts->mem_base); + sop->args[2] = arg_ts->mem_offset; + } else { + tcg_debug_assert(!IS_DEAD_ARG(0)); + } } - op->args[i] = temp_arg(dir_ts); - changes = true; + } else { + for (i = 0; i < nb_oargs; i++) { + arg_ts = arg_temp(op->args[i]); + dir_ts = arg_ts->state_ptr; + if (!dir_ts) { + continue; + } + op->args[i] = temp_arg(dir_ts); + changes = true; - /* The output is now live and modified. */ - arg_ts->state = 0; + /* The output is now live and modified. */ + arg_ts->state = 0; - /* Sync outputs upon their last write. */ - if (NEED_SYNC_ARG(i)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_st_i32 - : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc); + /* Sync outputs upon their last write. */ + if (NEED_SYNC_ARG(i)) { + TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 + ? INDEX_op_st_i32 + : INDEX_op_st_i64); + TCGOp *sop = tcg_op_insert_after(s, op, sopc); - sop->args[0] = temp_arg(dir_ts); - sop->args[1] = temp_arg(arg_ts->mem_base); - sop->args[2] = arg_ts->mem_offset; + sop->args[0] = temp_arg(dir_ts); + sop->args[1] = temp_arg(arg_ts->mem_base); + sop->args[2] = arg_ts->mem_offset; - arg_ts->state = TS_MEM; - } - /* Drop outputs that are dead. */ - if (IS_DEAD_ARG(i)) { - arg_ts->state = TS_DEAD; + arg_ts->state = TS_MEM; + } + /* Drop outputs that are dead. 
*/ + if (IS_DEAD_ARG(i)) { + arg_ts->state = TS_DEAD; + } } } } diff --git a/qemu/tricore.h b/qemu/tricore.h index e378868526..30cc3e9a50 100644 --- a/qemu/tricore.h +++ b/qemu/tricore.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_tricore #define tcg_gen_shr_i64 tcg_gen_shr_i64_tricore #define tcg_gen_st_i64 tcg_gen_st_i64_tricore +#define tcg_gen_add_i64 tcg_gen_add_i64_tricore +#define tcg_gen_sub_i64 tcg_gen_sub_i64_tricore #define tcg_gen_xor_i64 tcg_gen_xor_i64_tricore +#define tcg_gen_neg_i64 tcg_gen_neg_i64_tricore #define cpu_icount_to_ns cpu_icount_to_ns_tricore #define cpu_is_stopped cpu_is_stopped_tricore #define cpu_get_ticks cpu_get_ticks_tricore @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_tricore #define floatx80_mul floatx80_mul_tricore #define floatx80_div floatx80_div_tricore +#define floatx80_modrem floatx80_modrem_tricore +#define floatx80_mod floatx80_mod_tricore #define floatx80_rem floatx80_rem_tricore #define floatx80_sqrt floatx80_sqrt_tricore #define floatx80_eq floatx80_eq_tricore @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_tricore #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_tricore #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_tricore +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_tricore #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_tricore #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_tricore #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_tricore @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_tricore #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_tricore #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_tricore +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_tricore +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_tricore #define tcg_gen_gvec_sari tcg_gen_gvec_sari_tricore +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_tricore +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_tricore #define tcg_gen_gvec_shls tcg_gen_gvec_shls_tricore #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_tricore #define tcg_gen_gvec_sars tcg_gen_gvec_sars_tricore +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_tricore #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_tricore #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_tricore #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_tricore +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_tricore +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_tricore #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_tricore #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_tricore #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_tricore @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_tricore #define tcg_gen_shri_vec tcg_gen_shri_vec_tricore #define tcg_gen_sari_vec tcg_gen_sari_vec_tricore +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_tricore +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_tricore #define tcg_gen_cmp_vec tcg_gen_cmp_vec_tricore #define tcg_gen_add_vec tcg_gen_add_vec_tricore #define tcg_gen_sub_vec tcg_gen_sub_vec_tricore @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_tricore #define tcg_gen_shrv_vec tcg_gen_shrv_vec_tricore #define tcg_gen_sarv_vec tcg_gen_sarv_vec_tricore +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_tricore +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_tricore #define tcg_gen_shls_vec tcg_gen_shls_vec_tricore #define tcg_gen_shrs_vec tcg_gen_shrs_vec_tricore #define tcg_gen_sars_vec tcg_gen_sars_vec_tricore +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_tricore #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_tricore 
#define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_tricore #define tb_htable_lookup tb_htable_lookup_tricore @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_tricore #define cpu_loop_exit_atomic cpu_loop_exit_atomic_tricore #define tlb_init tlb_init_tricore +#define tlb_destroy tlb_destroy_tricore #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_tricore #define tlb_flush tlb_flush_tricore #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_tricore @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_tricore #define get_page_addr_code_hostp get_page_addr_code_hostp_tricore #define get_page_addr_code get_page_addr_code_tricore +#define probe_access_flags probe_access_flags_tricore #define probe_access probe_access_tricore #define tlb_vaddr_to_host tlb_vaddr_to_host_tricore #define helper_ret_ldub_mmu helper_ret_ldub_mmu_tricore @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_tricore #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_tricore #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_tricore -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_tricore -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_tricore -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_tricore -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_tricore +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_tricore +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_tricore +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_tricore +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_tricore +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_tricore +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_tricore +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_tricore +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_tricore #define cpu_ldub_data_ra cpu_ldub_data_ra_tricore #define cpu_ldsb_data_ra cpu_ldsb_data_ra_tricore -#define cpu_lduw_data_ra cpu_lduw_data_ra_tricore -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_tricore -#define cpu_ldl_data_ra cpu_ldl_data_ra_tricore -#define cpu_ldq_data_ra cpu_ldq_data_ra_tricore +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_tricore +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_tricore +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_tricore +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_tricore +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_tricore +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_tricore +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_tricore +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_tricore #define cpu_ldub_data cpu_ldub_data_tricore #define cpu_ldsb_data cpu_ldsb_data_tricore -#define cpu_lduw_data cpu_lduw_data_tricore -#define cpu_ldsw_data cpu_ldsw_data_tricore -#define cpu_ldl_data cpu_ldl_data_tricore -#define cpu_ldq_data cpu_ldq_data_tricore +#define cpu_lduw_be_data cpu_lduw_be_data_tricore +#define cpu_lduw_le_data cpu_lduw_le_data_tricore +#define cpu_ldsw_be_data cpu_ldsw_be_data_tricore +#define cpu_ldsw_le_data cpu_ldsw_le_data_tricore +#define cpu_ldl_be_data cpu_ldl_be_data_tricore +#define cpu_ldl_le_data cpu_ldl_le_data_tricore +#define cpu_ldq_le_data cpu_ldq_le_data_tricore +#define cpu_ldq_be_data cpu_ldq_be_data_tricore #define helper_ret_stb_mmu helper_ret_stb_mmu_tricore #define helper_le_stw_mmu helper_le_stw_mmu_tricore #define helper_be_stw_mmu helper_be_stw_mmu_tricore @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_tricore #define helper_be_stq_mmu helper_be_stq_mmu_tricore #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_tricore -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_tricore 
-#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_tricore -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_tricore +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_tricore +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_tricore +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_tricore +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_tricore +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_tricore +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_tricore #define cpu_stb_data_ra cpu_stb_data_ra_tricore -#define cpu_stw_data_ra cpu_stw_data_ra_tricore -#define cpu_stl_data_ra cpu_stl_data_ra_tricore -#define cpu_stq_data_ra cpu_stq_data_ra_tricore +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_tricore +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_tricore +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_tricore +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_tricore +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_tricore +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_tricore #define cpu_stb_data cpu_stb_data_tricore -#define cpu_stw_data cpu_stw_data_tricore -#define cpu_stl_data cpu_stl_data_tricore -#define cpu_stq_data cpu_stq_data_tricore +#define cpu_stw_be_data cpu_stw_be_data_tricore +#define cpu_stw_le_data cpu_stw_le_data_tricore +#define cpu_stl_be_data cpu_stl_be_data_tricore +#define cpu_stl_le_data cpu_stl_le_data_tricore +#define cpu_stq_be_data cpu_stq_be_data_tricore +#define cpu_stq_le_data cpu_stq_le_data_tricore #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_tricore #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_tricore #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_tricore @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_tricore #define cpu_ldl_code cpu_ldl_code_tricore #define cpu_ldq_code cpu_ldq_code_tricore +#define cpu_interrupt_handler cpu_interrupt_handler_tricore #define helper_div_i32 helper_div_i32_tricore #define helper_rem_i32 helper_rem_i32_tricore #define helper_divu_i32 helper_divu_i32_tricore @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_tricore #define helper_gvec_sar32i helper_gvec_sar32i_tricore #define helper_gvec_sar64i helper_gvec_sar64i_tricore +#define helper_gvec_rotl8i helper_gvec_rotl8i_tricore +#define helper_gvec_rotl16i helper_gvec_rotl16i_tricore +#define helper_gvec_rotl32i helper_gvec_rotl32i_tricore +#define helper_gvec_rotl64i helper_gvec_rotl64i_tricore #define helper_gvec_shl8v helper_gvec_shl8v_tricore #define helper_gvec_shl16v helper_gvec_shl16v_tricore #define helper_gvec_shl32v helper_gvec_shl32v_tricore @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_tricore #define helper_gvec_sar32v helper_gvec_sar32v_tricore #define helper_gvec_sar64v helper_gvec_sar64v_tricore +#define helper_gvec_rotl8v helper_gvec_rotl8v_tricore +#define helper_gvec_rotl16v helper_gvec_rotl16v_tricore +#define helper_gvec_rotl32v helper_gvec_rotl32v_tricore +#define helper_gvec_rotl64v helper_gvec_rotl64v_tricore +#define helper_gvec_rotr8v helper_gvec_rotr8v_tricore +#define helper_gvec_rotr16v helper_gvec_rotr16v_tricore +#define helper_gvec_rotr32v helper_gvec_rotr32v_tricore +#define helper_gvec_rotr64v helper_gvec_rotr64v_tricore #define helper_gvec_eq8 helper_gvec_eq8_tricore #define helper_gvec_ne8 helper_gvec_ne8_tricore #define helper_gvec_lt8 helper_gvec_lt8_tricore diff --git a/qemu/util/guest-random.c b/qemu/util/guest-random.c index 7c1fe7be4f..9a0f300ba4 100644 --- a/qemu/util/guest-random.c +++ b/qemu/util/guest-random.c @@ -78,4 +78,3 @@ void 
qemu_guest_random_seed_thread_part2(uint64_t seed) } #endif } - diff --git a/qemu/x86_64.h b/qemu/x86_64.h index 0118257e9e..cec72b9667 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_x86_64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_x86_64 #define tcg_gen_st_i64 tcg_gen_st_i64_x86_64 +#define tcg_gen_add_i64 tcg_gen_add_i64_x86_64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_x86_64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_x86_64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_x86_64 #define cpu_icount_to_ns cpu_icount_to_ns_x86_64 #define cpu_is_stopped cpu_is_stopped_x86_64 #define cpu_get_ticks cpu_get_ticks_x86_64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_x86_64 #define floatx80_mul floatx80_mul_x86_64 #define floatx80_div floatx80_div_x86_64 +#define floatx80_modrem floatx80_modrem_x86_64 +#define floatx80_mod floatx80_mod_x86_64 #define floatx80_rem floatx80_rem_x86_64 #define floatx80_sqrt floatx80_sqrt_x86_64 #define floatx80_eq floatx80_eq_x86_64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_x86_64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_x86_64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_x86_64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_x86_64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_x86_64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_x86_64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_x86_64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_x86_64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_x86_64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_x86_64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_x86_64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_x86_64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_x86_64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_x86_64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_x86_64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_x86_64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_x86_64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_x86_64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_x86_64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_x86_64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_x86_64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_x86_64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_x86_64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_x86_64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_x86_64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_x86_64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_x86_64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_x86_64 #define tcg_gen_shri_vec tcg_gen_shri_vec_x86_64 #define tcg_gen_sari_vec tcg_gen_sari_vec_x86_64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_x86_64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_x86_64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_x86_64 #define tcg_gen_add_vec tcg_gen_add_vec_x86_64 #define tcg_gen_sub_vec tcg_gen_sub_vec_x86_64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_x86_64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_x86_64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_x86_64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_x86_64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_x86_64 #define tcg_gen_shls_vec tcg_gen_shls_vec_x86_64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_x86_64 #define tcg_gen_sars_vec tcg_gen_sars_vec_x86_64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_x86_64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_x86_64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_x86_64 #define 
tb_htable_lookup tb_htable_lookup_x86_64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_x86_64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_x86_64 #define tlb_init tlb_init_x86_64 +#define tlb_destroy tlb_destroy_x86_64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_x86_64 #define tlb_flush tlb_flush_x86_64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_x86_64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_x86_64 #define get_page_addr_code_hostp get_page_addr_code_hostp_x86_64 #define get_page_addr_code get_page_addr_code_x86_64 +#define probe_access_flags probe_access_flags_x86_64 #define probe_access probe_access_x86_64 #define tlb_vaddr_to_host tlb_vaddr_to_host_x86_64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_x86_64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_x86_64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_x86_64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_x86_64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_x86_64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_x86_64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_x86_64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_x86_64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_x86_64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_x86_64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_x86_64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_x86_64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_x86_64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_x86_64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_x86_64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_x86_64 #define cpu_ldub_data_ra cpu_ldub_data_ra_x86_64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_x86_64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_x86_64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_x86_64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_x86_64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_x86_64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_x86_64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_x86_64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_x86_64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_x86_64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_x86_64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_x86_64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_x86_64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_x86_64 #define cpu_ldub_data cpu_ldub_data_x86_64 #define cpu_ldsb_data cpu_ldsb_data_x86_64 -#define cpu_lduw_data cpu_lduw_data_x86_64 -#define cpu_ldsw_data cpu_ldsw_data_x86_64 -#define cpu_ldl_data cpu_ldl_data_x86_64 -#define cpu_ldq_data cpu_ldq_data_x86_64 +#define cpu_lduw_be_data cpu_lduw_be_data_x86_64 +#define cpu_lduw_le_data cpu_lduw_le_data_x86_64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_x86_64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_x86_64 +#define cpu_ldl_be_data cpu_ldl_be_data_x86_64 +#define cpu_ldl_le_data cpu_ldl_le_data_x86_64 +#define cpu_ldq_le_data cpu_ldq_le_data_x86_64 +#define cpu_ldq_be_data cpu_ldq_be_data_x86_64 #define helper_ret_stb_mmu helper_ret_stb_mmu_x86_64 #define helper_le_stw_mmu helper_le_stw_mmu_x86_64 #define helper_be_stw_mmu helper_be_stw_mmu_x86_64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_x86_64 #define helper_be_stq_mmu helper_be_stq_mmu_x86_64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_x86_64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_x86_64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_x86_64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_x86_64 +#define 
cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_x86_64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_x86_64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_x86_64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_x86_64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_x86_64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_x86_64 #define cpu_stb_data_ra cpu_stb_data_ra_x86_64 -#define cpu_stw_data_ra cpu_stw_data_ra_x86_64 -#define cpu_stl_data_ra cpu_stl_data_ra_x86_64 -#define cpu_stq_data_ra cpu_stq_data_ra_x86_64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_x86_64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_x86_64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_x86_64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_x86_64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_x86_64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_x86_64 #define cpu_stb_data cpu_stb_data_x86_64 -#define cpu_stw_data cpu_stw_data_x86_64 -#define cpu_stl_data cpu_stl_data_x86_64 -#define cpu_stq_data cpu_stq_data_x86_64 +#define cpu_stw_be_data cpu_stw_be_data_x86_64 +#define cpu_stw_le_data cpu_stw_le_data_x86_64 +#define cpu_stl_be_data cpu_stl_be_data_x86_64 +#define cpu_stl_le_data cpu_stl_le_data_x86_64 +#define cpu_stq_be_data cpu_stq_be_data_x86_64 +#define cpu_stq_le_data cpu_stq_le_data_x86_64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_x86_64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_x86_64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_x86_64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_x86_64 #define cpu_ldl_code cpu_ldl_code_x86_64 #define cpu_ldq_code cpu_ldq_code_x86_64 +#define cpu_interrupt_handler cpu_interrupt_handler_x86_64 #define helper_div_i32 helper_div_i32_x86_64 #define helper_rem_i32 helper_rem_i32_x86_64 #define helper_divu_i32 helper_divu_i32_x86_64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_x86_64 #define helper_gvec_sar32i helper_gvec_sar32i_x86_64 #define helper_gvec_sar64i helper_gvec_sar64i_x86_64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_x86_64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_x86_64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_x86_64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_x86_64 #define helper_gvec_shl8v helper_gvec_shl8v_x86_64 #define helper_gvec_shl16v helper_gvec_shl16v_x86_64 #define helper_gvec_shl32v helper_gvec_shl32v_x86_64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_x86_64 #define helper_gvec_sar32v helper_gvec_sar32v_x86_64 #define helper_gvec_sar64v helper_gvec_sar64v_x86_64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_x86_64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_x86_64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_x86_64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_x86_64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_x86_64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_x86_64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_x86_64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_x86_64 #define helper_gvec_eq8 helper_gvec_eq8_x86_64 #define helper_gvec_ne8 helper_gvec_ne8_x86_64 #define helper_gvec_lt8 helper_gvec_lt8_x86_64 @@ -1419,6 +1473,7 @@ #define helper_xgetbv helper_xgetbv_x86_64 #define helper_xsetbv helper_xsetbv_x86_64 #define update_mxcsr_status update_mxcsr_status_x86_64 +#define update_mxcsr_from_sse_status update_mxcsr_from_sse_status_x86_64 #define helper_ldmxcsr helper_ldmxcsr_x86_64 #define helper_enter_mmx helper_enter_mmx_x86_64 #define helper_emms 
helper_emms_x86_64 diff --git a/symbols.sh b/symbols.sh index 11866f9334..4424fb4319 100755 --- a/symbols.sh +++ b/symbols.sh @@ -42,7 +42,10 @@ tcg_gen_sar_i64 \ tcg_gen_shl_i64 \ tcg_gen_shr_i64 \ tcg_gen_st_i64 \ +tcg_gen_add_i64 \ +tcg_gen_sub_i64 \ tcg_gen_xor_i64 \ +tcg_gen_neg_i64 \ cpu_icount_to_ns \ cpu_is_stopped \ cpu_get_ticks \ @@ -374,6 +377,8 @@ floatx80_add \ floatx80_sub \ floatx80_mul \ floatx80_div \ +floatx80_modrem \ +floatx80_mod \ floatx80_rem \ floatx80_sqrt \ floatx80_eq \ @@ -648,6 +653,7 @@ tcg_gen_gvec_mov \ tcg_gen_gvec_dup_i32 \ tcg_gen_gvec_dup_i64 \ tcg_gen_gvec_dup_mem \ +tcg_gen_gvec_dup_imm \ tcg_gen_gvec_dup64i \ tcg_gen_gvec_dup32i \ tcg_gen_gvec_dup16i \ @@ -702,13 +708,20 @@ tcg_gen_vec_shr16i_i64 \ tcg_gen_gvec_shri \ tcg_gen_vec_sar8i_i64 \ tcg_gen_vec_sar16i_i64 \ +tcg_gen_vec_rotl8i_i64 \ +tcg_gen_vec_rotl16i_i64 \ tcg_gen_gvec_sari \ +tcg_gen_gvec_rotli \ +tcg_gen_gvec_rotri \ tcg_gen_gvec_shls \ tcg_gen_gvec_shrs \ tcg_gen_gvec_sars \ +tcg_gen_gvec_rotls \ tcg_gen_gvec_shlv \ tcg_gen_gvec_shrv \ tcg_gen_gvec_sarv \ +tcg_gen_gvec_rotlv \ +tcg_gen_gvec_rotrv \ tcg_gen_gvec_cmp \ tcg_gen_gvec_bitsel \ tcg_can_emit_vecop_list \ @@ -745,6 +758,8 @@ tcg_gen_abs_vec \ tcg_gen_shli_vec \ tcg_gen_shri_vec \ tcg_gen_sari_vec \ +tcg_gen_rotli_vec \ +tcg_gen_rotri_vec \ tcg_gen_cmp_vec \ tcg_gen_add_vec \ tcg_gen_sub_vec \ @@ -760,9 +775,12 @@ tcg_gen_umax_vec \ tcg_gen_shlv_vec \ tcg_gen_shrv_vec \ tcg_gen_sarv_vec \ +tcg_gen_rotlv_vec \ +tcg_gen_rotrv_vec \ tcg_gen_shls_vec \ tcg_gen_shrs_vec \ tcg_gen_sars_vec \ +tcg_gen_rotls_vec \ tcg_gen_bitsel_vec \ tcg_gen_cmpsel_vec \ tb_htable_lookup \ @@ -774,6 +792,7 @@ cpu_loop_exit \ cpu_loop_exit_restore \ cpu_loop_exit_atomic \ tlb_init \ +tlb_destroy \ tlb_flush_by_mmuidx \ tlb_flush \ tlb_flush_by_mmuidx_all_cpus \ @@ -794,6 +813,7 @@ tlb_set_page_with_attrs \ tlb_set_page \ get_page_addr_code_hostp \ get_page_addr_code \ +probe_access_flags \ probe_access \ tlb_vaddr_to_host \ helper_ret_ldub_mmu \ @@ -810,22 +830,34 @@ helper_le_ldsl_mmu \ helper_be_ldsl_mmu \ cpu_ldub_mmuidx_ra \ cpu_ldsb_mmuidx_ra \ -cpu_lduw_mmuidx_ra \ -cpu_ldsw_mmuidx_ra \ -cpu_ldl_mmuidx_ra \ -cpu_ldq_mmuidx_ra \ +cpu_lduw_be_mmuidx_ra \ +cpu_lduw_le_mmuidx_ra \ +cpu_ldsw_be_mmuidx_ra \ +cpu_ldsw_le_mmuidx_ra \ +cpu_ldl_be_mmuidx_ra \ +cpu_ldl_le_mmuidx_ra \ +cpu_ldq_be_mmuidx_ra \ +cpu_ldq_le_mmuidx_ra \ cpu_ldub_data_ra \ cpu_ldsb_data_ra \ -cpu_lduw_data_ra \ -cpu_ldsw_data_ra \ -cpu_ldl_data_ra \ -cpu_ldq_data_ra \ +cpu_lduw_be_data_ra \ +cpu_lduw_le_data_ra \ +cpu_ldsw_be_data_ra \ +cpu_ldsw_le_data_ra \ +cpu_ldl_be_data_ra \ +cpu_ldl_le_data_ra \ +cpu_ldq_be_data_ra \ +cpu_ldq_le_data_ra \ cpu_ldub_data \ cpu_ldsb_data \ -cpu_lduw_data \ -cpu_ldsw_data \ -cpu_ldl_data \ -cpu_ldq_data \ +cpu_lduw_be_data \ +cpu_lduw_le_data \ +cpu_ldsw_be_data \ +cpu_ldsw_le_data \ +cpu_ldl_be_data \ +cpu_ldl_le_data \ +cpu_ldq_le_data \ +cpu_ldq_be_data \ helper_ret_stb_mmu \ helper_le_stw_mmu \ helper_be_stw_mmu \ @@ -834,17 +866,26 @@ helper_be_stl_mmu \ helper_le_stq_mmu \ helper_be_stq_mmu \ cpu_stb_mmuidx_ra \ -cpu_stw_mmuidx_ra \ -cpu_stl_mmuidx_ra \ -cpu_stq_mmuidx_ra \ +cpu_stw_be_mmuidx_ra \ +cpu_stw_le_mmuidx_ra \ +cpu_stl_be_mmuidx_ra \ +cpu_stl_le_mmuidx_ra \ +cpu_stq_be_mmuidx_ra \ +cpu_stq_le_mmuidx_ra \ cpu_stb_data_ra \ -cpu_stw_data_ra \ -cpu_stl_data_ra \ -cpu_stq_data_ra \ +cpu_stw_be_data_ra \ +cpu_stw_le_data_ra \ +cpu_stl_be_data_ra \ +cpu_stl_le_data_ra \ +cpu_stq_be_data_ra \ +cpu_stq_le_data_ra \ cpu_stb_data \ 
-cpu_stw_data \ -cpu_stl_data \ -cpu_stq_data \ +cpu_stw_be_data \ +cpu_stw_le_data \ +cpu_stl_be_data \ +cpu_stl_le_data \ +cpu_stq_be_data \ +cpu_stq_le_data \ helper_atomic_cmpxchgb_mmu \ helper_atomic_xchgb_mmu \ helper_atomic_fetch_addb_mmu \ @@ -1101,6 +1142,7 @@ cpu_ldub_code \ cpu_lduw_code \ cpu_ldl_code \ cpu_ldq_code \ +cpu_interrupt_handler \ helper_div_i32 \ helper_rem_i32 \ helper_divu_i32 \ @@ -1185,6 +1227,10 @@ helper_gvec_sar8i \ helper_gvec_sar16i \ helper_gvec_sar32i \ helper_gvec_sar64i \ +helper_gvec_rotl8i \ +helper_gvec_rotl16i \ +helper_gvec_rotl32i \ +helper_gvec_rotl64i \ helper_gvec_shl8v \ helper_gvec_shl16v \ helper_gvec_shl32v \ @@ -1197,6 +1243,14 @@ helper_gvec_sar8v \ helper_gvec_sar16v \ helper_gvec_sar32v \ helper_gvec_sar64v \ +helper_gvec_rotl8v \ +helper_gvec_rotl16v \ +helper_gvec_rotl32v \ +helper_gvec_rotl64v \ +helper_gvec_rotr8v \ +helper_gvec_rotr16v \ +helper_gvec_rotr32v \ +helper_gvec_rotr64v \ helper_gvec_eq8 \ helper_gvec_ne8 \ helper_gvec_lt8 \ @@ -1422,6 +1476,7 @@ helper_xrstor \ helper_xgetbv \ helper_xsetbv \ update_mxcsr_status \ +update_mxcsr_from_sse_status \ helper_ldmxcsr \ helper_enter_mmx \ helper_emms \ @@ -2604,6 +2659,102 @@ cmtst_op \ sri_op \ usra_op \ ssra_op \ +gen_gvec_ceq0 \ +gen_gvec_cge0 \ +gen_gvec_cgt0 \ +gen_gvec_cle0 \ +gen_gvec_clt0 \ +gen_gvec_cmtst \ +gen_gvec_mla \ +gen_gvec_mls \ +gen_gvec_saba \ +gen_gvec_sabd \ +gen_gvec_sli \ +gen_gvec_sqadd_qc \ +gen_gvec_sqrdmlah_qc \ +gen_gvec_sqrdmlsh_qc \ +gen_gvec_sqsub_qc \ +gen_gvec_sri \ +gen_gvec_srshr \ +gen_gvec_srsra \ +gen_gvec_sshl \ +gen_gvec_ssra \ +gen_gvec_uaba \ +gen_gvec_uabd \ +gen_gvec_uqadd_qc \ +gen_gvec_uqsub_qc \ +gen_gvec_urshr \ +gen_gvec_ursra \ +gen_gvec_ushl \ +gen_gvec_usra \ +helper_crypto_rax1 \ +helper_crypto_sha1c \ +helper_crypto_sha1m \ +helper_crypto_sha1p \ +helper_crypto_sha1su0 \ +helper_crypto_sm3tt1a \ +helper_crypto_sm3tt1b \ +helper_crypto_sm3tt2a \ +helper_crypto_sm3tt2b \ +helper_gvec_ceq0_b \ +helper_gvec_ceq0_h \ +helper_gvec_cge0_b \ +helper_gvec_cge0_h \ +helper_gvec_cgt0_b \ +helper_gvec_cgt0_h \ +helper_gvec_cle0_b \ +helper_gvec_cle0_h \ +helper_gvec_clt0_b \ +helper_gvec_clt0_h \ +helper_gvec_fabd_s \ +helper_gvec_saba_b \ +helper_gvec_saba_d \ +helper_gvec_saba_h \ +helper_gvec_saba_s \ +helper_gvec_sabd_b \ +helper_gvec_sabd_d \ +helper_gvec_sabd_h \ +helper_gvec_sabd_s \ +helper_gvec_sli_b \ +helper_gvec_sli_d \ +helper_gvec_sli_h \ +helper_gvec_sli_s \ +helper_gvec_sri_b \ +helper_gvec_sri_d \ +helper_gvec_sri_h \ +helper_gvec_sri_s \ +helper_gvec_srshr_b \ +helper_gvec_srshr_d \ +helper_gvec_srshr_h \ +helper_gvec_srshr_s \ +helper_gvec_srsra_b \ +helper_gvec_srsra_d \ +helper_gvec_srsra_h \ +helper_gvec_srsra_s \ +helper_gvec_ssra_b \ +helper_gvec_ssra_d \ +helper_gvec_ssra_h \ +helper_gvec_ssra_s \ +helper_gvec_uaba_b \ +helper_gvec_uaba_d \ +helper_gvec_uaba_h \ +helper_gvec_uaba_s \ +helper_gvec_uabd_b \ +helper_gvec_uabd_d \ +helper_gvec_uabd_h \ +helper_gvec_uabd_s \ +helper_gvec_urshr_b \ +helper_gvec_urshr_d \ +helper_gvec_urshr_h \ +helper_gvec_urshr_s \ +helper_gvec_ursra_b \ +helper_gvec_ursra_d \ +helper_gvec_ursra_h \ +helper_gvec_ursra_s \ +helper_gvec_usra_b \ +helper_gvec_usra_d \ +helper_gvec_usra_h \ +helper_gvec_usra_s \ " aarch64_SYMBOLS=" @@ -2930,6 +3081,11 @@ helper_v7m_tt \ arm_v7m_mmu_idx_all \ arm_v7m_mmu_idx_for_secstate_and_priv \ arm_v7m_mmu_idx_for_secstate \ +mte_probe1 \ +mte_check1 \ +mte_checkN \ +gen_helper_mte_check1 \ +gen_helper_mte_checkN \ helper_neon_qadd_u8 \ 
helper_neon_qadd_u16 \ helper_neon_qadd_u32 \ @@ -3169,6 +3325,21 @@ helper_autda \ helper_autdb \ helper_xpaci \ helper_xpacd \ +helper_mte_check1 \ +helper_mte_checkN \ +helper_mte_check_zva \ +helper_irg \ +helper_addsubg \ +helper_ldg \ +helper_stg \ +helper_stg_parallel \ +helper_stg_stub \ +helper_st2g \ +helper_st2g_parallel \ +helper_st2g_stub \ +helper_ldgm \ +helper_stgm \ +helper_stzgm_tags \ arm_is_psci_call \ arm_handle_psci_call \ helper_sve_predtest1 \ @@ -4061,6 +4232,7 @@ a64_translate_init \ gen_a64_set_pc_im \ unallocated_encoding \ new_tmp_a64 \ +new_tmp_a64_local \ new_tmp_a64_zero \ cpu_reg \ cpu_reg_sp \ @@ -4381,6 +4553,7 @@ helper_sret \ helper_mret \ helper_wfi \ helper_tlb_flush \ +helper_hyp_tlb_flush \ pmp_hart_has_privs \ pmpcfg_csr_write \ pmpcfg_csr_read \ @@ -4401,6 +4574,1008 @@ helper_fcvt_d_lu \ gen_helper_tlb_flush \ riscv_fpr_regnames \ riscv_int_regnames \ +fclass_d \ +fclass_h \ +fclass_s \ +helper_vaadd_vv_b \ +helper_vaadd_vv_d \ +helper_vaadd_vv_h \ +helper_vaadd_vv_w \ +helper_vaadd_vx_b \ +helper_vaadd_vx_d \ +helper_vaadd_vx_h \ +helper_vaadd_vx_w \ +helper_vadc_vvm_b \ +helper_vadc_vvm_d \ +helper_vadc_vvm_h \ +helper_vadc_vvm_w \ +helper_vadc_vxm_b \ +helper_vadc_vxm_d \ +helper_vadc_vxm_h \ +helper_vadc_vxm_w \ +helper_vadd_vv_b \ +helper_vadd_vv_d \ +helper_vadd_vv_h \ +helper_vadd_vv_w \ +helper_vadd_vx_b \ +helper_vadd_vx_d \ +helper_vadd_vx_h \ +helper_vadd_vx_w \ +helper_vamoaddw_v_w \ +helper_vamoandw_v_w \ +helper_vamomaxuw_v_w \ +helper_vamomaxw_v_w \ +helper_vamominuw_v_w \ +helper_vamominw_v_w \ +helper_vamoorw_v_w \ +helper_vamoswapw_v_w \ +helper_vamoxorw_v_w \ +helper_vand_vv_b \ +helper_vand_vv_d \ +helper_vand_vv_h \ +helper_vand_vv_w \ +helper_vand_vx_b \ +helper_vand_vx_d \ +helper_vand_vx_h \ +helper_vand_vx_w \ +helper_vasub_vv_b \ +helper_vasub_vv_d \ +helper_vasub_vv_h \ +helper_vasub_vv_w \ +helper_vasub_vx_b \ +helper_vasub_vx_d \ +helper_vasub_vx_h \ +helper_vasub_vx_w \ +helper_vcompress_vm_b \ +helper_vcompress_vm_d \ +helper_vcompress_vm_h \ +helper_vcompress_vm_w \ +helper_vdiv_vv_b \ +helper_vdiv_vv_d \ +helper_vdiv_vv_h \ +helper_vdiv_vv_w \ +helper_vdiv_vx_b \ +helper_vdiv_vx_d \ +helper_vdiv_vx_h \ +helper_vdiv_vx_w \ +helper_vdivu_vv_b \ +helper_vdivu_vv_d \ +helper_vdivu_vv_h \ +helper_vdivu_vv_w \ +helper_vdivu_vx_b \ +helper_vdivu_vx_d \ +helper_vdivu_vx_h \ +helper_vdivu_vx_w \ +helper_vec_rsubs16 \ +helper_vec_rsubs32 \ +helper_vec_rsubs64 \ +helper_vec_rsubs8 \ +helper_vfadd_vf_d \ +helper_vfadd_vf_h \ +helper_vfadd_vf_w \ +helper_vfadd_vv_d \ +helper_vfadd_vv_h \ +helper_vfadd_vv_w \ +helper_vfclass_v_d \ +helper_vfclass_v_h \ +helper_vfclass_v_w \ +helper_vfcvt_f_x_v_d \ +helper_vfcvt_f_x_v_h \ +helper_vfcvt_f_x_v_w \ +helper_vfcvt_f_xu_v_d \ +helper_vfcvt_f_xu_v_h \ +helper_vfcvt_f_xu_v_w \ +helper_vfcvt_x_f_v_d \ +helper_vfcvt_x_f_v_h \ +helper_vfcvt_x_f_v_w \ +helper_vfcvt_xu_f_v_d \ +helper_vfcvt_xu_f_v_h \ +helper_vfcvt_xu_f_v_w \ +helper_vfdiv_vf_d \ +helper_vfdiv_vf_h \ +helper_vfdiv_vf_w \ +helper_vfdiv_vv_d \ +helper_vfdiv_vv_h \ +helper_vfdiv_vv_w \ +helper_vfmacc_vf_d \ +helper_vfmacc_vf_h \ +helper_vfmacc_vf_w \ +helper_vfmacc_vv_d \ +helper_vfmacc_vv_h \ +helper_vfmacc_vv_w \ +helper_vfmadd_vf_d \ +helper_vfmadd_vf_h \ +helper_vfmadd_vf_w \ +helper_vfmadd_vv_d \ +helper_vfmadd_vv_h \ +helper_vfmadd_vv_w \ +helper_vfmax_vf_d \ +helper_vfmax_vf_h \ +helper_vfmax_vf_w \ +helper_vfmax_vv_d \ +helper_vfmax_vv_h \ +helper_vfmax_vv_w \ +helper_vfmerge_vfm_d \ +helper_vfmerge_vfm_h \ 
+helper_vfmerge_vfm_w \ +helper_vfmin_vf_d \ +helper_vfmin_vf_h \ +helper_vfmin_vf_w \ +helper_vfmin_vv_d \ +helper_vfmin_vv_h \ +helper_vfmin_vv_w \ +helper_vfmsac_vf_d \ +helper_vfmsac_vf_h \ +helper_vfmsac_vf_w \ +helper_vfmsac_vv_d \ +helper_vfmsac_vv_h \ +helper_vfmsac_vv_w \ +helper_vfmsub_vf_d \ +helper_vfmsub_vf_h \ +helper_vfmsub_vf_w \ +helper_vfmsub_vv_d \ +helper_vfmsub_vv_h \ +helper_vfmsub_vv_w \ +helper_vfmul_vf_d \ +helper_vfmul_vf_h \ +helper_vfmul_vf_w \ +helper_vfmul_vv_d \ +helper_vfmul_vv_h \ +helper_vfmul_vv_w \ +helper_vfncvt_f_f_v_h \ +helper_vfncvt_f_f_v_w \ +helper_vfncvt_f_x_v_h \ +helper_vfncvt_f_x_v_w \ +helper_vfncvt_f_xu_v_h \ +helper_vfncvt_f_xu_v_w \ +helper_vfncvt_x_f_v_h \ +helper_vfncvt_x_f_v_w \ +helper_vfncvt_xu_f_v_h \ +helper_vfncvt_xu_f_v_w \ +helper_vfnmacc_vf_d \ +helper_vfnmacc_vf_h \ +helper_vfnmacc_vf_w \ +helper_vfnmacc_vv_d \ +helper_vfnmacc_vv_h \ +helper_vfnmacc_vv_w \ +helper_vfnmadd_vf_d \ +helper_vfnmadd_vf_h \ +helper_vfnmadd_vf_w \ +helper_vfnmadd_vv_d \ +helper_vfnmadd_vv_h \ +helper_vfnmadd_vv_w \ +helper_vfnmsac_vf_d \ +helper_vfnmsac_vf_h \ +helper_vfnmsac_vf_w \ +helper_vfnmsac_vv_d \ +helper_vfnmsac_vv_h \ +helper_vfnmsac_vv_w \ +helper_vfnmsub_vf_d \ +helper_vfnmsub_vf_h \ +helper_vfnmsub_vf_w \ +helper_vfnmsub_vv_d \ +helper_vfnmsub_vv_h \ +helper_vfnmsub_vv_w \ +helper_vfrdiv_vf_d \ +helper_vfrdiv_vf_h \ +helper_vfrdiv_vf_w \ +helper_vfredmax_vs_d \ +helper_vfredmax_vs_h \ +helper_vfredmax_vs_w \ +helper_vfredmin_vs_d \ +helper_vfredmin_vs_h \ +helper_vfredmin_vs_w \ +helper_vfredsum_vs_d \ +helper_vfredsum_vs_h \ +helper_vfredsum_vs_w \ +helper_vfrsub_vf_d \ +helper_vfrsub_vf_h \ +helper_vfrsub_vf_w \ +helper_vfsgnj_vf_d \ +helper_vfsgnj_vf_h \ +helper_vfsgnj_vf_w \ +helper_vfsgnj_vv_d \ +helper_vfsgnj_vv_h \ +helper_vfsgnj_vv_w \ +helper_vfsgnjn_vf_d \ +helper_vfsgnjn_vf_h \ +helper_vfsgnjn_vf_w \ +helper_vfsgnjn_vv_d \ +helper_vfsgnjn_vv_h \ +helper_vfsgnjn_vv_w \ +helper_vfsgnjx_vf_d \ +helper_vfsgnjx_vf_h \ +helper_vfsgnjx_vf_w \ +helper_vfsgnjx_vv_d \ +helper_vfsgnjx_vv_h \ +helper_vfsgnjx_vv_w \ +helper_vfsqrt_v_d \ +helper_vfsqrt_v_h \ +helper_vfsqrt_v_w \ +helper_vfsub_vf_d \ +helper_vfsub_vf_h \ +helper_vfsub_vf_w \ +helper_vfsub_vv_d \ +helper_vfsub_vv_h \ +helper_vfsub_vv_w \ +helper_vfwadd_vf_h \ +helper_vfwadd_vf_w \ +helper_vfwadd_vv_h \ +helper_vfwadd_vv_w \ +helper_vfwadd_wf_h \ +helper_vfwadd_wf_w \ +helper_vfwadd_wv_h \ +helper_vfwadd_wv_w \ +helper_vfwcvt_f_f_v_h \ +helper_vfwcvt_f_f_v_w \ +helper_vfwcvt_f_x_v_h \ +helper_vfwcvt_f_x_v_w \ +helper_vfwcvt_f_xu_v_h \ +helper_vfwcvt_f_xu_v_w \ +helper_vfwcvt_x_f_v_h \ +helper_vfwcvt_x_f_v_w \ +helper_vfwcvt_xu_f_v_h \ +helper_vfwcvt_xu_f_v_w \ +helper_vfwmacc_vf_h \ +helper_vfwmacc_vf_w \ +helper_vfwmacc_vv_h \ +helper_vfwmacc_vv_w \ +helper_vfwmsac_vf_h \ +helper_vfwmsac_vf_w \ +helper_vfwmsac_vv_h \ +helper_vfwmsac_vv_w \ +helper_vfwmul_vf_h \ +helper_vfwmul_vf_w \ +helper_vfwmul_vv_h \ +helper_vfwmul_vv_w \ +helper_vfwnmacc_vf_h \ +helper_vfwnmacc_vf_w \ +helper_vfwnmacc_vv_h \ +helper_vfwnmacc_vv_w \ +helper_vfwnmsac_vf_h \ +helper_vfwnmsac_vf_w \ +helper_vfwnmsac_vv_h \ +helper_vfwnmsac_vv_w \ +helper_vfwredsum_vs_h \ +helper_vfwredsum_vs_w \ +helper_vfwsub_vf_h \ +helper_vfwsub_vf_w \ +helper_vfwsub_vv_h \ +helper_vfwsub_vv_w \ +helper_vfwsub_wf_h \ +helper_vfwsub_wf_w \ +helper_vfwsub_wv_h \ +helper_vfwsub_wv_w \ +helper_vid_v_b \ +helper_vid_v_d \ +helper_vid_v_h \ +helper_vid_v_w \ +helper_viota_m_b \ +helper_viota_m_d \ +helper_viota_m_h \ 
+helper_viota_m_w \ +helper_vlb_v_b \ +helper_vlb_v_b_mask \ +helper_vlb_v_d \ +helper_vlb_v_d_mask \ +helper_vlb_v_h \ +helper_vlb_v_h_mask \ +helper_vlb_v_w \ +helper_vlb_v_w_mask \ +helper_vlbff_v_b \ +helper_vlbff_v_d \ +helper_vlbff_v_h \ +helper_vlbff_v_w \ +helper_vlbu_v_b \ +helper_vlbu_v_b_mask \ +helper_vlbu_v_d \ +helper_vlbu_v_d_mask \ +helper_vlbu_v_h \ +helper_vlbu_v_h_mask \ +helper_vlbu_v_w \ +helper_vlbu_v_w_mask \ +helper_vlbuff_v_b \ +helper_vlbuff_v_d \ +helper_vlbuff_v_h \ +helper_vlbuff_v_w \ +helper_vle_v_b \ +helper_vle_v_b_mask \ +helper_vle_v_d \ +helper_vle_v_d_mask \ +helper_vle_v_h \ +helper_vle_v_h_mask \ +helper_vle_v_w \ +helper_vle_v_w_mask \ +helper_vleff_v_b \ +helper_vleff_v_d \ +helper_vleff_v_h \ +helper_vleff_v_w \ +helper_vlh_v_d \ +helper_vlh_v_d_mask \ +helper_vlh_v_h \ +helper_vlh_v_h_mask \ +helper_vlh_v_w \ +helper_vlh_v_w_mask \ +helper_vlhff_v_d \ +helper_vlhff_v_h \ +helper_vlhff_v_w \ +helper_vlhu_v_d \ +helper_vlhu_v_d_mask \ +helper_vlhu_v_h \ +helper_vlhu_v_h_mask \ +helper_vlhu_v_w \ +helper_vlhu_v_w_mask \ +helper_vlhuff_v_d \ +helper_vlhuff_v_h \ +helper_vlhuff_v_w \ +helper_vlsb_v_b \ +helper_vlsb_v_d \ +helper_vlsb_v_h \ +helper_vlsb_v_w \ +helper_vlsbu_v_b \ +helper_vlsbu_v_d \ +helper_vlsbu_v_h \ +helper_vlsbu_v_w \ +helper_vlse_v_b \ +helper_vlse_v_d \ +helper_vlse_v_h \ +helper_vlse_v_w \ +helper_vlsh_v_d \ +helper_vlsh_v_h \ +helper_vlsh_v_w \ +helper_vlshu_v_d \ +helper_vlshu_v_h \ +helper_vlshu_v_w \ +helper_vlsw_v_d \ +helper_vlsw_v_w \ +helper_vlswu_v_d \ +helper_vlswu_v_w \ +helper_vlw_v_d \ +helper_vlw_v_d_mask \ +helper_vlw_v_w \ +helper_vlw_v_w_mask \ +helper_vlwff_v_d \ +helper_vlwff_v_w \ +helper_vlwu_v_d \ +helper_vlwu_v_d_mask \ +helper_vlwu_v_w \ +helper_vlwu_v_w_mask \ +helper_vlwuff_v_d \ +helper_vlwuff_v_w \ +helper_vlxb_v_b \ +helper_vlxb_v_d \ +helper_vlxb_v_h \ +helper_vlxb_v_w \ +helper_vlxbu_v_b \ +helper_vlxbu_v_d \ +helper_vlxbu_v_h \ +helper_vlxbu_v_w \ +helper_vlxe_v_b \ +helper_vlxe_v_d \ +helper_vlxe_v_h \ +helper_vlxe_v_w \ +helper_vlxh_v_d \ +helper_vlxh_v_h \ +helper_vlxh_v_w \ +helper_vlxhu_v_d \ +helper_vlxhu_v_h \ +helper_vlxhu_v_w \ +helper_vlxw_v_d \ +helper_vlxw_v_w \ +helper_vlxwu_v_d \ +helper_vlxwu_v_w \ +helper_vmacc_vv_b \ +helper_vmacc_vv_d \ +helper_vmacc_vv_h \ +helper_vmacc_vv_w \ +helper_vmacc_vx_b \ +helper_vmacc_vx_d \ +helper_vmacc_vx_h \ +helper_vmacc_vx_w \ +helper_vmadc_vvm_b \ +helper_vmadc_vvm_d \ +helper_vmadc_vvm_h \ +helper_vmadc_vvm_w \ +helper_vmadc_vxm_b \ +helper_vmadc_vxm_d \ +helper_vmadc_vxm_h \ +helper_vmadc_vxm_w \ +helper_vmadd_vv_b \ +helper_vmadd_vv_d \ +helper_vmadd_vv_h \ +helper_vmadd_vv_w \ +helper_vmadd_vx_b \ +helper_vmadd_vx_d \ +helper_vmadd_vx_h \ +helper_vmadd_vx_w \ +helper_vmand_mm \ +helper_vmandnot_mm \ +helper_vmax_vv_b \ +helper_vmax_vv_d \ +helper_vmax_vv_h \ +helper_vmax_vv_w \ +helper_vmax_vx_b \ +helper_vmax_vx_d \ +helper_vmax_vx_h \ +helper_vmax_vx_w \ +helper_vmaxu_vv_b \ +helper_vmaxu_vv_d \ +helper_vmaxu_vv_h \ +helper_vmaxu_vv_w \ +helper_vmaxu_vx_b \ +helper_vmaxu_vx_d \ +helper_vmaxu_vx_h \ +helper_vmaxu_vx_w \ +helper_vmerge_vvm_b \ +helper_vmerge_vvm_d \ +helper_vmerge_vvm_h \ +helper_vmerge_vvm_w \ +helper_vmerge_vxm_b \ +helper_vmerge_vxm_d \ +helper_vmerge_vxm_h \ +helper_vmerge_vxm_w \ +helper_vmfeq_vf_d \ +helper_vmfeq_vf_h \ +helper_vmfeq_vf_w \ +helper_vmfeq_vv_d \ +helper_vmfeq_vv_h \ +helper_vmfeq_vv_w \ +helper_vmfge_vf_d \ +helper_vmfge_vf_h \ +helper_vmfge_vf_w \ +helper_vmfgt_vf_d \ +helper_vmfgt_vf_h \ 
+helper_vmfgt_vf_w \ +helper_vmfirst_m \ +helper_vmfle_vf_d \ +helper_vmfle_vf_h \ +helper_vmfle_vf_w \ +helper_vmfle_vv_d \ +helper_vmfle_vv_h \ +helper_vmfle_vv_w \ +helper_vmflt_vf_d \ +helper_vmflt_vf_h \ +helper_vmflt_vf_w \ +helper_vmflt_vv_d \ +helper_vmflt_vv_h \ +helper_vmflt_vv_w \ +helper_vmfne_vf_d \ +helper_vmfne_vf_h \ +helper_vmfne_vf_w \ +helper_vmfne_vv_d \ +helper_vmfne_vv_h \ +helper_vmfne_vv_w \ +helper_vmford_vf_d \ +helper_vmford_vf_h \ +helper_vmford_vf_w \ +helper_vmford_vv_d \ +helper_vmford_vv_h \ +helper_vmford_vv_w \ +helper_vmin_vv_b \ +helper_vmin_vv_d \ +helper_vmin_vv_h \ +helper_vmin_vv_w \ +helper_vmin_vx_b \ +helper_vmin_vx_d \ +helper_vmin_vx_h \ +helper_vmin_vx_w \ +helper_vminu_vv_b \ +helper_vminu_vv_d \ +helper_vminu_vv_h \ +helper_vminu_vv_w \ +helper_vminu_vx_b \ +helper_vminu_vx_d \ +helper_vminu_vx_h \ +helper_vminu_vx_w \ +helper_vmnand_mm \ +helper_vmnor_mm \ +helper_vmor_mm \ +helper_vmornot_mm \ +helper_vmpopc_m \ +helper_vmsbc_vvm_b \ +helper_vmsbc_vvm_d \ +helper_vmsbc_vvm_h \ +helper_vmsbc_vvm_w \ +helper_vmsbc_vxm_b \ +helper_vmsbc_vxm_d \ +helper_vmsbc_vxm_h \ +helper_vmsbc_vxm_w \ +helper_vmsbf_m \ +helper_vmseq_vv_b \ +helper_vmseq_vv_d \ +helper_vmseq_vv_h \ +helper_vmseq_vv_w \ +helper_vmseq_vx_b \ +helper_vmseq_vx_d \ +helper_vmseq_vx_h \ +helper_vmseq_vx_w \ +helper_vmsgt_vx_b \ +helper_vmsgt_vx_d \ +helper_vmsgt_vx_h \ +helper_vmsgt_vx_w \ +helper_vmsgtu_vx_b \ +helper_vmsgtu_vx_d \ +helper_vmsgtu_vx_h \ +helper_vmsgtu_vx_w \ +helper_vmsif_m \ +helper_vmsle_vv_b \ +helper_vmsle_vv_d \ +helper_vmsle_vv_h \ +helper_vmsle_vv_w \ +helper_vmsle_vx_b \ +helper_vmsle_vx_d \ +helper_vmsle_vx_h \ +helper_vmsle_vx_w \ +helper_vmsleu_vv_b \ +helper_vmsleu_vv_d \ +helper_vmsleu_vv_h \ +helper_vmsleu_vv_w \ +helper_vmsleu_vx_b \ +helper_vmsleu_vx_d \ +helper_vmsleu_vx_h \ +helper_vmsleu_vx_w \ +helper_vmslt_vv_b \ +helper_vmslt_vv_d \ +helper_vmslt_vv_h \ +helper_vmslt_vv_w \ +helper_vmslt_vx_b \ +helper_vmslt_vx_d \ +helper_vmslt_vx_h \ +helper_vmslt_vx_w \ +helper_vmsltu_vv_b \ +helper_vmsltu_vv_d \ +helper_vmsltu_vv_h \ +helper_vmsltu_vv_w \ +helper_vmsltu_vx_b \ +helper_vmsltu_vx_d \ +helper_vmsltu_vx_h \ +helper_vmsltu_vx_w \ +helper_vmsne_vv_b \ +helper_vmsne_vv_d \ +helper_vmsne_vv_h \ +helper_vmsne_vv_w \ +helper_vmsne_vx_b \ +helper_vmsne_vx_d \ +helper_vmsne_vx_h \ +helper_vmsne_vx_w \ +helper_vmsof_m \ +helper_vmul_vv_b \ +helper_vmul_vv_d \ +helper_vmul_vv_h \ +helper_vmul_vv_w \ +helper_vmul_vx_b \ +helper_vmul_vx_d \ +helper_vmul_vx_h \ +helper_vmul_vx_w \ +helper_vmulh_vv_b \ +helper_vmulh_vv_d \ +helper_vmulh_vv_h \ +helper_vmulh_vv_w \ +helper_vmulh_vx_b \ +helper_vmulh_vx_d \ +helper_vmulh_vx_h \ +helper_vmulh_vx_w \ +helper_vmulhsu_vv_b \ +helper_vmulhsu_vv_d \ +helper_vmulhsu_vv_h \ +helper_vmulhsu_vv_w \ +helper_vmulhsu_vx_b \ +helper_vmulhsu_vx_d \ +helper_vmulhsu_vx_h \ +helper_vmulhsu_vx_w \ +helper_vmulhu_vv_b \ +helper_vmulhu_vv_d \ +helper_vmulhu_vv_h \ +helper_vmulhu_vv_w \ +helper_vmulhu_vx_b \ +helper_vmulhu_vx_d \ +helper_vmulhu_vx_h \ +helper_vmulhu_vx_w \ +helper_vmv_v_v_b \ +helper_vmv_v_v_d \ +helper_vmv_v_v_h \ +helper_vmv_v_v_w \ +helper_vmv_v_x_b \ +helper_vmv_v_x_d \ +helper_vmv_v_x_h \ +helper_vmv_v_x_w \ +helper_vmxnor_mm \ +helper_vmxor_mm \ +helper_vnclip_vv_b \ +helper_vnclip_vv_h \ +helper_vnclip_vv_w \ +helper_vnclip_vx_b \ +helper_vnclip_vx_h \ +helper_vnclip_vx_w \ +helper_vnclipu_vv_b \ +helper_vnclipu_vv_h \ +helper_vnclipu_vv_w \ +helper_vnclipu_vx_b \ +helper_vnclipu_vx_h \ 
+helper_vnclipu_vx_w \ +helper_vnmsac_vv_b \ +helper_vnmsac_vv_d \ +helper_vnmsac_vv_h \ +helper_vnmsac_vv_w \ +helper_vnmsac_vx_b \ +helper_vnmsac_vx_d \ +helper_vnmsac_vx_h \ +helper_vnmsac_vx_w \ +helper_vnmsub_vv_b \ +helper_vnmsub_vv_d \ +helper_vnmsub_vv_h \ +helper_vnmsub_vv_w \ +helper_vnmsub_vx_b \ +helper_vnmsub_vx_d \ +helper_vnmsub_vx_h \ +helper_vnmsub_vx_w \ +helper_vnsra_vv_b \ +helper_vnsra_vv_h \ +helper_vnsra_vv_w \ +helper_vnsra_vx_b \ +helper_vnsra_vx_h \ +helper_vnsra_vx_w \ +helper_vnsrl_vv_b \ +helper_vnsrl_vv_h \ +helper_vnsrl_vv_w \ +helper_vnsrl_vx_b \ +helper_vnsrl_vx_h \ +helper_vnsrl_vx_w \ +helper_vor_vv_b \ +helper_vor_vv_d \ +helper_vor_vv_h \ +helper_vor_vv_w \ +helper_vor_vx_b \ +helper_vor_vx_d \ +helper_vor_vx_h \ +helper_vor_vx_w \ +helper_vredand_vs_b \ +helper_vredand_vs_d \ +helper_vredand_vs_h \ +helper_vredand_vs_w \ +helper_vredmax_vs_b \ +helper_vredmax_vs_d \ +helper_vredmax_vs_h \ +helper_vredmax_vs_w \ +helper_vredmaxu_vs_b \ +helper_vredmaxu_vs_d \ +helper_vredmaxu_vs_h \ +helper_vredmaxu_vs_w \ +helper_vredmin_vs_b \ +helper_vredmin_vs_d \ +helper_vredmin_vs_h \ +helper_vredmin_vs_w \ +helper_vredminu_vs_b \ +helper_vredminu_vs_d \ +helper_vredminu_vs_h \ +helper_vredminu_vs_w \ +helper_vredor_vs_b \ +helper_vredor_vs_d \ +helper_vredor_vs_h \ +helper_vredor_vs_w \ +helper_vredsum_vs_b \ +helper_vredsum_vs_d \ +helper_vredsum_vs_h \ +helper_vredsum_vs_w \ +helper_vredxor_vs_b \ +helper_vredxor_vs_d \ +helper_vredxor_vs_h \ +helper_vredxor_vs_w \ +helper_vrem_vv_b \ +helper_vrem_vv_d \ +helper_vrem_vv_h \ +helper_vrem_vv_w \ +helper_vrem_vx_b \ +helper_vrem_vx_d \ +helper_vrem_vx_h \ +helper_vrem_vx_w \ +helper_vremu_vv_b \ +helper_vremu_vv_d \ +helper_vremu_vv_h \ +helper_vremu_vv_w \ +helper_vremu_vx_b \ +helper_vremu_vx_d \ +helper_vremu_vx_h \ +helper_vremu_vx_w \ +helper_vrgather_vv_b \ +helper_vrgather_vv_d \ +helper_vrgather_vv_h \ +helper_vrgather_vv_w \ +helper_vrgather_vx_b \ +helper_vrgather_vx_d \ +helper_vrgather_vx_h \ +helper_vrgather_vx_w \ +helper_vrsub_vx_b \ +helper_vrsub_vx_d \ +helper_vrsub_vx_h \ +helper_vrsub_vx_w \ +helper_vsadd_vv_b \ +helper_vsadd_vv_d \ +helper_vsadd_vv_h \ +helper_vsadd_vv_w \ +helper_vsadd_vx_b \ +helper_vsadd_vx_d \ +helper_vsadd_vx_h \ +helper_vsadd_vx_w \ +helper_vsaddu_vv_b \ +helper_vsaddu_vv_d \ +helper_vsaddu_vv_h \ +helper_vsaddu_vv_w \ +helper_vsaddu_vx_b \ +helper_vsaddu_vx_d \ +helper_vsaddu_vx_h \ +helper_vsaddu_vx_w \ +helper_vsb_v_b \ +helper_vsb_v_b_mask \ +helper_vsb_v_d \ +helper_vsb_v_d_mask \ +helper_vsb_v_h \ +helper_vsb_v_h_mask \ +helper_vsb_v_w \ +helper_vsb_v_w_mask \ +helper_vsbc_vvm_b \ +helper_vsbc_vvm_d \ +helper_vsbc_vvm_h \ +helper_vsbc_vvm_w \ +helper_vsbc_vxm_b \ +helper_vsbc_vxm_d \ +helper_vsbc_vxm_h \ +helper_vsbc_vxm_w \ +helper_vse_v_b \ +helper_vse_v_b_mask \ +helper_vse_v_d \ +helper_vse_v_d_mask \ +helper_vse_v_h \ +helper_vse_v_h_mask \ +helper_vse_v_w \ +helper_vse_v_w_mask \ +helper_vsetvl \ +helper_vsh_v_d \ +helper_vsh_v_d_mask \ +helper_vsh_v_h \ +helper_vsh_v_h_mask \ +helper_vsh_v_w \ +helper_vsh_v_w_mask \ +helper_vslide1down_vx_b \ +helper_vslide1down_vx_d \ +helper_vslide1down_vx_h \ +helper_vslide1down_vx_w \ +helper_vslide1up_vx_b \ +helper_vslide1up_vx_d \ +helper_vslide1up_vx_h \ +helper_vslide1up_vx_w \ +helper_vslidedown_vx_b \ +helper_vslidedown_vx_d \ +helper_vslidedown_vx_h \ +helper_vslidedown_vx_w \ +helper_vslideup_vx_b \ +helper_vslideup_vx_d \ +helper_vslideup_vx_h \ +helper_vslideup_vx_w \ +helper_vsll_vv_b \ +helper_vsll_vv_d \ 
+helper_vsll_vv_h \ +helper_vsll_vv_w \ +helper_vsll_vx_b \ +helper_vsll_vx_d \ +helper_vsll_vx_h \ +helper_vsll_vx_w \ +helper_vsmul_vv_b \ +helper_vsmul_vv_d \ +helper_vsmul_vv_h \ +helper_vsmul_vv_w \ +helper_vsmul_vx_b \ +helper_vsmul_vx_d \ +helper_vsmul_vx_h \ +helper_vsmul_vx_w \ +helper_vsra_vv_b \ +helper_vsra_vv_d \ +helper_vsra_vv_h \ +helper_vsra_vv_w \ +helper_vsra_vx_b \ +helper_vsra_vx_d \ +helper_vsra_vx_h \ +helper_vsra_vx_w \ +helper_vsrl_vv_b \ +helper_vsrl_vv_d \ +helper_vsrl_vv_h \ +helper_vsrl_vv_w \ +helper_vsrl_vx_b \ +helper_vsrl_vx_d \ +helper_vsrl_vx_h \ +helper_vsrl_vx_w \ +helper_vssb_v_b \ +helper_vssb_v_d \ +helper_vssb_v_h \ +helper_vssb_v_w \ +helper_vsse_v_b \ +helper_vsse_v_d \ +helper_vsse_v_h \ +helper_vsse_v_w \ +helper_vssh_v_d \ +helper_vssh_v_h \ +helper_vssh_v_w \ +helper_vssra_vv_b \ +helper_vssra_vv_d \ +helper_vssra_vv_h \ +helper_vssra_vv_w \ +helper_vssra_vx_b \ +helper_vssra_vx_d \ +helper_vssra_vx_h \ +helper_vssra_vx_w \ +helper_vssrl_vv_b \ +helper_vssrl_vv_d \ +helper_vssrl_vv_h \ +helper_vssrl_vv_w \ +helper_vssrl_vx_b \ +helper_vssrl_vx_d \ +helper_vssrl_vx_h \ +helper_vssrl_vx_w \ +helper_vssub_vv_b \ +helper_vssub_vv_d \ +helper_vssub_vv_h \ +helper_vssub_vv_w \ +helper_vssub_vx_b \ +helper_vssub_vx_d \ +helper_vssub_vx_h \ +helper_vssub_vx_w \ +helper_vssubu_vv_b \ +helper_vssubu_vv_d \ +helper_vssubu_vv_h \ +helper_vssubu_vv_w \ +helper_vssubu_vx_b \ +helper_vssubu_vx_d \ +helper_vssubu_vx_h \ +helper_vssubu_vx_w \ +helper_vssw_v_d \ +helper_vssw_v_w \ +helper_vsub_vv_b \ +helper_vsub_vv_d \ +helper_vsub_vv_h \ +helper_vsub_vv_w \ +helper_vsub_vx_b \ +helper_vsub_vx_d \ +helper_vsub_vx_h \ +helper_vsub_vx_w \ +helper_vsw_v_d \ +helper_vsw_v_d_mask \ +helper_vsw_v_w \ +helper_vsw_v_w_mask \ +helper_vsxb_v_b \ +helper_vsxb_v_d \ +helper_vsxb_v_h \ +helper_vsxb_v_w \ +helper_vsxe_v_b \ +helper_vsxe_v_d \ +helper_vsxe_v_h \ +helper_vsxe_v_w \ +helper_vsxh_v_d \ +helper_vsxh_v_h \ +helper_vsxh_v_w \ +helper_vsxw_v_d \ +helper_vsxw_v_w \ +helper_vwadd_vv_b \ +helper_vwadd_vv_h \ +helper_vwadd_vv_w \ +helper_vwadd_vx_b \ +helper_vwadd_vx_h \ +helper_vwadd_vx_w \ +helper_vwadd_wv_b \ +helper_vwadd_wv_h \ +helper_vwadd_wv_w \ +helper_vwadd_wx_b \ +helper_vwadd_wx_h \ +helper_vwadd_wx_w \ +helper_vwaddu_vv_b \ +helper_vwaddu_vv_h \ +helper_vwaddu_vv_w \ +helper_vwaddu_vx_b \ +helper_vwaddu_vx_h \ +helper_vwaddu_vx_w \ +helper_vwaddu_wv_b \ +helper_vwaddu_wv_h \ +helper_vwaddu_wv_w \ +helper_vwaddu_wx_b \ +helper_vwaddu_wx_h \ +helper_vwaddu_wx_w \ +helper_vwmacc_vv_b \ +helper_vwmacc_vv_h \ +helper_vwmacc_vv_w \ +helper_vwmacc_vx_b \ +helper_vwmacc_vx_h \ +helper_vwmacc_vx_w \ +helper_vwmaccsu_vv_b \ +helper_vwmaccsu_vv_h \ +helper_vwmaccsu_vv_w \ +helper_vwmaccsu_vx_b \ +helper_vwmaccsu_vx_h \ +helper_vwmaccsu_vx_w \ +helper_vwmaccu_vv_b \ +helper_vwmaccu_vv_h \ +helper_vwmaccu_vv_w \ +helper_vwmaccu_vx_b \ +helper_vwmaccu_vx_h \ +helper_vwmaccu_vx_w \ +helper_vwmaccus_vx_b \ +helper_vwmaccus_vx_h \ +helper_vwmaccus_vx_w \ +helper_vwmul_vv_b \ +helper_vwmul_vv_h \ +helper_vwmul_vv_w \ +helper_vwmul_vx_b \ +helper_vwmul_vx_h \ +helper_vwmul_vx_w \ +helper_vwmulsu_vv_b \ +helper_vwmulsu_vv_h \ +helper_vwmulsu_vv_w \ +helper_vwmulsu_vx_b \ +helper_vwmulsu_vx_h \ +helper_vwmulsu_vx_w \ +helper_vwmulu_vv_b \ +helper_vwmulu_vv_h \ +helper_vwmulu_vv_w \ +helper_vwmulu_vx_b \ +helper_vwmulu_vx_h \ +helper_vwmulu_vx_w \ +helper_vwredsum_vs_b \ +helper_vwredsum_vs_h \ +helper_vwredsum_vs_w \ +helper_vwredsumu_vs_b \ +helper_vwredsumu_vs_h \ 
+helper_vwredsumu_vs_w \ +helper_vwsmacc_vv_b \ +helper_vwsmacc_vv_h \ +helper_vwsmacc_vv_w \ +helper_vwsmacc_vx_b \ +helper_vwsmacc_vx_h \ +helper_vwsmacc_vx_w \ +helper_vwsmaccsu_vv_b \ +helper_vwsmaccsu_vv_h \ +helper_vwsmaccsu_vv_w \ +helper_vwsmaccsu_vx_b \ +helper_vwsmaccsu_vx_h \ +helper_vwsmaccsu_vx_w \ +helper_vwsmaccu_vv_b \ +helper_vwsmaccu_vv_h \ +helper_vwsmaccu_vv_w \ +helper_vwsmaccu_vx_b \ +helper_vwsmaccu_vx_h \ +helper_vwsmaccu_vx_w \ +helper_vwsmaccus_vx_b \ +helper_vwsmaccus_vx_h \ +helper_vwsmaccus_vx_w \ +helper_vwsub_vv_b \ +helper_vwsub_vv_h \ +helper_vwsub_vv_w \ +helper_vwsub_vx_b \ +helper_vwsub_vx_h \ +helper_vwsub_vx_w \ +helper_vwsub_wv_b \ +helper_vwsub_wv_h \ +helper_vwsub_wv_w \ +helper_vwsub_wx_b \ +helper_vwsub_wx_h \ +helper_vwsub_wx_w \ +helper_vwsubu_vv_b \ +helper_vwsubu_vv_h \ +helper_vwsubu_vv_w \ +helper_vwsubu_vx_b \ +helper_vwsubu_vx_h \ +helper_vwsubu_vx_w \ +helper_vwsubu_wv_b \ +helper_vwsubu_wv_h \ +helper_vwsubu_wv_w \ +helper_vwsubu_wx_b \ +helper_vwsubu_wx_h \ +helper_vwsubu_wx_w \ +helper_vxor_vv_b \ +helper_vxor_vv_d \ +helper_vxor_vv_h \ +helper_vxor_vv_w \ +helper_vxor_vx_b \ +helper_vxor_vx_d \ +helper_vxor_vx_h \ +helper_vxor_vx_w \ " riscv64_SYMBOLS=${riscv32_SYMBOLS} @@ -4791,7 +5966,6 @@ cpu_rddsp \ helper_rddsp \ helper_cfc1 \ helper_ctc1 \ -ieee_ex_to_mips \ helper_float_sqrt_d \ helper_float_sqrt_s \ helper_float_cvtd_s \ @@ -5346,23 +6520,59 @@ helper_msa_srari_df \ helper_msa_srlri_df \ helper_msa_binsli_df \ helper_msa_binsri_df \ -helper_msa_subv_df \ -helper_msa_subs_s_df \ -helper_msa_subs_u_df \ -helper_msa_subsus_u_df \ -helper_msa_subsuu_s_df \ -helper_msa_mulv_df \ -helper_msa_dotp_s_df \ -helper_msa_dotp_u_df \ +helper_msa_subv_b \ +helper_msa_subv_h \ +helper_msa_subv_w \ +helper_msa_subv_d \ +helper_msa_subs_s_b \ +helper_msa_subs_s_h \ +helper_msa_subs_s_w \ +helper_msa_subs_s_d \ +helper_msa_subs_u_b \ +helper_msa_subs_u_h \ +helper_msa_subs_u_w \ +helper_msa_subs_u_d \ +helper_msa_subsus_u_b \ +helper_msa_subsus_u_h \ +helper_msa_subsus_u_w \ +helper_msa_subsus_u_d \ +helper_msa_subsuu_s_b \ +helper_msa_subsuu_s_h \ +helper_msa_subsuu_s_w \ +helper_msa_subsuu_s_d \ +helper_msa_mulv_b \ +helper_msa_mulv_h \ +helper_msa_mulv_w \ +helper_msa_mulv_d \ +helper_msa_dotp_s_h \ +helper_msa_dotp_s_w \ +helper_msa_dotp_s_d \ +helper_msa_dotp_u_h \ +helper_msa_dotp_u_w \ +helper_msa_dotp_u_d \ helper_msa_mul_q_df \ helper_msa_mulr_q_df \ helper_msa_sld_df \ -helper_msa_maddv_df \ -helper_msa_msubv_df \ -helper_msa_dpadd_s_df \ -helper_msa_dpadd_u_df \ -helper_msa_dpsub_s_df \ -helper_msa_dpsub_u_df \ +helper_msa_maddv_b \ +helper_msa_maddv_h \ +helper_msa_maddv_w \ +helper_msa_maddv_d \ +helper_msa_msubv_b \ +helper_msa_msubv_h \ +helper_msa_msubv_w \ +helper_msa_msubv_d \ +helper_msa_dpadd_s_h \ +helper_msa_dpadd_s_w \ +helper_msa_dpadd_s_d \ +helper_msa_dpadd_u_h \ +helper_msa_dpadd_u_w \ +helper_msa_dpadd_u_d \ +helper_msa_dpsub_s_h \ +helper_msa_dpsub_s_w \ +helper_msa_dpsub_s_d \ +helper_msa_dpsub_u_h \ +helper_msa_dpsub_u_w \ +helper_msa_dpsub_u_d \ helper_msa_binsl_df \ helper_msa_binsr_df \ helper_msa_madd_q_df \ @@ -5797,7 +7007,6 @@ helper_bfffo_reg \ helper_bfffo_mem \ helper_chk \ helper_chk2 \ -floatx80_mod \ floatx80_getman \ floatx80_getexp \ floatx80_scale \ @@ -6110,6 +7319,33 @@ helper_stvewx \ helper_tbegin \ helper_load_dump_spr \ helper_store_dump_spr \ +store_fpscr \ +helper_store_fpscr \ +helper_float_check_status \ +helper_reset_fpstatus \ +helper_fadd \ +helper_fsub \ +helper_fmul \ +helper_fdiv \ 
+helper_fctiw \ +helper_fctiwz \ +helper_fctiwuz \ +helper_fctid \ +helper_fctidz \ +helper_fctidu \ +helper_fctiduz \ +helper_fcfid \ +helper_fcfids \ +helper_fcfidu \ +helper_fcfidus \ +helper_frin \ +helper_friz \ +helper_frip \ +helper_frim \ +helper_fmadd \ +helper_fnmadd \ +helper_fmsub \ +helper_fnmsub \ helper_hfscr_facility_check \ helper_fscr_facility_check \ helper_msr_facility_check \ @@ -6261,8 +7497,245 @@ ppc_booke_timers_init \ ppc_hash32_handle_mmu_fault \ gen_helper_store_booke_tsr \ gen_helper_store_booke_tcr \ +gen_helper_store_fpscr \ store_booke_tcr \ ppc_hash32_get_phys_page_debug \ +helper_compute_fprf_float128 \ +helper_compute_fprf_float16 \ +helper_compute_fprf_float32 \ +helper_compute_fprf_float64 \ +helper_efdadd \ +helper_efdcfs \ +helper_efdcfsf \ +helper_efdcfsi \ +helper_efdcfsid \ +helper_efdcfuf \ +helper_efdcfui \ +helper_efdcfuid \ +helper_efdcmpeq \ +helper_efdcmpgt \ +helper_efdcmplt \ +helper_efdctsf \ +helper_efdctsi \ +helper_efdctsidz \ +helper_efdctsiz \ +helper_efdctuf \ +helper_efdctui \ +helper_efdctuidz \ +helper_efdctuiz \ +helper_efddiv \ +helper_efdmul \ +helper_efdsub \ +helper_efdtsteq \ +helper_efdtstgt \ +helper_efdtstlt \ +helper_efsadd \ +helper_efscfd \ +helper_efscfsf \ +helper_efscfsi \ +helper_efscfuf \ +helper_efscfui \ +helper_efscmpeq \ +helper_efscmpgt \ +helper_efscmplt \ +helper_efsctsf \ +helper_efsctsi \ +helper_efsctsiz \ +helper_efsctuf \ +helper_efsctui \ +helper_efsctuiz \ +helper_efsdiv \ +helper_efsmul \ +helper_efssub \ +helper_efststeq \ +helper_efststgt \ +helper_efststlt \ +helper_evfsadd \ +helper_evfscfsf \ +helper_evfscfsi \ +helper_evfscfuf \ +helper_evfscfui \ +helper_evfscmpeq \ +helper_evfscmpgt \ +helper_evfscmplt \ +helper_evfsctsf \ +helper_evfsctsi \ +helper_evfsctsiz \ +helper_evfsctuf \ +helper_evfsctui \ +helper_evfsctuiz \ +helper_evfsdiv \ +helper_evfsmul \ +helper_evfssub \ +helper_evfststeq \ +helper_evfststgt \ +helper_evfststlt \ +helper_fcmpo \ +helper_fcmpu \ +helper_fctiwu \ +helper_fpscr_clrbit \ +helper_fpscr_setbit \ +helper_fre \ +helper_fres \ +helper_frsp \ +helper_frsqrte \ +helper_fsel \ +helper_fsqrt \ +helper_ftdiv \ +helper_ftsqrt \ +helper_todouble \ +helper_tosingle \ +helper_xsadddp \ +helper_xsaddqp \ +helper_xsaddsp \ +helper_xscmpeqdp \ +helper_xscmpexpdp \ +helper_xscmpexpqp \ +helper_xscmpgedp \ +helper_xscmpgtdp \ +helper_xscmpnedp \ +helper_xscmpodp \ +helper_xscmpoqp \ +helper_xscmpudp \ +helper_xscmpuqp \ +helper_xscvdphp \ +helper_xscvdpqp \ +helper_xscvdpsp \ +helper_xscvdpspn \ +helper_xscvdpsxds \ +helper_xscvdpsxws \ +helper_xscvdpuxds \ +helper_xscvdpuxws \ +helper_xscvhpdp \ +helper_xscvqpdp \ +helper_xscvqpsdz \ +helper_xscvqpswz \ +helper_xscvqpudz \ +helper_xscvqpuwz \ +helper_xscvsdqp \ +helper_xscvspdp \ +helper_xscvspdpn \ +helper_xscvsxddp \ +helper_xscvsxdsp \ +helper_xscvudqp \ +helper_xscvuxddp \ +helper_xscvuxdsp \ +helper_xsdivdp \ +helper_xsdivqp \ +helper_xsdivsp \ +helper_xsmadddp \ +helper_xsmaddsp \ +helper_xsmaxcdp \ +helper_xsmaxdp \ +helper_xsmaxjdp \ +helper_xsmincdp \ +helper_xsmindp \ +helper_xsminjdp \ +helper_xsmsubdp \ +helper_xsmsubsp \ +helper_xsmuldp \ +helper_xsmulqp \ +helper_xsmulsp \ +helper_xsnmadddp \ +helper_xsnmaddsp \ +helper_xsnmsubdp \ +helper_xsnmsubsp \ +helper_xsrdpi \ +helper_xsrdpic \ +helper_xsrdpim \ +helper_xsrdpip \ +helper_xsrdpiz \ +helper_xsredp \ +helper_xsresp \ +helper_xsrqpi \ +helper_xsrqpxp \ +helper_xsrsp \ +helper_xsrsqrtedp \ +helper_xsrsqrtesp \ +helper_xssqrtdp \ +helper_xssqrtqp \ 
+helper_xssqrtsp \ +helper_xssubdp \ +helper_xssubqp \ +helper_xssubsp \ +helper_xstdivdp \ +helper_xstsqrtdp \ +helper_xststdcdp \ +helper_xststdcqp \ +helper_xststdcsp \ +helper_xvadddp \ +helper_xvaddsp \ +helper_xvcmpeqdp \ +helper_xvcmpeqsp \ +helper_xvcmpgedp \ +helper_xvcmpgesp \ +helper_xvcmpgtdp \ +helper_xvcmpgtsp \ +helper_xvcmpnedp \ +helper_xvcmpnesp \ +helper_xvcvdpsp \ +helper_xvcvdpsxds \ +helper_xvcvdpsxws \ +helper_xvcvdpuxds \ +helper_xvcvdpuxws \ +helper_xvcvhpsp \ +helper_xvcvspdp \ +helper_xvcvsphp \ +helper_xvcvspsxds \ +helper_xvcvspsxws \ +helper_xvcvspuxds \ +helper_xvcvspuxws \ +helper_xvcvsxddp \ +helper_xvcvsxdsp \ +helper_xvcvsxwdp \ +helper_xvcvsxwsp \ +helper_xvcvuxddp \ +helper_xvcvuxdsp \ +helper_xvcvuxwdp \ +helper_xvcvuxwsp \ +helper_xvdivdp \ +helper_xvdivsp \ +helper_xvmadddp \ +helper_xvmaddsp \ +helper_xvmaxdp \ +helper_xvmaxsp \ +helper_xvmindp \ +helper_xvminsp \ +helper_xvmsubdp \ +helper_xvmsubsp \ +helper_xvmuldp \ +helper_xvmulsp \ +helper_xvnmadddp \ +helper_xvnmaddsp \ +helper_xvnmsubdp \ +helper_xvnmsubsp \ +helper_xvrdpi \ +helper_xvrdpic \ +helper_xvrdpim \ +helper_xvrdpip \ +helper_xvrdpiz \ +helper_xvredp \ +helper_xvresp \ +helper_xvrspi \ +helper_xvrspic \ +helper_xvrspim \ +helper_xvrspip \ +helper_xvrspiz \ +helper_xvrsqrtedp \ +helper_xvrsqrtesp \ +helper_xvsqrtdp \ +helper_xvsqrtsp \ +helper_xvsubdp \ +helper_xvsubsp \ +helper_xvtdivdp \ +helper_xvtdivsp \ +helper_xvtsqrtdp \ +helper_xvtsqrtsp \ +helper_xvtstdcdp \ +helper_xvtstdcsp \ +helper_xvxsigsp \ +helper_xxperm \ +helper_xxpermr \ " ppc64_SYMBOLS=${ppc_SYMBOLS} @@ -6290,26 +7763,26 @@ ARCHS="x86_64 arm aarch64 riscv32 riscv64 mips mipsel mips64 mips64el sparc spar for arch in $ARCHS; do -echo "Generating header for $arch" -echo "/* Autogen header for Unicorn Engine - DONOT MODIFY */" > $SOURCE_DIR/qemu/$arch.h -echo "#ifndef UNICORN_AUTOGEN_${arch}_H" >> $SOURCE_DIR/qemu/$arch.h -echo "#define UNICORN_AUTOGEN_${arch}_H" >> $SOURCE_DIR/qemu/$arch.h -echo "#ifndef UNICORN_ARCH_POSTFIX" >> $SOURCE_DIR/qemu/$arch.h -echo "#define UNICORN_ARCH_POSTFIX _$arch" >> $SOURCE_DIR/qemu/$arch.h -echo "#endif" >> $SOURCE_DIR/qemu/$arch.h + echo "Generating header for $arch" + echo "/* Autogen header for Unicorn Engine - DONOT MODIFY */" >$SOURCE_DIR/qemu/$arch.h + echo "#ifndef UNICORN_AUTOGEN_${arch}_H" >>$SOURCE_DIR/qemu/$arch.h + echo "#define UNICORN_AUTOGEN_${arch}_H" >>$SOURCE_DIR/qemu/$arch.h + echo "#ifndef UNICORN_ARCH_POSTFIX" >>$SOURCE_DIR/qemu/$arch.h + echo "#define UNICORN_ARCH_POSTFIX _$arch" >>$SOURCE_DIR/qemu/$arch.h + echo "#endif" >>$SOURCE_DIR/qemu/$arch.h -for loop in $COMMON_SYMBOLS; do - echo "#define $loop ${loop}_${arch}" >> $SOURCE_DIR/qemu/$arch.h -done + for loop in $COMMON_SYMBOLS; do + echo "#define $loop ${loop}_${arch}" >>$SOURCE_DIR/qemu/$arch.h + done -ARCH_SYMBOLS=$(eval echo '$'"${arch}_SYMBOLS") + ARCH_SYMBOLS=$(eval echo '$'"${arch}_SYMBOLS") -#echo ${ARCH_SYMBOLS} + #echo ${ARCH_SYMBOLS} -for loop in $ARCH_SYMBOLS; do - echo "#define $loop ${loop}_${arch}" >> $SOURCE_DIR/qemu/$arch.h -done + for loop in $ARCH_SYMBOLS; do + echo "#define $loop ${loop}_${arch}" >>$SOURCE_DIR/qemu/$arch.h + done -echo "#endif" >> $SOURCE_DIR/qemu/$arch.h + echo "#endif" >>$SOURCE_DIR/qemu/$arch.h done diff --git a/uc.c b/uc.c index 57cb6e8c76..fd29765080 100644 --- a/uc.c +++ b/uc.c @@ -11,7 +11,6 @@ #include #endif -#include // nanosleep #include #include "uc_priv.h" From b4325f6a8a047c0fdfd557155e86a073acb1aef6 Mon Sep 17 00:00:00 2001 From: Zhang Date: Sun, 6 Apr 
2025 13:26:20 +0800 Subject: [PATCH 2/4] Squashed commit of the following: [qemu]It compiles! [qemu]In interpreter mode, disable allocation of _EXEC memory [qemu][uc]Define interpreter mode in build options [qemu]Copy tci from upstream qemu of corresponding version --- CMakeLists.txt | 16 + qemu/accel/tcg/translate-all.c | 6 + qemu/configure | 17 +- qemu/include/tcg/tcg.h | 1 + qemu/tcg/tcg.c | 6 + qemu/tcg/tci.c | 1275 ++++++++++++++++++++++++++++++++ qemu/tcg/tci/README | 130 ++++ qemu/tcg/tci/tcg-target.h | 213 ++++++ qemu/tcg/tci/tcg-target.inc.c | 896 ++++++++++++++++++++++ 9 files changed, 2559 insertions(+), 1 deletion(-) create mode 100644 qemu/tcg/tci.c create mode 100644 qemu/tcg/tci/README create mode 100644 qemu/tcg/tci/tcg-target.h create mode 100644 qemu/tcg/tci/tcg-target.inc.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 81c56aa5e9..78db47b688 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,6 +88,7 @@ option(UNICORN_BUILD_TESTS "Build unicorn tests" ${PROJECT_IS_TOP_LEVEL}) option(UNICORN_INSTALL "Enable unicorn installation" ${PROJECT_IS_TOP_LEVEL}) set(UNICORN_ARCH "x86;arm;aarch64;riscv;mips;sparc;m68k;ppc;s390x;tricore" CACHE STRING "Enabled unicorn architectures") option(UNICORN_TRACER "Trace unicorn execution" OFF) +option(UNICORN_INTERPRETER "Use interpreter mode" OFF) foreach(ARCH_LOOP ${UNICORN_ARCH}) string(TOUPPER "${ARCH_LOOP}" ARCH_LOOP) @@ -277,6 +278,10 @@ else() endwhile(TRUE) endif() + if(UNICORN_INTERPRETER) + set(UNICORN_TARGET_ARCH "tci") + endif() + set(EXTRA_CFLAGS "--extra-cflags=") if(UNICORN_HAS_X86) set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_X86 ") @@ -362,10 +367,17 @@ else() set(TARGET_LIST "${TARGET_LIST} ") # GEN config-host.mak & target directories + set(UNICORN_EXECUTION_MODE "") + if(UNICORN_INTERPRETER) + set(UNICORN_EXECUTION_MODE "--enable-interpreter") + else() + set(UNICORN_EXECUTION_MODE "--disable-interpreter") + endif() execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/configure --cc=${CMAKE_C_COMPILER} ${EXTRA_CFLAGS} ${TARGET_LIST} + ${UNICORN_EXECUTION_MODE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/scripts/create_config @@ -507,6 +519,10 @@ set(UNICORN_ARCH_COMMON qemu/softmmu/unicorn_vtlb.c ) +if(UNICORN_INTERPRETER) + list(APPEND UNICORN_ARCH_COMMON qemu/tcg/tci.c) +endif() + if(UNICORN_HAS_X86) add_library(x86_64-softmmu STATIC ${UNICORN_ARCH_COMMON} diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c index d240f35c87..0524fefd30 100644 --- a/qemu/accel/tcg/translate-all.c +++ b/qemu/accel/tcg/translate-all.c @@ -1019,12 +1019,18 @@ void free_code_gen_buffer(struct uc_struct *uc) static inline void *alloc_code_gen_buffer(struct uc_struct *uc) { TCGContext *tcg_ctx = uc->tcg_ctx; +#if CONFIG_TCG_INTERPRETER + int prot = PROT_WRITE | PROT_READ; +#else int prot = PROT_WRITE | PROT_READ | PROT_EXEC; +#endif int flags = MAP_PRIVATE | MAP_ANONYMOUS; size_t size = tcg_ctx->code_gen_buffer_size; void *buf; +#if !CONFIG_TCG_INTERPRETER #ifdef USE_MAP_JIT flags |= MAP_JIT; +#endif #endif buf = mmap(NULL, size, prot, flags, -1, 0); if (buf == MAP_FAILED) { diff --git a/qemu/configure b/qemu/configure index 47d4a4c6ef..f52b5b9531 100755 --- a/qemu/configure +++ b/qemu/configure @@ -272,6 +272,7 @@ supported_cpu="no" supported_os="no" bogus_os="no" malloc_trim="" +interpreter="yes" # parse CC options first for opt do @@ -308,6 +309,10 @@ for opt do eval "cross_cc_${cc_arch}=\$optarg" cross_cc_vars="$cross_cc_vars 
cross_cc_${cc_arch}" ;; + --enable-interpreter) interpreter="yes" + ;; + --disable-interpreter) interpreter="no" + ;; esac done # OS specific @@ -689,6 +694,10 @@ for opt do ;; --disable-debug-info) ;; + --enable-interpreter) + ;; + --disable-interpreter) + ;; --cross-cc-*) ;; --cpu=*) @@ -922,6 +931,7 @@ disabled with --disable-FEATURE, default is enabled if available: jemalloc jemalloc support avx2 AVX2 optimization support avx512f AVX512F optimization support + interpreter Interpreter mode NOTE: The object files are built at the place where configure is launched EOF @@ -2137,7 +2147,7 @@ fi ########################################## # check for Apple Silicon JIT function -if [ "$darwin" = "yes" ] ; then +if [ "$darwin" = "yes" ] && [ "$interpreter" = "no" ] ; then cat > $TMPC << EOF #include int main() { pthread_jit_write_protect_np(0); return 0;} @@ -2313,6 +2323,7 @@ echo "tcmalloc support $tcmalloc" echo "jemalloc support $jemalloc" echo "avx2 optimization $avx2_opt" echo "avx512f optimization $avx512f_opt" +echo "interpreter $interpreter" if test "$supported_cpu" = "no"; then echo @@ -2557,6 +2568,10 @@ if test "$have_sprr_mrs" = "yes" ; then echo "HAVE_SPRR_MRS=y" >> $config_host_mak fi +if test "$interpreter" = "yes" ; then + echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak +fi + # Hold two types of flag: # CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on # a thread we have a handle to diff --git a/qemu/include/tcg/tcg.h b/qemu/include/tcg/tcg.h index 966103e25d..695609df0c 100644 --- a/qemu/include/tcg/tcg.h +++ b/qemu/include/tcg/tcg.h @@ -706,6 +706,7 @@ struct TCGContext { struct jit_code_entry *one_entry; /* qemu/tcg/tcg-common.c */ TCGOpDef *tcg_op_defs; + size_t tcg_op_defs_max; // Unicorn engine variables struct uc_struct *uc; diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index 3d23487176..0befcea492 100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -62,6 +62,10 @@ #include +#if CONFIG_TCG_INTERPRETER +#include "tcg/tcg.h" +#endif + /* Forward declarations for functions declared in tcg-target.inc.c and used here. */ static void tcg_target_init(TCGContext *s); @@ -666,6 +670,7 @@ static const TCGOpDef tcg_op_defs_org[] = { #include "tcg/tcg-opc.h" #undef DEF }; +static const size_t tcg_op_defs_max_org = ARRAY_SIZE(tcg_op_defs_org); static void process_op_defs(TCGContext *s); static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, @@ -734,6 +739,7 @@ void tcg_context_init(TCGContext *s) // copy original tcg_op_defs_org for private usage s->tcg_op_defs = g_malloc0(sizeof(tcg_op_defs_org)); memcpy(s->tcg_op_defs, tcg_op_defs_org, sizeof(tcg_op_defs_org)); + s->tcg_op_defs_max = tcg_op_defs_max_org; /* Count total number of arguments and allocate the corresponding space */ diff --git a/qemu/tcg/tci.c b/qemu/tcg/tci.c new file mode 100644 index 0000000000..46fe9ce63f --- /dev/null +++ b/qemu/tcg/tci.c @@ -0,0 +1,1275 @@ +/* + * Tiny Code Interpreter for QEMU + * + * Copyright (c) 2009, 2011, 2016 Stefan Weil + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "qemu/osdep.h" + +/* Enable TCI assertions only when debugging TCG (and without NDEBUG defined). + * Without assertions, the interpreter runs much faster. */ +#if defined(CONFIG_DEBUG_TCG) +# define tci_assert(cond) assert(cond) +#else +# define tci_assert(cond) ((void)0) +#endif + +#include "qemu-common.h" +#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */ +#include "exec/cpu_ldst.h" +#include "tcg/tcg-op.h" + +/* Marker for missing code. */ +#define TODO() \ + do { \ + fprintf(stderr, "TODO %s:%u: %s()\n", \ + __FILE__, __LINE__, __func__); \ + tcg_abort(); \ + } while (0) + +#if MAX_OPC_PARAM_IARGS != 6 +# error Fix needed, number of supported input arguments changed! +#endif +#if TCG_TARGET_REG_BITS == 32 +typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong); +#else +typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong); +#endif + +static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index) +{ + tci_assert(index < TCG_TARGET_NB_REGS); + return regs[index]; +} + +#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 +static int8_t tci_read_reg8s(const tcg_target_ulong *regs, TCGReg index) +{ + return (int8_t)tci_read_reg(regs, index); +} +#endif + +#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 +static int16_t tci_read_reg16s(const tcg_target_ulong *regs, TCGReg index) +{ + return (int16_t)tci_read_reg(regs, index); +} +#endif + +#if TCG_TARGET_REG_BITS == 64 +static int32_t tci_read_reg32s(const tcg_target_ulong *regs, TCGReg index) +{ + return (int32_t)tci_read_reg(regs, index); +} +#endif + +static uint8_t tci_read_reg8(const tcg_target_ulong *regs, TCGReg index) +{ + return (uint8_t)tci_read_reg(regs, index); +} + +static uint16_t tci_read_reg16(const tcg_target_ulong *regs, TCGReg index) +{ + return (uint16_t)tci_read_reg(regs, index); +} + +static uint32_t tci_read_reg32(const tcg_target_ulong *regs, TCGReg index) +{ + return (uint32_t)tci_read_reg(regs, index); +} + +#if TCG_TARGET_REG_BITS == 64 +static uint64_t tci_read_reg64(const tcg_target_ulong *regs, TCGReg index) +{ + return tci_read_reg(regs, index); +} +#endif + +static void +tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value) +{ + tci_assert(index < TCG_TARGET_NB_REGS); + tci_assert(index != TCG_AREG0); + tci_assert(index != TCG_REG_CALL_STACK); + regs[index] = value; +} + +#if TCG_TARGET_REG_BITS == 64 +static void +tci_write_reg32s(tcg_target_ulong *regs, TCGReg index, int32_t value) +{ + tci_write_reg(regs, index, value); +} +#endif + +static void tci_write_reg8(tcg_target_ulong *regs, TCGReg index, uint8_t value) +{ + tci_write_reg(regs, index, value); +} + +static void +tci_write_reg16(tcg_target_ulong *regs, TCGReg index, uint16_t value) +{ + tci_write_reg(regs, index, value); +} + +static void +tci_write_reg32(tcg_target_ulong *regs, TCGReg index, uint32_t value) +{ + tci_write_reg(regs, index, value); +} + +#if TCG_TARGET_REG_BITS == 32 +static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index, + uint32_t low_index, uint64_t value) +{ + tci_write_reg(regs, low_index, value); + tci_write_reg(regs, high_index, 
value >> 32); +} +#elif TCG_TARGET_REG_BITS == 64 +static void +tci_write_reg64(tcg_target_ulong *regs, TCGReg index, uint64_t value) +{ + tci_write_reg(regs, index, value); +} +#endif + +#if TCG_TARGET_REG_BITS == 32 +/* Create a 64 bit value from two 32 bit values. */ +static uint64_t tci_uint64(uint32_t high, uint32_t low) +{ + return ((uint64_t)high << 32) + low; +} +#endif + +/* Read constant (native size) from bytecode. */ +static tcg_target_ulong tci_read_i(uint8_t **tb_ptr) +{ + tcg_target_ulong value = *(tcg_target_ulong *)(*tb_ptr); + *tb_ptr += sizeof(value); + return value; +} + +/* Read unsigned constant (32 bit) from bytecode. */ +static uint32_t tci_read_i32(uint8_t **tb_ptr) +{ + uint32_t value = *(uint32_t *)(*tb_ptr); + *tb_ptr += sizeof(value); + return value; +} + +/* Read signed constant (32 bit) from bytecode. */ +static int32_t tci_read_s32(uint8_t **tb_ptr) +{ + int32_t value = *(int32_t *)(*tb_ptr); + *tb_ptr += sizeof(value); + return value; +} + +#if TCG_TARGET_REG_BITS == 64 +/* Read constant (64 bit) from bytecode. */ +static uint64_t tci_read_i64(uint8_t **tb_ptr) +{ + uint64_t value = *(uint64_t *)(*tb_ptr); + *tb_ptr += sizeof(value); + return value; +} +#endif + +/* Read indexed register (native size) from bytecode. */ +static tcg_target_ulong +tci_read_r(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + tcg_target_ulong value = tci_read_reg(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +/* Read indexed register (8 bit) from bytecode. */ +static uint8_t tci_read_r8(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint8_t value = tci_read_reg8(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 +/* Read indexed register (8 bit signed) from bytecode. */ +static int8_t tci_read_r8s(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + int8_t value = tci_read_reg8s(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} +#endif + +/* Read indexed register (16 bit) from bytecode. */ +static uint16_t tci_read_r16(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint16_t value = tci_read_reg16(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 +/* Read indexed register (16 bit signed) from bytecode. */ +static int16_t tci_read_r16s(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + int16_t value = tci_read_reg16s(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} +#endif + +/* Read indexed register (32 bit) from bytecode. */ +static uint32_t tci_read_r32(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint32_t value = tci_read_reg32(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +#if TCG_TARGET_REG_BITS == 32 +/* Read two indexed registers (2 * 32 bit) from bytecode. */ +static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint32_t low = tci_read_r32(regs, tb_ptr); + return tci_uint64(tci_read_r32(regs, tb_ptr), low); +} +#elif TCG_TARGET_REG_BITS == 64 +/* Read indexed register (32 bit signed) from bytecode. */ +static int32_t tci_read_r32s(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + int32_t value = tci_read_reg32s(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +/* Read indexed register (64 bit) from bytecode. */ +static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint64_t value = tci_read_reg64(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} +#endif + +/* Read indexed register(s) with target address from bytecode. 
*/ +static target_ulong +tci_read_ulong(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + target_ulong taddr = tci_read_r(regs, tb_ptr); +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + taddr += (uint64_t)tci_read_r(regs, tb_ptr) << 32; +#endif + return taddr; +} + +/* Read indexed register or constant (native size) from bytecode. */ +static tcg_target_ulong +tci_read_ri(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + tcg_target_ulong value; + TCGReg r = **tb_ptr; + *tb_ptr += 1; + if (r == TCG_CONST) { + value = tci_read_i(tb_ptr); + } else { + value = tci_read_reg(regs, r); + } + return value; +} + +/* Read indexed register or constant (32 bit) from bytecode. */ +static uint32_t tci_read_ri32(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint32_t value; + TCGReg r = **tb_ptr; + *tb_ptr += 1; + if (r == TCG_CONST) { + value = tci_read_i32(tb_ptr); + } else { + value = tci_read_reg32(regs, r); + } + return value; +} + +#if TCG_TARGET_REG_BITS == 32 +/* Read two indexed registers or constants (2 * 32 bit) from bytecode. */ +static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint32_t low = tci_read_ri32(regs, tb_ptr); + return tci_uint64(tci_read_ri32(regs, tb_ptr), low); +} +#elif TCG_TARGET_REG_BITS == 64 +/* Read indexed register or constant (64 bit) from bytecode. */ +static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint64_t value; + TCGReg r = **tb_ptr; + *tb_ptr += 1; + if (r == TCG_CONST) { + value = tci_read_i64(tb_ptr); + } else { + value = tci_read_reg64(regs, r); + } + return value; +} +#endif + +static tcg_target_ulong tci_read_label(uint8_t **tb_ptr) +{ + tcg_target_ulong label = tci_read_i(tb_ptr); + tci_assert(label != 0); + return label; +} + +static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) +{ + bool result = false; + int32_t i0 = u0; + int32_t i1 = u1; + switch (condition) { + case TCG_COND_EQ: + result = (u0 == u1); + break; + case TCG_COND_NE: + result = (u0 != u1); + break; + case TCG_COND_LT: + result = (i0 < i1); + break; + case TCG_COND_GE: + result = (i0 >= i1); + break; + case TCG_COND_LE: + result = (i0 <= i1); + break; + case TCG_COND_GT: + result = (i0 > i1); + break; + case TCG_COND_LTU: + result = (u0 < u1); + break; + case TCG_COND_GEU: + result = (u0 >= u1); + break; + case TCG_COND_LEU: + result = (u0 <= u1); + break; + case TCG_COND_GTU: + result = (u0 > u1); + break; + default: + TODO(); + } + return result; +} + +static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) +{ + bool result = false; + int64_t i0 = u0; + int64_t i1 = u1; + switch (condition) { + case TCG_COND_EQ: + result = (u0 == u1); + break; + case TCG_COND_NE: + result = (u0 != u1); + break; + case TCG_COND_LT: + result = (i0 < i1); + break; + case TCG_COND_GE: + result = (i0 >= i1); + break; + case TCG_COND_LE: + result = (i0 <= i1); + break; + case TCG_COND_GT: + result = (i0 > i1); + break; + case TCG_COND_LTU: + result = (u0 < u1); + break; + case TCG_COND_GEU: + result = (u0 >= u1); + break; + case TCG_COND_LEU: + result = (u0 <= u1); + break; + case TCG_COND_GTU: + result = (u0 > u1); + break; + default: + TODO(); + } + return result; +} + +#ifdef CONFIG_SOFTMMU +# define qemu_ld_ub \ + helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_leuw \ + helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_leul \ + helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_leq \ + helper_le_ldq_mmu(env, taddr, oi, 
(uintptr_t)tb_ptr) +# define qemu_ld_beuw \ + helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_beul \ + helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_beq \ + helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_st_b(X) \ + helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_lew(X) \ + helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_lel(X) \ + helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_leq(X) \ + helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_bew(X) \ + helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_bel(X) \ + helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_beq(X) \ + helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +#else +# define qemu_ld_ub ldub_p(g2h(taddr)) +# define qemu_ld_leuw lduw_le_p(g2h(taddr)) +# define qemu_ld_leul (uint32_t)ldl_le_p(g2h(taddr)) +# define qemu_ld_leq ldq_le_p(g2h(taddr)) +# define qemu_ld_beuw lduw_be_p(g2h(taddr)) +# define qemu_ld_beul (uint32_t)ldl_be_p(g2h(taddr)) +# define qemu_ld_beq ldq_be_p(g2h(taddr)) +# define qemu_st_b(X) stb_p(g2h(taddr), X) +# define qemu_st_lew(X) stw_le_p(g2h(taddr), X) +# define qemu_st_lel(X) stl_le_p(g2h(taddr), X) +# define qemu_st_leq(X) stq_le_p(g2h(taddr), X) +# define qemu_st_bew(X) stw_be_p(g2h(taddr), X) +# define qemu_st_bel(X) stl_be_p(g2h(taddr), X) +# define qemu_st_beq(X) stq_be_p(g2h(taddr), X) +#endif + +/* Interpret pseudo code in tb. */ +uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) +{ + tcg_target_ulong regs[TCG_TARGET_NB_REGS]; + long tcg_temps[CPU_TEMP_BUF_NLONGS]; + uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS); + uintptr_t ret = 0; + + regs[TCG_AREG0] = (tcg_target_ulong)env; + regs[TCG_REG_CALL_STACK] = sp_value; + tci_assert(tb_ptr); + + for (;;) { + TCGOpcode opc = tb_ptr[0]; +#if defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG) + uint8_t op_size = tb_ptr[1]; + uint8_t *old_code_ptr = tb_ptr; +#endif + tcg_target_ulong t0; + tcg_target_ulong t1; + tcg_target_ulong t2; + tcg_target_ulong label; + TCGCond condition; + target_ulong taddr; + uint8_t tmp8; + uint16_t tmp16; + uint32_t tmp32; + uint64_t tmp64; +#if TCG_TARGET_REG_BITS == 32 + uint64_t v64; +#endif + TCGMemOpIdx oi; + +#if defined(GETPC) + tci_tb_ptr = (uintptr_t)tb_ptr; +#endif + + /* Skip opcode and size entry. 
*/ + tb_ptr += 2; + + switch (opc) { + case INDEX_op_call: + t0 = tci_read_ri(regs, &tb_ptr); +#if TCG_TARGET_REG_BITS == 32 + tmp64 = ((helper_function)t0)(tci_read_reg(regs, TCG_REG_R0), + tci_read_reg(regs, TCG_REG_R1), + tci_read_reg(regs, TCG_REG_R2), + tci_read_reg(regs, TCG_REG_R3), + tci_read_reg(regs, TCG_REG_R5), + tci_read_reg(regs, TCG_REG_R6), + tci_read_reg(regs, TCG_REG_R7), + tci_read_reg(regs, TCG_REG_R8), + tci_read_reg(regs, TCG_REG_R9), + tci_read_reg(regs, TCG_REG_R10), + tci_read_reg(regs, TCG_REG_R11), + tci_read_reg(regs, TCG_REG_R12)); + tci_write_reg(regs, TCG_REG_R0, tmp64); + tci_write_reg(regs, TCG_REG_R1, tmp64 >> 32); +#else + tmp64 = ((helper_function)t0)(tci_read_reg(regs, TCG_REG_R0), + tci_read_reg(regs, TCG_REG_R1), + tci_read_reg(regs, TCG_REG_R2), + tci_read_reg(regs, TCG_REG_R3), + tci_read_reg(regs, TCG_REG_R5), + tci_read_reg(regs, TCG_REG_R6)); + tci_write_reg(regs, TCG_REG_R0, tmp64); +#endif + break; + case INDEX_op_br: + label = tci_read_label(&tb_ptr); + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr = (uint8_t *)label; + continue; + case INDEX_op_setcond_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + condition = *tb_ptr++; + tci_write_reg32(regs, t0, tci_compare32(t1, t2, condition)); + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_setcond2_i32: + t0 = *tb_ptr++; + tmp64 = tci_read_r64(regs, &tb_ptr); + v64 = tci_read_ri64(regs, &tb_ptr); + condition = *tb_ptr++; + tci_write_reg32(regs, t0, tci_compare64(tmp64, v64, condition)); + break; +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_setcond_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + condition = *tb_ptr++; + tci_write_reg64(regs, t0, tci_compare64(t1, t2, condition)); + break; +#endif + case INDEX_op_mov_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; + case INDEX_op_movi_i32: + t0 = *tb_ptr++; + t1 = tci_read_i32(&tb_ptr); + tci_write_reg32(regs, t0, t1); + break; + + /* Load/store operations (32 bit). */ + + case INDEX_op_ld8u_i32: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg8(regs, t0, *(uint8_t *)(t1 + t2)); + break; + case INDEX_op_ld8s_i32: + TODO(); + break; + case INDEX_op_ld16u_i32: + TODO(); + break; + case INDEX_op_ld16s_i32: + TODO(); + break; + case INDEX_op_ld_i32: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg32(regs, t0, *(uint32_t *)(t1 + t2)); + break; + case INDEX_op_st8_i32: + t0 = tci_read_r8(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint8_t *)(t1 + t2) = t0; + break; + case INDEX_op_st16_i32: + t0 = tci_read_r16(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint16_t *)(t1 + t2) = t0; + break; + case INDEX_op_st_i32: + t0 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_assert(t1 != sp_value || (int32_t)t2 < 0); + *(uint32_t *)(t1 + t2) = t0; + break; + + /* Arithmetic operations (32 bit). 
*/ + + case INDEX_op_add_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 + t2); + break; + case INDEX_op_sub_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 - t2); + break; + case INDEX_op_mul_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 * t2); + break; +#if TCG_TARGET_HAS_div_i32 + case INDEX_op_div_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, (int32_t)t1 / (int32_t)t2); + break; + case INDEX_op_divu_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 / t2); + break; + case INDEX_op_rem_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, (int32_t)t1 % (int32_t)t2); + break; + case INDEX_op_remu_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 % t2); + break; +#elif TCG_TARGET_HAS_div2_i32 + case INDEX_op_div2_i32: + case INDEX_op_divu2_i32: + TODO(); + break; +#endif + case INDEX_op_and_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 & t2); + break; + case INDEX_op_or_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 | t2); + break; + case INDEX_op_xor_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 ^ t2); + break; + + /* Shift/rotate operations (32 bit). 
*/ + + case INDEX_op_shl_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 << (t2 & 31)); + break; + case INDEX_op_shr_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 >> (t2 & 31)); + break; + case INDEX_op_sar_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, ((int32_t)t1 >> (t2 & 31))); + break; +#if TCG_TARGET_HAS_rot_i32 + case INDEX_op_rotl_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, rol32(t1, t2 & 31)); + break; + case INDEX_op_rotr_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, ror32(t1, t2 & 31)); + break; +#endif +#if TCG_TARGET_HAS_deposit_i32 + case INDEX_op_deposit_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + t2 = tci_read_r32(regs, &tb_ptr); + tmp16 = *tb_ptr++; + tmp8 = *tb_ptr++; + tmp32 = (((1 << tmp8) - 1) << tmp16); + tci_write_reg32(regs, t0, (t1 & ~tmp32) | ((t2 << tmp16) & tmp32)); + break; +#endif + case INDEX_op_brcond_i32: + t0 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_ri32(regs, &tb_ptr); + condition = *tb_ptr++; + label = tci_read_label(&tb_ptr); + if (tci_compare32(t0, t1, condition)) { + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr = (uint8_t *)label; + continue; + } + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_add2_i32: + t0 = *tb_ptr++; + t1 = *tb_ptr++; + tmp64 = tci_read_r64(regs, &tb_ptr); + tmp64 += tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, tmp64); + break; + case INDEX_op_sub2_i32: + t0 = *tb_ptr++; + t1 = *tb_ptr++; + tmp64 = tci_read_r64(regs, &tb_ptr); + tmp64 -= tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, tmp64); + break; + case INDEX_op_brcond2_i32: + tmp64 = tci_read_r64(regs, &tb_ptr); + v64 = tci_read_ri64(regs, &tb_ptr); + condition = *tb_ptr++; + label = tci_read_label(&tb_ptr); + if (tci_compare64(tmp64, v64, condition)) { + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr = (uint8_t *)label; + continue; + } + break; + case INDEX_op_mulu2_i32: + t0 = *tb_ptr++; + t1 = *tb_ptr++; + t2 = tci_read_r32(regs, &tb_ptr); + tmp64 = tci_read_r32(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, t2 * tmp64); + break; +#endif /* TCG_TARGET_REG_BITS == 32 */ +#if TCG_TARGET_HAS_ext8s_i32 + case INDEX_op_ext8s_i32: + t0 = *tb_ptr++; + t1 = tci_read_r8s(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext16s_i32 + case INDEX_op_ext16s_i32: + t0 = *tb_ptr++; + t1 = tci_read_r16s(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext8u_i32 + case INDEX_op_ext8u_i32: + t0 = *tb_ptr++; + t1 = tci_read_r8(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext16u_i32 + case INDEX_op_ext16u_i32: + t0 = *tb_ptr++; + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_bswap16_i32 + case INDEX_op_bswap16_i32: + t0 = *tb_ptr++; + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg32(regs, t0, bswap16(t1)); + break; +#endif +#if TCG_TARGET_HAS_bswap32_i32 + case INDEX_op_bswap32_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, bswap32(t1)); + break; +#endif +#if TCG_TARGET_HAS_not_i32 + case INDEX_op_not_i32: + t0 
= *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, ~t1); + break; +#endif +#if TCG_TARGET_HAS_neg_i32 + case INDEX_op_neg_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, -t1); + break; +#endif +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_mov_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; + case INDEX_op_movi_i64: + t0 = *tb_ptr++; + t1 = tci_read_i64(&tb_ptr); + tci_write_reg64(regs, t0, t1); + break; + + /* Load/store operations (64 bit). */ + + case INDEX_op_ld8u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg8(regs, t0, *(uint8_t *)(t1 + t2)); + break; + case INDEX_op_ld8s_i64: + TODO(); + break; + case INDEX_op_ld16u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg16(regs, t0, *(uint16_t *)(t1 + t2)); + break; + case INDEX_op_ld16s_i64: + TODO(); + break; + case INDEX_op_ld32u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg32(regs, t0, *(uint32_t *)(t1 + t2)); + break; + case INDEX_op_ld32s_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg32s(regs, t0, *(int32_t *)(t1 + t2)); + break; + case INDEX_op_ld_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg64(regs, t0, *(uint64_t *)(t1 + t2)); + break; + case INDEX_op_st8_i64: + t0 = tci_read_r8(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint8_t *)(t1 + t2) = t0; + break; + case INDEX_op_st16_i64: + t0 = tci_read_r16(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint16_t *)(t1 + t2) = t0; + break; + case INDEX_op_st32_i64: + t0 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint32_t *)(t1 + t2) = t0; + break; + case INDEX_op_st_i64: + t0 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_assert(t1 != sp_value || (int32_t)t2 < 0); + *(uint64_t *)(t1 + t2) = t0; + break; + + /* Arithmetic operations (64 bit). */ + + case INDEX_op_add_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 + t2); + break; + case INDEX_op_sub_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 - t2); + break; + case INDEX_op_mul_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 * t2); + break; +#if TCG_TARGET_HAS_div_i64 + case INDEX_op_div_i64: + case INDEX_op_divu_i64: + case INDEX_op_rem_i64: + case INDEX_op_remu_i64: + TODO(); + break; +#elif TCG_TARGET_HAS_div2_i64 + case INDEX_op_div2_i64: + case INDEX_op_divu2_i64: + TODO(); + break; +#endif + case INDEX_op_and_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 & t2); + break; + case INDEX_op_or_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 | t2); + break; + case INDEX_op_xor_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 ^ t2); + break; + + /* Shift/rotate operations (64 bit). 
*/ + + case INDEX_op_shl_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 << (t2 & 63)); + break; + case INDEX_op_shr_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 >> (t2 & 63)); + break; + case INDEX_op_sar_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, ((int64_t)t1 >> (t2 & 63))); + break; +#if TCG_TARGET_HAS_rot_i64 + case INDEX_op_rotl_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, rol64(t1, t2 & 63)); + break; + case INDEX_op_rotr_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, ror64(t1, t2 & 63)); + break; +#endif +#if TCG_TARGET_HAS_deposit_i64 + case INDEX_op_deposit_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + t2 = tci_read_r64(regs, &tb_ptr); + tmp16 = *tb_ptr++; + tmp8 = *tb_ptr++; + tmp64 = (((1ULL << tmp8) - 1) << tmp16); + tci_write_reg64(regs, t0, (t1 & ~tmp64) | ((t2 << tmp16) & tmp64)); + break; +#endif + case INDEX_op_brcond_i64: + t0 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_ri64(regs, &tb_ptr); + condition = *tb_ptr++; + label = tci_read_label(&tb_ptr); + if (tci_compare64(t0, t1, condition)) { + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr = (uint8_t *)label; + continue; + } + break; +#if TCG_TARGET_HAS_ext8u_i64 + case INDEX_op_ext8u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r8(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext8s_i64 + case INDEX_op_ext8s_i64: + t0 = *tb_ptr++; + t1 = tci_read_r8s(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext16s_i64 + case INDEX_op_ext16s_i64: + t0 = *tb_ptr++; + t1 = tci_read_r16s(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext16u_i64 + case INDEX_op_ext16u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext32s_i64 + case INDEX_op_ext32s_i64: +#endif + case INDEX_op_ext_i32_i64: + t0 = *tb_ptr++; + t1 = tci_read_r32s(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#if TCG_TARGET_HAS_ext32u_i64 + case INDEX_op_ext32u_i64: +#endif + case INDEX_op_extu_i32_i64: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#if TCG_TARGET_HAS_bswap16_i64 + case INDEX_op_bswap16_i64: + t0 = *tb_ptr++; + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg64(regs, t0, bswap16(t1)); + break; +#endif +#if TCG_TARGET_HAS_bswap32_i64 + case INDEX_op_bswap32_i64: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg64(regs, t0, bswap32(t1)); + break; +#endif +#if TCG_TARGET_HAS_bswap64_i64 + case INDEX_op_bswap64_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, bswap64(t1)); + break; +#endif +#if TCG_TARGET_HAS_not_i64 + case INDEX_op_not_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, ~t1); + break; +#endif +#if TCG_TARGET_HAS_neg_i64 + case INDEX_op_neg_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, -t1); + break; +#endif +#endif /* TCG_TARGET_REG_BITS == 64 */ + + /* QEMU specific operations. 
*/ + + case INDEX_op_exit_tb: + ret = *(uint64_t *)tb_ptr; + goto exit; + break; + case INDEX_op_goto_tb: + /* Jump address is aligned */ + tb_ptr = QEMU_ALIGN_PTR_UP(tb_ptr, 4); + t0 = atomic_read((int32_t *)tb_ptr); + tb_ptr += sizeof(int32_t); + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr += (int32_t)t0; + continue; + case INDEX_op_qemu_ld_i32: + t0 = *tb_ptr++; + taddr = tci_read_ulong(regs, &tb_ptr); + oi = tci_read_i(&tb_ptr); + switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { + case MO_UB: + tmp32 = qemu_ld_ub; + break; + case MO_SB: + tmp32 = (int8_t)qemu_ld_ub; + break; + case MO_LEUW: + tmp32 = qemu_ld_leuw; + break; + case MO_LESW: + tmp32 = (int16_t)qemu_ld_leuw; + break; + case MO_LEUL: + tmp32 = qemu_ld_leul; + break; + case MO_BEUW: + tmp32 = qemu_ld_beuw; + break; + case MO_BESW: + tmp32 = (int16_t)qemu_ld_beuw; + break; + case MO_BEUL: + tmp32 = qemu_ld_beul; + break; + default: + tcg_abort(); + } + tci_write_reg(regs, t0, tmp32); + break; + case INDEX_op_qemu_ld_i64: + t0 = *tb_ptr++; + if (TCG_TARGET_REG_BITS == 32) { + t1 = *tb_ptr++; + } + taddr = tci_read_ulong(regs, &tb_ptr); + oi = tci_read_i(&tb_ptr); + switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { + case MO_UB: + tmp64 = qemu_ld_ub; + break; + case MO_SB: + tmp64 = (int8_t)qemu_ld_ub; + break; + case MO_LEUW: + tmp64 = qemu_ld_leuw; + break; + case MO_LESW: + tmp64 = (int16_t)qemu_ld_leuw; + break; + case MO_LEUL: + tmp64 = qemu_ld_leul; + break; + case MO_LESL: + tmp64 = (int32_t)qemu_ld_leul; + break; + case MO_LEQ: + tmp64 = qemu_ld_leq; + break; + case MO_BEUW: + tmp64 = qemu_ld_beuw; + break; + case MO_BESW: + tmp64 = (int16_t)qemu_ld_beuw; + break; + case MO_BEUL: + tmp64 = qemu_ld_beul; + break; + case MO_BESL: + tmp64 = (int32_t)qemu_ld_beul; + break; + case MO_BEQ: + tmp64 = qemu_ld_beq; + break; + default: + tcg_abort(); + } + tci_write_reg(regs, t0, tmp64); + if (TCG_TARGET_REG_BITS == 32) { + tci_write_reg(regs, t1, tmp64 >> 32); + } + break; + case INDEX_op_qemu_st_i32: + t0 = tci_read_r(regs, &tb_ptr); + taddr = tci_read_ulong(regs, &tb_ptr); + oi = tci_read_i(&tb_ptr); + switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { + case MO_UB: + qemu_st_b(t0); + break; + case MO_LEUW: + qemu_st_lew(t0); + break; + case MO_LEUL: + qemu_st_lel(t0); + break; + case MO_BEUW: + qemu_st_bew(t0); + break; + case MO_BEUL: + qemu_st_bel(t0); + break; + default: + tcg_abort(); + } + break; + case INDEX_op_qemu_st_i64: + tmp64 = tci_read_r64(regs, &tb_ptr); + taddr = tci_read_ulong(regs, &tb_ptr); + oi = tci_read_i(&tb_ptr); + switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { + case MO_UB: + qemu_st_b(tmp64); + break; + case MO_LEUW: + qemu_st_lew(tmp64); + break; + case MO_LEUL: + qemu_st_lel(tmp64); + break; + case MO_LEQ: + qemu_st_leq(tmp64); + break; + case MO_BEUW: + qemu_st_bew(tmp64); + break; + case MO_BEUL: + qemu_st_bel(tmp64); + break; + case MO_BEQ: + qemu_st_beq(tmp64); + break; + default: + tcg_abort(); + } + break; + case INDEX_op_mb: + /* Ensure ordering for all kinds */ + smp_mb(); + break; + default: + TODO(); + break; + } + tci_assert(tb_ptr == old_code_ptr + op_size); + } +exit: + return ret; +} diff --git a/qemu/tcg/tci/README b/qemu/tcg/tci/README new file mode 100644 index 0000000000..386c3c7507 --- /dev/null +++ b/qemu/tcg/tci/README @@ -0,0 +1,130 @@ +TCG Interpreter (TCI) - Copyright (c) 2011 Stefan Weil. + +This file is released under the BSD license. 
+
+1) Introduction
+
+TCG (Tiny Code Generator) is a code generator which translates
+code fragments ("basic blocks") from target code (any of the
+targets supported by QEMU) to a code representation which
+can be run on a host.
+
+QEMU can create native code for some hosts (arm, i386, ia64, ppc, ppc64,
+s390, sparc, x86_64). For others, unofficial host support was written.
+
+By adding a code generator for a virtual machine and using an
+interpreter for the generated bytecode, it is possible to
+support (almost) any host.
+
+This is what TCI (Tiny Code Interpreter) does.
+
+2) Implementation
+
+Like each TCG host frontend, TCI implements the code generator in
+tcg-target.inc.c, tcg-target.h. Both files are in directory tcg/tci.
+
+The additional file tcg/tci.c adds the interpreter.
+
+The bytecode consists of opcodes (same numeric values as those used by
+TCG), command length and arguments of variable size and number.
+
+3) Usage
+
+For hosts without native TCG, the interpreter TCI must be enabled by
+
+ configure --enable-tcg-interpreter
+
+If configure is called without --enable-tcg-interpreter, it will
+suggest using this option. Setting it automatically would need
+additional code in configure which must be fixed when new native TCG
+implementations are added.
+
+System emulation should work on any 32 or 64 bit host.
+User mode emulation might work. Maybe a new linker script (*.ld)
+is needed. Byte order might be wrong (on big endian hosts)
+and might need fixes in configure.
+
+For hosts with native TCG, the interpreter TCI can be enabled by
+
+ configure --enable-tcg-interpreter
+
+The only difference between running QEMU with TCI and running without TCI
+should be speed. Especially during development of TCI, it was very
+useful to compare runs with and without TCI. Create /tmp/qemu.log by
+
+ qemu-system-i386 -d in_asm,op_opt,cpu -D /tmp/qemu.log -singlestep
+
+once with the interpreter and once without it, and compare the resulting
+qemu.log files. This is also useful to see the effects of additional
+registers or additional opcodes (it is easy to modify the virtual machine).
+It can also be used to verify native TCGs.
+
+Hosts with native TCG can also enable TCI by claiming to be unsupported:
+
+ configure --cpu=unknown --enable-tcg-interpreter
+
+configure then no longer uses the native linker script (*.ld) for
+user mode emulation.
+
+
+4) Status
+
+TCI needs special handling for 32 and 64 bit hosts, 32 and 64 bit targets,
+and hosts and targets with the same or different endianness.
+
+ | host (le) host (be)
+ | 32 64 32 64
+------------+------------------------------------------------------------
+target (le) | s0, u0 s1, u1 s?, u? s?, u?
+32 bit |
+ |
+target (le) | sc, uc s1, u1 s?, u? s?, u?
+64 bit |
+ |
+target (be) | sc, u0 sc, uc s?, u? s?, u?
+32 bit |
+ |
+target (be) | sc, uc sc, uc s?, u? s?, u?
+64 bit |
+ |
+
+System emulation
+s? = untested
+sc = compiles
+s0 = bios works
+s1 = grub works
+s2 = Linux boots
+
+Linux user mode emulation
+u? = untested
+uc = compiles
+u0 = static hello works
+u1 = linux-user-test works
+
+5) Todo list
+
+* TCI is not widely tested. It was written and tested on an x86_64 host
+ running i386 and x86_64 system emulation and Linux user mode.
+ A cross compiled QEMU for i386 host also works with the same basic tests.
+ A cross compiled QEMU for mipsel host works, too. It is terribly slow
+ because I run it in a mips malta emulation, so it is an interpreted
+ emulation in an emulation.
+ A cross compiled QEMU for arm host works (tested with pc bios).
+ A cross compiled QEMU for ppc host works at least partially:
+ i386-linux-user/qemu-i386 can run a simple hello-world program
+ (tested in a ppc emulation).
+
+* Some TCG opcodes are missing in the code generator and/or
+ in the interpreter. These opcodes raise a runtime exception, so it is
+ possible to see where code must be added.
+
+* The pseudo code is not optimized and still ugly. For hosts with special
+ alignment requirements, it needs some fixes (maybe aligned bytecode
+ would also improve speed for hosts which support byte alignment).
+
+* A better disassembler for the pseudo code would be nice (a very primitive
+ disassembler is included in tcg-target.inc.c).
+
+* It might be useful to have a runtime option which selects the native TCG
+ or TCI, so QEMU would have to include two TCGs. Today, selecting TCI
+ is a configure option, so you need two compilations of QEMU.
diff --git a/qemu/tcg/tci/tcg-target.h b/qemu/tcg/tci/tcg-target.h
new file mode 100644
index 0000000000..8b90ab71cb
--- /dev/null
+++ b/qemu/tcg/tci/tcg-target.h
@@ -0,0 +1,213 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009, 2011 Stefan Weil
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * This code implements a TCG which does not generate machine code for some
+ * real target machine but which generates virtual machine code for an
+ * interpreter. Interpreted pseudo code is slow, but it works on any host.
+ *
+ * Some remarks might help in understanding the code:
+ *
+ * "target" or "TCG target" is the machine which runs the generated code.
+ * This is different from the usual meaning in QEMU, where "target" is the
+ * emulated machine. So normally the QEMU host is identical to the TCG target.
+ * Here the TCG target is a virtual machine, but this virtual machine must
+ * use the same word size as the real machine.
+ * Therefore, we need both 32 and 64 bit virtual machines (interpreters).
+ */
+
+#ifndef TCG_TARGET_H
+#define TCG_TARGET_H
+
+#define TCG_TARGET_INTERPRETER 1
+#define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+
+#if UINTPTR_MAX == UINT32_MAX
+# define TCG_TARGET_REG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+#else
+# error Unknown pointer size for tci target
+#endif
+
+#ifdef CONFIG_DEBUG_TCG
+/* Enable debug output. */
+#define CONFIG_DEBUG_TCG_INTERPRETER
+#endif
+
+/* Optional instructions.
*/ + +#define TCG_TARGET_HAS_bswap16_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 1 +#define TCG_TARGET_HAS_ext8s_i32 1 +#define TCG_TARGET_HAS_ext16s_i32 1 +#define TCG_TARGET_HAS_ext8u_i32 1 +#define TCG_TARGET_HAS_ext16u_i32 1 +#define TCG_TARGET_HAS_andc_i32 0 +#define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_extract_i32 0 +#define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 +#define TCG_TARGET_HAS_eqv_i32 0 +#define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_neg_i32 1 +#define TCG_TARGET_HAS_not_i32 1 +#define TCG_TARGET_HAS_orc_i32 0 +#define TCG_TARGET_HAS_rot_i32 1 +#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_direct_jump 1 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_bswap16_i64 1 +#define TCG_TARGET_HAS_bswap32_i64 1 +#define TCG_TARGET_HAS_bswap64_i64 1 +#define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_extract_i64 0 +#define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 +#define TCG_TARGET_HAS_div_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 +#define TCG_TARGET_HAS_ext8s_i64 1 +#define TCG_TARGET_HAS_ext16s_i64 1 +#define TCG_TARGET_HAS_ext32s_i64 1 +#define TCG_TARGET_HAS_ext8u_i64 1 +#define TCG_TARGET_HAS_ext16u_i64 1 +#define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_andc_i64 0 +#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_neg_i64 1 +#define TCG_TARGET_HAS_not_i64 1 +#define TCG_TARGET_HAS_orc_i64 0 +#define TCG_TARGET_HAS_rot_i64 1 +#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 +#else +#define TCG_TARGET_HAS_mulu2_i32 1 +#endif /* TCG_TARGET_REG_BITS == 64 */ + +/* Number of registers available. + For 32 bit hosts, we need more than 8 registers (call arguments). */ +/* #define TCG_TARGET_NB_REGS 8 */ +#define TCG_TARGET_NB_REGS 16 +/* #define TCG_TARGET_NB_REGS 32 */ + +/* List of registers which are used by TCG. */ +typedef enum { + TCG_REG_R0 = 0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, +#if TCG_TARGET_NB_REGS >= 32 + TCG_REG_R16, + TCG_REG_R17, + TCG_REG_R18, + TCG_REG_R19, + TCG_REG_R20, + TCG_REG_R21, + TCG_REG_R22, + TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, + TCG_REG_R28, + TCG_REG_R29, + TCG_REG_R30, + TCG_REG_R31, +#endif +#endif + /* Special value UINT8_MAX is used by TCI to encode constant values. 
*/ + TCG_CONST = UINT8_MAX +} TCGReg; + +#define TCG_AREG0 (TCG_TARGET_NB_REGS - 2) + +/* Used for function call generation. */ +#define TCG_REG_CALL_STACK (TCG_TARGET_NB_REGS - 1) +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_STACK_ALIGN 16 + +void tci_disas(uint8_t opc); + +#define HAVE_TCG_QEMU_TB_EXEC + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +} + +/* We could notice __i386__ or __s390x__ and reduce the barriers depending + on the host. But if you want performance, you use the normal backend. + We prefer consistency across hosts on this. */ +#define TCG_TARGET_DEFAULT_MO (0) + +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 + +static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, + uintptr_t jmp_addr, uintptr_t addr) +{ + /* patch the branch destination */ + atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4)); + /* no need to flush icache explicitly */ +} + +#endif /* TCG_TARGET_H */ diff --git a/qemu/tcg/tci/tcg-target.inc.c b/qemu/tcg/tci/tcg-target.inc.c new file mode 100644 index 0000000000..ab3114532f --- /dev/null +++ b/qemu/tcg/tci/tcg-target.inc.c @@ -0,0 +1,896 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009, 2011 Stefan Weil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* TODO list: + * - See TODO comments in code. + */ + +/* Marker for missing code. */ +#define TODO() \ + do { \ + fprintf(stderr, "TODO %s:%u: %s()\n", \ + __FILE__, __LINE__, __func__); \ + tcg_abort(); \ + } while (0) + +/* Bitfield n...m (in 32 bit value). */ +#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) + +/* Macros used in tcg_target_op_defs. */ +#define R "r" +#define RI "ri" +#if TCG_TARGET_REG_BITS == 32 +# define R64 "r", "r" +#else +# define R64 "r" +#endif +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +# define L "L", "L" +# define S "S", "S" +#else +# define L "L" +# define S "S" +#endif + +/* TODO: documentation. 
*/ +static const TCGTargetOpDef tcg_target_op_defs[] = { + { INDEX_op_exit_tb, { NULL } }, + { INDEX_op_goto_tb, { NULL } }, + { INDEX_op_br, { NULL } }, + + { INDEX_op_ld8u_i32, { R, R } }, + { INDEX_op_ld8s_i32, { R, R } }, + { INDEX_op_ld16u_i32, { R, R } }, + { INDEX_op_ld16s_i32, { R, R } }, + { INDEX_op_ld_i32, { R, R } }, + { INDEX_op_st8_i32, { R, R } }, + { INDEX_op_st16_i32, { R, R } }, + { INDEX_op_st_i32, { R, R } }, + + { INDEX_op_add_i32, { R, RI, RI } }, + { INDEX_op_sub_i32, { R, RI, RI } }, + { INDEX_op_mul_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i32 + { INDEX_op_div_i32, { R, R, R } }, + { INDEX_op_divu_i32, { R, R, R } }, + { INDEX_op_rem_i32, { R, R, R } }, + { INDEX_op_remu_i32, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i32 + { INDEX_op_div2_i32, { R, R, "0", "1", R } }, + { INDEX_op_divu2_i32, { R, R, "0", "1", R } }, +#endif + /* TODO: Does R, RI, RI result in faster code than R, R, RI? + If both operands are constants, we can optimize. */ + { INDEX_op_and_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i32 + { INDEX_op_andc_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i32 + { INDEX_op_eqv_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i32 + { INDEX_op_nand_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i32 + { INDEX_op_nor_i32, { R, RI, RI } }, +#endif + { INDEX_op_or_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i32 + { INDEX_op_orc_i32, { R, RI, RI } }, +#endif + { INDEX_op_xor_i32, { R, RI, RI } }, + { INDEX_op_shl_i32, { R, RI, RI } }, + { INDEX_op_shr_i32, { R, RI, RI } }, + { INDEX_op_sar_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i32 + { INDEX_op_rotl_i32, { R, RI, RI } }, + { INDEX_op_rotr_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i32 + { INDEX_op_deposit_i32, { R, "0", R } }, +#endif + + { INDEX_op_brcond_i32, { R, RI } }, + + { INDEX_op_setcond_i32, { R, R, RI } }, +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_setcond_i64, { R, R, RI } }, +#endif /* TCG_TARGET_REG_BITS == 64 */ + +#if TCG_TARGET_REG_BITS == 32 + /* TODO: Support R, R, R, R, RI, RI? Will it be faster? 
*/ + { INDEX_op_add2_i32, { R, R, R, R, R, R } }, + { INDEX_op_sub2_i32, { R, R, R, R, R, R } }, + { INDEX_op_brcond2_i32, { R, R, RI, RI } }, + { INDEX_op_mulu2_i32, { R, R, R, R } }, + { INDEX_op_setcond2_i32, { R, R, R, RI, RI } }, +#endif + +#if TCG_TARGET_HAS_not_i32 + { INDEX_op_not_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i32 + { INDEX_op_neg_i32, { R, R } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_ld8u_i64, { R, R } }, + { INDEX_op_ld8s_i64, { R, R } }, + { INDEX_op_ld16u_i64, { R, R } }, + { INDEX_op_ld16s_i64, { R, R } }, + { INDEX_op_ld32u_i64, { R, R } }, + { INDEX_op_ld32s_i64, { R, R } }, + { INDEX_op_ld_i64, { R, R } }, + + { INDEX_op_st8_i64, { R, R } }, + { INDEX_op_st16_i64, { R, R } }, + { INDEX_op_st32_i64, { R, R } }, + { INDEX_op_st_i64, { R, R } }, + + { INDEX_op_add_i64, { R, RI, RI } }, + { INDEX_op_sub_i64, { R, RI, RI } }, + { INDEX_op_mul_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i64 + { INDEX_op_div_i64, { R, R, R } }, + { INDEX_op_divu_i64, { R, R, R } }, + { INDEX_op_rem_i64, { R, R, R } }, + { INDEX_op_remu_i64, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i64 + { INDEX_op_div2_i64, { R, R, "0", "1", R } }, + { INDEX_op_divu2_i64, { R, R, "0", "1", R } }, +#endif + { INDEX_op_and_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i64 + { INDEX_op_andc_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i64 + { INDEX_op_eqv_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i64 + { INDEX_op_nand_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i64 + { INDEX_op_nor_i64, { R, RI, RI } }, +#endif + { INDEX_op_or_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i64 + { INDEX_op_orc_i64, { R, RI, RI } }, +#endif + { INDEX_op_xor_i64, { R, RI, RI } }, + { INDEX_op_shl_i64, { R, RI, RI } }, + { INDEX_op_shr_i64, { R, RI, RI } }, + { INDEX_op_sar_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i64 + { INDEX_op_rotl_i64, { R, RI, RI } }, + { INDEX_op_rotr_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i64 + { INDEX_op_deposit_i64, { R, "0", R } }, +#endif + { INDEX_op_brcond_i64, { R, RI } }, + +#if TCG_TARGET_HAS_ext8s_i64 + { INDEX_op_ext8s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i64 + { INDEX_op_ext16s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32s_i64 + { INDEX_op_ext32s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i64 + { INDEX_op_ext8u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i64 + { INDEX_op_ext16u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32u_i64 + { INDEX_op_ext32u_i64, { R, R } }, +#endif + { INDEX_op_ext_i32_i64, { R, R } }, + { INDEX_op_extu_i32_i64, { R, R } }, +#if TCG_TARGET_HAS_bswap16_i64 + { INDEX_op_bswap16_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i64 + { INDEX_op_bswap32_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap64_i64 + { INDEX_op_bswap64_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_not_i64 + { INDEX_op_not_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i64 + { INDEX_op_neg_i64, { R, R } }, +#endif +#endif /* TCG_TARGET_REG_BITS == 64 */ + + { INDEX_op_qemu_ld_i32, { R, L } }, + { INDEX_op_qemu_ld_i64, { R64, L } }, + + { INDEX_op_qemu_st_i32, { R, S } }, + { INDEX_op_qemu_st_i64, { R64, S } }, + +#if TCG_TARGET_HAS_ext8s_i32 + { INDEX_op_ext8s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i32 + { INDEX_op_ext16s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i32 + { INDEX_op_ext8u_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i32 + { INDEX_op_ext16u_i32, { R, R } }, +#endif + +#if TCG_TARGET_HAS_bswap16_i32 + { INDEX_op_bswap16_i32, { 
R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i32 + { INDEX_op_bswap32_i32, { R, R } }, +#endif + + { INDEX_op_mb, { } }, + { -1 }, +}; + +static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) +{ + int i, n = ARRAY_SIZE(tcg_target_op_defs); + + for (i = 0; i < n; ++i) { + if (tcg_target_op_defs[i].op == op) { + return &tcg_target_op_defs[i]; + } + } + return NULL; +} + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ + TCG_REG_R4, +#endif + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, +#endif +}; + +#if MAX_OPC_PARAM_IARGS != 6 +# error Fix needed, number of supported input arguments changed! +#endif + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ + TCG_REG_R4, +#endif + TCG_REG_R5, + TCG_REG_R6, +#if TCG_TARGET_REG_BITS == 32 + /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */ + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, +#else +# error Too few input registers available +#endif +#endif +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R0, +#if TCG_TARGET_REG_BITS == 32 + TCG_REG_R1 +#endif +}; + +#ifdef CONFIG_DEBUG_TCG +static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r00", + "r01", + "r02", + "r03", + "r04", + "r05", + "r06", + "r07", +#if TCG_TARGET_NB_REGS >= 16 + "r08", + "r09", + "r10", + "r11", + "r12", + "r13", + "r14", + "r15", +#if TCG_TARGET_NB_REGS >= 32 + "r16", + "r17", + "r18", + "r19", + "r20", + "r21", + "r22", + "r23", + "r24", + "r25", + "r26", + "r27", + "r28", + "r29", + "r30", + "r31" +#endif +#endif +}; +#endif + +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + /* tcg_out_reloc always uses the same type, addend. */ + tcg_debug_assert(type == sizeof(tcg_target_long)); + tcg_debug_assert(addend == 0); + tcg_debug_assert(value != 0); + if (TCG_TARGET_REG_BITS == 32) { + tcg_patch32(code_ptr, value); + } else { + tcg_patch64(code_ptr, value); + } + return true; +} + +/* Parse target specific constraints. */ +static const char *target_parse_constraint(TCGArgConstraint *ct, + const char *ct_str, TCGType type) +{ + switch (*ct_str++) { + case 'r': + case 'L': /* qemu_ld constraint */ + case 'S': /* qemu_st constraint */ + ct->ct |= TCG_CT_REG; + ct->u.regs = BIT(TCG_TARGET_NB_REGS) - 1; + break; + default: + return NULL; + } + return ct_str; +} + +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) +/* Show current bytecode. Used by tcg interpreter. */ +void tci_disas(uint8_t opc) +{ + const TCGOpDef *def = &tcg_op_defs[opc]; + fprintf(stderr, "TCG %s %u, %u, %u\n", + def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs); +} +#endif + +/* Write value (native size). */ +static void tcg_out_i(TCGContext *s, tcg_target_ulong v) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out32(s, v); + } else { + tcg_out64(s, v); + } +} + +/* Write opcode. */ +static void tcg_out_op_t(TCGContext *s, TCGOpcode op) +{ + tcg_out8(s, op); + tcg_out8(s, 0); +} + +/* Write register. */ +static void tcg_out_r(TCGContext *s, TCGArg t0) +{ + tcg_debug_assert(t0 < TCG_TARGET_NB_REGS); + tcg_out8(s, t0); +} + +/* Write register or constant (native size). 
*/ +static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + tcg_debug_assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out_i(s, arg); + } else { + tcg_out_r(s, arg); + } +} + +/* Write register or constant (32 bit). */ +static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + tcg_debug_assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out32(s, arg); + } else { + tcg_out_r(s, arg); + } +} + +#if TCG_TARGET_REG_BITS == 64 +/* Write register or constant (64 bit). */ +static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + tcg_debug_assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out64(s, arg); + } else { + tcg_out_r(s, arg); + } +} +#endif + +/* Write label. */ +static void tci_out_label(TCGContext *s, TCGLabel *label) +{ + if (label->has_value) { + tcg_out_i(s, label->u.value); + tcg_debug_assert(label->u.value); + } else { + tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0); + s->code_ptr += sizeof(tcg_target_ulong); + } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, + intptr_t arg2) +{ + uint8_t *old_code_ptr = s->code_ptr; + if (type == TCG_TYPE_I32) { + tcg_out_op_t(s, INDEX_op_ld_i32); + tcg_out_r(s, ret); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); + } else { + tcg_debug_assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_ld_i64); + tcg_out_r(s, ret); + tcg_out_r(s, arg1); + tcg_debug_assert(arg2 == (int32_t)arg2); + tcg_out32(s, arg2); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + tcg_debug_assert(ret != arg); +#if TCG_TARGET_REG_BITS == 32 + tcg_out_op_t(s, INDEX_op_mov_i32); +#else + tcg_out_op_t(s, INDEX_op_mov_i64); +#endif + tcg_out_r(s, ret); + tcg_out_r(s, arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; + return true; +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg t0, tcg_target_long arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + uint32_t arg32 = arg; + if (type == TCG_TYPE_I32 || arg == arg32) { + tcg_out_op_t(s, INDEX_op_movi_i32); + tcg_out_r(s, t0); + tcg_out32(s, arg32); + } else { + tcg_debug_assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_movi_i64); + tcg_out_r(s, t0); + tcg_out64(s, arg); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + tcg_out_op_t(s, INDEX_op_call); + tcg_out_ri(s, 1, (uintptr_t)arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args) +{ + uint8_t *old_code_ptr = s->code_ptr; + + tcg_out_op_t(s, opc); + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out64(s, args[0]); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_insn_offset) { + /* Direct jump method. */ + /* Align for atomic patching and thread safety */ + s->code_ptr = QEMU_ALIGN_PTR_UP(s->code_ptr, 4); + s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); + tcg_out32(s, 0); + } else { + /* Indirect jump method. 
*/ + TODO(); + } + set_jmp_reset_offset(s, args[0]); + break; + case INDEX_op_br: + tci_out_label(s, arg_label(args[0])); + break; + case INDEX_op_setcond_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + tcg_out8(s, args[3]); /* condition */ + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_setcond2_i32: + /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_ri32(s, const_args[3], args[3]); + tcg_out_ri32(s, const_args[4], args[4]); + tcg_out8(s, args[5]); /* condition */ + break; +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_setcond_i64: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri64(s, const_args[2], args[2]); + tcg_out8(s, args[3]); /* condition */ + break; +#endif + case INDEX_op_ld8u_i32: + case INDEX_op_ld8s_i32: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16s_i32: + case INDEX_op_ld_i32: + case INDEX_op_st8_i32: + case INDEX_op_st16_i32: + case INDEX_op_st_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld8s_i64: + case INDEX_op_ld16u_i64: + case INDEX_op_ld16s_i64: + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld_i64: + case INDEX_op_st8_i64: + case INDEX_op_st16_i64: + case INDEX_op_st32_i64: + case INDEX_op_st_i64: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_debug_assert(args[2] == (int32_t)args[2]); + tcg_out32(s, args[2]); + break; + case INDEX_op_add_i32: + case INDEX_op_sub_i32: + case INDEX_op_mul_i32: + case INDEX_op_and_i32: + case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */ + case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */ + case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */ + case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */ + case INDEX_op_or_i32: + case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */ + case INDEX_op_xor_i32: + case INDEX_op_shl_i32: + case INDEX_op_shr_i32: + case INDEX_op_sar_i32: + case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ + case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + break; + case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_debug_assert(args[3] <= UINT8_MAX); + tcg_out8(s, args[3]); + tcg_debug_assert(args[4] <= UINT8_MAX); + tcg_out8(s, args[4]); + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_add_i64: + case INDEX_op_sub_i64: + case INDEX_op_mul_i64: + case INDEX_op_and_i64: + case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */ + case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */ + case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */ + case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */ + case INDEX_op_or_i64: + case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */ + case INDEX_op_xor_i64: + case INDEX_op_shl_i64: + case INDEX_op_shr_i64: + case INDEX_op_sar_i64: + case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ + case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ + tcg_out_r(s, args[0]); + tcg_out_ri64(s, const_args[1], args[1]); + tcg_out_ri64(s, const_args[2], args[2]); + break; + case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). 
*/ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_debug_assert(args[3] <= UINT8_MAX); + tcg_out8(s, args[3]); + tcg_debug_assert(args[4] <= UINT8_MAX); + tcg_out8(s, args[4]); + break; + case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + TODO(); + break; + case INDEX_op_div2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ + case INDEX_op_divu2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ + TODO(); + break; + case INDEX_op_brcond_i64: + tcg_out_r(s, args[0]); + tcg_out_ri64(s, const_args[1], args[1]); + tcg_out8(s, args[2]); /* condition */ + tci_out_label(s, arg_label(args[3])); + break; + case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ + case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ + case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ + case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */ + case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */ + case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */ + case INDEX_op_ext8u_i64: /* Optional (TCG_TARGET_HAS_ext8u_i64). */ + case INDEX_op_ext16s_i64: /* Optional (TCG_TARGET_HAS_ext16s_i64). */ + case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */ + case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */ + case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */ + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: +#endif /* TCG_TARGET_REG_BITS == 64 */ + case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */ + case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */ + case INDEX_op_ext8s_i32: /* Optional (TCG_TARGET_HAS_ext8s_i32). */ + case INDEX_op_ext16s_i32: /* Optional (TCG_TARGET_HAS_ext16s_i32). */ + case INDEX_op_ext8u_i32: /* Optional (TCG_TARGET_HAS_ext8u_i32). */ + case INDEX_op_ext16u_i32: /* Optional (TCG_TARGET_HAS_ext16u_i32). */ + case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ + case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + break; + case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + break; + case INDEX_op_div2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */ + case INDEX_op_divu2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). 
*/ + TODO(); + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_add2_i32: + case INDEX_op_sub2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_r(s, args[3]); + tcg_out_r(s, args[4]); + tcg_out_r(s, args[5]); + break; + case INDEX_op_brcond2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + tcg_out_ri32(s, const_args[3], args[3]); + tcg_out8(s, args[4]); /* condition */ + tci_out_label(s, arg_label(args[5])); + break; + case INDEX_op_mulu2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_r(s, args[3]); + break; +#endif + case INDEX_op_brcond_i32: + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out8(s, args[2]); /* condition */ + tci_out_label(s, arg_label(args[3])); + break; + case INDEX_op_qemu_ld_i32: + tcg_out_r(s, *args++); + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_r(s, *args++); + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_st_i32: + tcg_out_r(s, *args++); + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_st_i64: + tcg_out_r(s, *args++); + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_mb: + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, + intptr_t arg2) +{ + uint8_t *old_code_ptr = s->code_ptr; + if (type == TCG_TYPE_I32) { + tcg_out_op_t(s, INDEX_op_st_i32); + tcg_out_r(s, arg); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); + } else { + tcg_debug_assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_st_i64); + tcg_out_r(s, arg); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + return false; +} + +/* Test if a constant matches the constraint. */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + /* No need to return 0 or 1, 0 or != 0 is good enough. */ + return arg_ct->ct & TCG_CT_CONST; +} + +static void tcg_target_init(TCGContext *s) +{ +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) + const char *envval = getenv("DEBUG_TCG"); + if (envval) { + qemu_set_log(strtol(envval, NULL, 0)); + } +#endif + + /* The current code uses uint8_t for tcg operations. */ + tcg_debug_assert(s->tcg_op_defs_max <= UINT8_MAX); + + /* Registers available for 32 bit operations. */ + s->tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1; + /* Registers available for 64 bit operations. 
*/ + s->tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1; + /* TODO: Which registers should be set here? */ + s->tcg_target_call_clobber_regs = BIT(TCG_TARGET_NB_REGS) - 1; + + s->reserved_regs = 0; + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + + /* We use negative offsets from "sp" so that we can distinguish + stores that might pretend to be call arguments. */ + tcg_set_frame(s, TCG_REG_CALL_STACK, + -CPU_TEMP_BUF_NLONGS * sizeof(long), + CPU_TEMP_BUF_NLONGS * sizeof(long)); +} + +/* Generate global QEMU prologue and epilogue code. */ +static inline void tcg_target_qemu_prologue(TCGContext *s) +{ +} From cb5aab0feff6cba55b6845d7d49347d2b6ea65a2 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sun, 6 Apr 2025 02:33:36 -0400 Subject: [PATCH 3/4] squash avr & rh850 (#2146) * Squashed commit of #2021 * Squashed commit of #1918 --------- Co-authored-by: Glenn Baker Co-authored-by: Damien Cauquil --- CMakeLists.txt | 104 +- CREDITS.TXT | 1 + Cargo.toml | 4 +- README.md | 2 +- bindings/const_generator.py | 16 +- bindings/dotnet/UnicornEngine/Const/AVR.fs | 155 + bindings/dotnet/UnicornEngine/Const/Common.fs | 5 +- bindings/dotnet/UnicornEngine/Const/Rh850.fs | 99 + bindings/go/unicorn/avr_const.go | 150 + bindings/go/unicorn/rh850_const.go | 94 + bindings/go/unicorn/unicorn_const.go | 7 +- .../java/src/main/java/unicorn/AVRConst.java | 153 + .../src/main/java/unicorn/Rh850Const.java | 97 + .../src/main/java/unicorn/UnicornConst.java | 5 +- bindings/pascal/unicorn/AVRConst.pas | 155 + bindings/pascal/unicorn/Rh850Const.pas | 99 + bindings/pascal/unicorn/UnicornConst.pas | 7 +- bindings/python/sample_rh850.py | 70 + bindings/python/unicorn/__init__.py | 2 +- bindings/python/unicorn/avr_const.py | 147 + bindings/python/unicorn/rh850_const.py | 91 + bindings/python/unicorn/unicorn_const.py | 5 +- .../lib/unicorn_engine/avr_const.rb | 150 + .../lib/unicorn_engine/rh850_const.rb | 94 + .../lib/unicorn_engine/unicorn_const.rb | 7 +- bindings/rust/build.rs | 6 + bindings/rust/src/avr.rs | 211 + bindings/rust/src/lib.rs | 16 + bindings/rust/src/rh850.rs | 119 + bindings/rust/src/unicorn_const.rs | 6 +- bindings/zig/unicorn/AVR_const.zig | 151 + bindings/zig/unicorn/rh850_const.zig | 95 + bindings/zig/unicorn/unicorn_const.zig | 5 +- build.zig | 1 + format.sh | 0 include/uc_priv.h | 1 + include/unicorn/avr.h | 189 + include/unicorn/rh850.h | 111 + include/unicorn/unicorn.h | 7 + msvc/avr-softmmu/config-target.h | 5 + msvc/rh850-softmmu/config-target.h | 6 + qemu/MAINTAINERS | 9 + qemu/avr.h | 1297 ++++ qemu/configure | 21 +- qemu/include/tcg/tcg.h | 19 +- qemu/rh850.h | 1294 ++++ qemu/target/avr/cpu-param.h | 36 + qemu/target/avr/cpu-qom.h | 56 + qemu/target/avr/cpu.c | 459 ++ qemu/target/avr/cpu.h | 274 + qemu/target/avr/decode-insn.c.inc | 1097 ++++ qemu/target/avr/gdbstub.c | 84 + qemu/target/avr/helper.c | 373 ++ qemu/target/avr/helper.h | 37 + qemu/target/avr/insn.decode | 187 + qemu/target/avr/machine.c | 119 + qemu/target/avr/translate.c | 3270 +++++++++++ qemu/target/avr/unicorn.c | 280 + qemu/target/avr/unicorn.h | 21 + qemu/target/avr/unicorn_helper.h | 165 + qemu/target/rh850/Makefile.objs | 1 + qemu/target/rh850/cpu-param.h | 11 + qemu/target/rh850/cpu.c | 473 ++ qemu/target/rh850/cpu.h | 276 + qemu/target/rh850/cpu_bits.h | 431 ++ qemu/target/rh850/cpu_user.h | 13 + qemu/target/rh850/fpu_helper.c | 823 +++ qemu/target/rh850/fpu_translate.c | 1557 +++++ qemu/target/rh850/fpu_translate.h | 41 + qemu/target/rh850/gdbstub.c | 169 + qemu/target/rh850/helper.c 
| 539 ++ qemu/target/rh850/helper.h | 157 + qemu/target/rh850/instmap.h | 624 ++ qemu/target/rh850/op_helper.c | 89 + qemu/target/rh850/pmp.c | 379 ++ qemu/target/rh850/pmp.h | 64 + qemu/target/rh850/register_indices.h | 63 + qemu/target/rh850/translate.c | 5190 +++++++++++++++++ qemu/target/rh850/translate.h | 35 + qemu/target/rh850/unicorn.c | 140 + qemu/target/rh850/unicorn.h | 16 + samples/Makefile | 3 + samples/sample_avr.c | 131 + samples/sample_rh850.c | 118 + symbols.sh | 22 +- tests/unit/test_avr.c | 268 + tests/unit/test_rh850.c | 40 + uc.c | 56 + 88 files changed, 23454 insertions(+), 21 deletions(-) create mode 100644 bindings/dotnet/UnicornEngine/Const/AVR.fs create mode 100644 bindings/dotnet/UnicornEngine/Const/Rh850.fs create mode 100644 bindings/go/unicorn/avr_const.go create mode 100644 bindings/go/unicorn/rh850_const.go create mode 100644 bindings/java/src/main/java/unicorn/AVRConst.java create mode 100644 bindings/java/src/main/java/unicorn/Rh850Const.java create mode 100644 bindings/pascal/unicorn/AVRConst.pas create mode 100644 bindings/pascal/unicorn/Rh850Const.pas create mode 100644 bindings/python/sample_rh850.py create mode 100644 bindings/python/unicorn/avr_const.py create mode 100644 bindings/python/unicorn/rh850_const.py create mode 100644 bindings/ruby/unicorn_gem/lib/unicorn_engine/avr_const.rb create mode 100644 bindings/ruby/unicorn_gem/lib/unicorn_engine/rh850_const.rb create mode 100644 bindings/rust/src/avr.rs create mode 100644 bindings/rust/src/rh850.rs create mode 100644 bindings/zig/unicorn/AVR_const.zig create mode 100644 bindings/zig/unicorn/rh850_const.zig mode change 100644 => 100755 format.sh create mode 100644 include/unicorn/avr.h create mode 100644 include/unicorn/rh850.h create mode 100644 msvc/avr-softmmu/config-target.h create mode 100644 msvc/rh850-softmmu/config-target.h create mode 100644 qemu/avr.h create mode 100644 qemu/rh850.h create mode 100644 qemu/target/avr/cpu-param.h create mode 100644 qemu/target/avr/cpu-qom.h create mode 100644 qemu/target/avr/cpu.c create mode 100644 qemu/target/avr/cpu.h create mode 100644 qemu/target/avr/decode-insn.c.inc create mode 100644 qemu/target/avr/gdbstub.c create mode 100644 qemu/target/avr/helper.c create mode 100644 qemu/target/avr/helper.h create mode 100644 qemu/target/avr/insn.decode create mode 100644 qemu/target/avr/machine.c create mode 100644 qemu/target/avr/translate.c create mode 100644 qemu/target/avr/unicorn.c create mode 100644 qemu/target/avr/unicorn.h create mode 100644 qemu/target/avr/unicorn_helper.h create mode 100644 qemu/target/rh850/Makefile.objs create mode 100644 qemu/target/rh850/cpu-param.h create mode 100644 qemu/target/rh850/cpu.c create mode 100644 qemu/target/rh850/cpu.h create mode 100644 qemu/target/rh850/cpu_bits.h create mode 100644 qemu/target/rh850/cpu_user.h create mode 100644 qemu/target/rh850/fpu_helper.c create mode 100644 qemu/target/rh850/fpu_translate.c create mode 100644 qemu/target/rh850/fpu_translate.h create mode 100644 qemu/target/rh850/gdbstub.c create mode 100644 qemu/target/rh850/helper.c create mode 100644 qemu/target/rh850/helper.h create mode 100644 qemu/target/rh850/instmap.h create mode 100644 qemu/target/rh850/op_helper.c create mode 100644 qemu/target/rh850/pmp.c create mode 100644 qemu/target/rh850/pmp.h create mode 100644 qemu/target/rh850/register_indices.h create mode 100644 qemu/target/rh850/translate.c create mode 100644 qemu/target/rh850/translate.h create mode 100644 qemu/target/rh850/unicorn.c create mode 100644 
qemu/target/rh850/unicorn.h create mode 100644 samples/sample_avr.c create mode 100644 samples/sample_rh850.c create mode 100644 tests/unit/test_avr.c create mode 100644 tests/unit/test_rh850.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 78db47b688..6a266c4d76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,7 +86,7 @@ option(UNICORN_FUZZ "Enable fuzzing" OFF) option(UNICORN_LOGGING "Enable logging" OFF) option(UNICORN_BUILD_TESTS "Build unicorn tests" ${PROJECT_IS_TOP_LEVEL}) option(UNICORN_INSTALL "Enable unicorn installation" ${PROJECT_IS_TOP_LEVEL}) -set(UNICORN_ARCH "x86;arm;aarch64;riscv;mips;sparc;m68k;ppc;s390x;tricore" CACHE STRING "Enabled unicorn architectures") +set(UNICORN_ARCH "x86;arm;aarch64;riscv;mips;sparc;m68k;ppc;rh850;s390x;tricore;avr" CACHE STRING "Enabled unicorn architectures") option(UNICORN_TRACER "Trace unicorn execution" OFF) option(UNICORN_INTERPRETER "Use interpreter mode" OFF) @@ -274,6 +274,11 @@ else() set(UNICORN_TARGET_ARCH "tricore") break() endif() + string(FIND ${UC_COMPILER_MACRO} "__AVR__" UC_RET) + if (${UC_RET} GREATER_EQUAL "0") + set(UNICORN_TARGET_ARCH "avr") + break() + endif() message(FATAL_ERROR "Unknown host compiler: ${CMAKE_C_COMPILER}.") endwhile(TRUE) endif() @@ -304,6 +309,9 @@ else() if(UNICORN_HAS_PPC) set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_PPC ") endif() + if(UNICORN_HAS_RH850) + set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_RH850 ") + endif() if(UNICORN_HAS_RISCV) set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_RISCV ") endif() @@ -313,6 +321,9 @@ else() if (UNICORN_HAS_TRICORE) set (EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_TRICORE ") endif() + if (UNICORN_HAS_AVR) + set (EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_AVR ") + endif() set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-fPIC") if(ANDROID_ABI) @@ -355,6 +366,9 @@ else() if(UNICORN_HAS_PPC) set(TARGET_LIST "${TARGET_LIST}ppc-softmmu, ppc64-softmmu, ") endif() + if(UNICORN_HAS_RH850) + set(TARGET_LIST "${TARGET_LIST}rh850-softmmu, ") + endif() if(UNICORN_HAS_RISCV) set(TARGET_LIST "${TARGET_LIST}riscv32-softmmu, riscv64-softmmu, ") endif() @@ -364,6 +378,9 @@ else() if (UNICORN_HAS_TRICORE) set (TARGET_LIST "${TARGET_LIST}tricore-softmmu, ") endif() + if (UNICORN_HAS_AVR) + set (TARGET_LIST "${TARGET_LIST}avr-softmmu, ") + endif() set(TARGET_LIST "${TARGET_LIST} ") # GEN config-host.mak & target directories @@ -446,6 +463,12 @@ else() OUTPUT_FILE ${CMAKE_BINARY_DIR}/ppc64-softmmu/config-target.h ) endif() + if(UNICORN_HAS_RH850) + execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/scripts/create_config + INPUT_FILE ${CMAKE_BINARY_DIR}/rh850-softmmu/config-target.mak + OUTPUT_FILE ${CMAKE_BINARY_DIR}/rh850-softmmu/config-target.h + ) + endif() if(UNICORN_HAS_RISCV) execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/scripts/create_config INPUT_FILE ${CMAKE_BINARY_DIR}/riscv32-softmmu/config-target.mak @@ -468,6 +491,12 @@ else() OUTPUT_FILE ${CMAKE_BINARY_DIR}/tricore-softmmu/config-target.h ) endif() + if (UNICORN_HAS_AVR) + execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/scripts/create_config + INPUT_FILE ${CMAKE_BINARY_DIR}/avr-softmmu/config-target.mak + OUTPUT_FILE ${CMAKE_BINARY_DIR}/avr-softmmu/config-target.h + ) + endif() add_compile_options( ${UNICORN_CFLAGS} -I${CMAKE_CURRENT_SOURCE_DIR}/qemu/tcg/${UNICORN_TARGET_ARCH} @@ -1174,6 +1203,65 @@ endif() endif() +if (UNICORN_HAS_AVR) +add_library(avr-softmmu STATIC + ${UNICORN_ARCH_COMMON} + + qemu/target/avr/cpu.c + qemu/target/avr/helper.c + qemu/target/avr/translate.c + 
qemu/target/avr/unicorn.c +) + +if(MSVC) + target_compile_options(avr-softmmu PRIVATE + -DNEED_CPU_H + /FIavr.h + /I${CMAKE_CURRENT_SOURCE_DIR}/msvc/avr-softmmu + /I${CMAKE_CURRENT_SOURCE_DIR}/qemu/target/avr + ) +else() + target_compile_options(avr-softmmu PRIVATE + -DNEED_CPU_H + -include avr.h + -I${CMAKE_BINARY_DIR}/avr-softmmu + -I${CMAKE_CURRENT_SOURCE_DIR}/qemu/target/avr + ) +endif() +endif() + + +if (UNICORN_HAS_RH850) +add_library(rh850-softmmu STATIC + ${UNICORN_ARCH_COMMON} + + qemu/target/rh850/cpu.c + qemu/target/rh850/fpu_helper.c + qemu/target/rh850/helper.c + qemu/target/rh850/op_helper.c + qemu/target/rh850/translate.c + qemu/target/rh850/fpu_translate.c + qemu/target/rh850/unicorn.c +) + + +if(MSVC) + target_compile_options(rh850-softmmu PRIVATE + -DNEED_CPU_H + /FIrh850.h + /I${CMAKE_CURRENT_SOURCE_DIR}/msvc/rh850-softmmu + /I${CMAKE_CURRENT_SOURCE_DIR}/qemu/target/rh850 + ) +else() + target_compile_options(rh850-softmmu PRIVATE + -DNEED_CPU_H + -include rh850.h + -I${CMAKE_BINARY_DIR}/rh850-softmmu + -I${CMAKE_CURRENT_SOURCE_DIR}/qemu/target/rh850 + ) +endif() +endif() + set(UNICORN_SRCS uc.c @@ -1326,6 +1414,13 @@ if(UNICORN_HAS_PPC) target_link_libraries(ppc64-softmmu PRIVATE unicorn-common) set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_ppc) endif() +if(UNICORN_HAS_RH850) + set(UNICORN_COMPILE_OPTIONS ${UNICORN_COMPILE_OPTIONS} -DUNICORN_HAS_RH850) + set(UNICORN_LINK_LIBRARIES ${UNICORN_LINK_LIBRARIES} rh850-softmmu rh850-softmmu) + set(UNICORN_SAMPLE_FILE ${UNICORN_SAMPLE_FILE} sample_rh850) + target_link_libraries(rh850-softmmu PRIVATE unicorn-common) + set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_rh850) +endif() if(UNICORN_HAS_RISCV) set(UNICORN_COMPILE_OPTIONS ${UNICORN_COMPILE_OPTIONS} -DUNICORN_HAS_RISCV) set(UNICORN_LINK_LIBRARIES ${UNICORN_LINK_LIBRARIES} riscv32-softmmu riscv64-softmmu) @@ -1348,6 +1443,13 @@ if (UNICORN_HAS_TRICORE) target_link_libraries(tricore-softmmu unicorn-common) set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_tricore) endif() +if (UNICORN_HAS_AVR) + set(UNICORN_COMPILE_OPTIONS ${UNICORN_COMPILE_OPTIONS} -DUNICORN_HAS_AVR) + set(UNICORN_LINK_LIBRARIES ${UNICORN_LINK_LIBRARIES} avr-softmmu) + set(UNICORN_SAMPLE_FILE ${UNICORN_SAMPLE_FILE} sample_avr) + target_link_libraries(avr-softmmu unicorn-common) + set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_avr) +endif() # Extra tests set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_mem) diff --git a/CREDITS.TXT b/CREDITS.TXT index ee443858b7..f2dc6118a0 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -81,3 +81,4 @@ Ziqiao Kong (lazymio): uc_context_free() API and various bug fix & improvement. 
Sven Almgren (blindmatrix): bug fix Chenxu Wu (kabeor): Documentation Philipp Takacs: virtual tlb, memory snapshots +Glenn Baker: AVR architecture support diff --git a/Cargo.toml b/Cargo.toml index 2136e417a6..46800fbc63 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,7 +42,7 @@ pkg-config = { version = "0.3" } [features] default = ["arch_all"] dynamic_linkage = [] -arch_all = ["arch_x86", "arch_arm", "arch_aarch64", "arch_riscv", "arch_mips", "arch_sparc", "arch_m68k", "arch_ppc", "arch_s390x", "arch_tricore"] +arch_all = ["arch_x86", "arch_arm", "arch_aarch64", "arch_riscv", "arch_mips", "arch_sparc", "arch_m68k", "arch_ppc", "arch_rh850", "arch_s390x", "arch_tricore", "arch_avr"] arch_x86 = [] arch_arm = [] # NOTE: unicorn-c only separates on top-level arch name, @@ -55,3 +55,5 @@ arch_m68k = [] arch_ppc = [] arch_s390x = [] arch_tricore = [] +arch_avr = [] +arch_rh850 = [] diff --git a/README.md b/README.md index 293d05db01..cc07f1d4ce 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Unicorn is a lightweight, multi-platform, multi-architecture CPU emulator framew Unicorn offers some unparalleled features: -- Multi-architecture: ARM, ARM64 (ARMv8), M68K, MIPS, PowerPC, RISCV, SPARC, S390X, TriCore and X86 (16, 32, 64-bit) +- Multi-architecture: ARM, ARM64 (ARMv8), AVR, M68K, MIPS, PowerPC, RISCV, SPARC, S390X, TriCore and X86 (16, 32, 64-bit) - Clean/simple/lightweight/intuitive architecture-neutral API - Implemented in pure C language, with bindings for Crystal, Clojure, Visual Basic, Perl, Rust, Ruby, Python, Java, .NET, Go, Delphi/Free Pascal, Haskell, Pharo, Lua and Zig. - Native support for Windows & *nix (with Mac OSX, Linux, Android, *BSD & Solaris confirmed) diff --git a/bindings/const_generator.py b/bindings/const_generator.py index 982b48f4cb..64e39356cc 100644 --- a/bindings/const_generator.py +++ b/bindings/const_generator.py @@ -6,7 +6,7 @@ INCL_DIR = os.path.join('..', 'include', 'unicorn') -include = [ 'arm.h', 'arm64.h', 'mips.h', 'x86.h', 'sparc.h', 'm68k.h', 'ppc.h', 'riscv.h', 's390x.h', 'tricore.h', 'unicorn.h' ] +include = [ 'arm.h', 'arm64.h', 'avr.h', 'mips.h', 'x86.h', 'sparc.h', 'm68k.h', 'ppc.h', 'rh850.h', 'riscv.h', 's390x.h', 'tricore.h', 'unicorn.h' ] template = { 'python': { @@ -17,11 +17,13 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'arm', 'arm64.h': 'arm64', + 'avr.h': 'avr', 'mips.h': 'mips', 'x86.h': 'x86', 'sparc.h': 'sparc', 'm68k.h': 'm68k', 'ppc.h': 'ppc', + 'rh850.h': 'rh850', 'riscv.h': 'riscv', 's390x.h' : 's390x', 'tricore.h' : 'tricore', @@ -37,12 +39,14 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'arm', 'arm64.h': 'arm64', + 'avr.h': 'avr', 'mips.h': 'mips', 'x86.h': 'x86', 'sparc.h': 'sparc', 'm68k.h': 'm68k', 'ppc.h': 'ppc', 'riscv.h': 'riscv', + 'rh850.h': 'rh850', 's390x.h' : 's390x', 'tricore.h' : 'tricore', 'unicorn.h': 'unicorn', @@ -57,11 +61,13 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'arm', 'arm64.h': 'arm64', + 'avr.h': 'avr', 'mips.h': 'mips', 'x86.h': 'x86', 'sparc.h': 'sparc', 'm68k.h': 'm68k', 'ppc.h': 'ppc', + 'rh850.h': 'rh850', 'riscv.h': 'riscv', 's390x.h' : 's390x', 'tricore.h' : 'tricore', @@ -77,11 +83,13 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'Arm', 'arm64.h': 'Arm64', + 'avr.h': 'AVR', 'mips.h': 'Mips', 'x86.h': 'X86', 'sparc.h': 'Sparc', 'm68k.h': 'M68k', 'ppc.h': 'Ppc', + 'rh850.h': 'Rh850', 'riscv.h': 'Riscv', 's390x.h' : 'S390x', 'tricore.h' : 'TriCore', @@ -97,11 +105,13 
@@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'Arm', 'arm64.h': 'Arm64', + 'avr.h': 'AVR', 'mips.h': 'Mips', 'x86.h': 'X86', 'sparc.h': 'Sparc', 'm68k.h': 'M68k', 'ppc.h': 'Ppc', + 'rh850.h': 'Rh850', 'riscv.h': 'Riscv', 's390x.h' : 'S390x', 'tricore.h' : 'TriCore', @@ -117,11 +127,13 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'Arm', 'arm64.h': 'Arm64', + 'avr.h': 'AVR', 'mips.h': 'Mips', 'x86.h': 'X86', 'sparc.h': 'Sparc', 'm68k.h': 'M68k', 'ppc.h': 'Ppc', + 'rh850.h': 'Rh850', 'riscv.h': 'Riscv', 's390x.h' : 'S390x', 'tricore.h' : 'TriCore', @@ -137,12 +149,14 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'arm', 'arm64.h': 'arm64', + 'avr.h': 'AVR', 'mips.h': 'mips', 'x86.h': 'x86', 'sparc.h': 'sparc', 'm68k.h': 'm68k', 'ppc.h': 'ppc', 'riscv.h': 'riscv', + 'rh850.h': 'rh850', 's390x.h' : 's390x', 'tricore.h' : 'tricore', 'unicorn.h': 'unicorn', diff --git a/bindings/dotnet/UnicornEngine/Const/AVR.fs b/bindings/dotnet/UnicornEngine/Const/AVR.fs new file mode 100644 index 0000000000..d7613dac85 --- /dev/null +++ b/bindings/dotnet/UnicornEngine/Const/AVR.fs @@ -0,0 +1,155 @@ +// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT + +namespace UnicornEngine.Const + +open System + +[] +module AVR = + + // AVR architectures + let UC_AVR_ARCH_AVR1 = 10 + let UC_AVR_ARCH_AVR2 = 20 + let UC_AVR_ARCH_AVR25 = 25 + let UC_AVR_ARCH_AVR3 = 30 + let UC_AVR_ARCH_AVR4 = 40 + let UC_AVR_ARCH_AVR5 = 50 + let UC_AVR_ARCH_AVR51 = 51 + let UC_AVR_ARCH_AVR6 = 60 + let UC_CPU_AVR_ARCH = 1000 + + // AVR CPU + let UC_CPU_AVR_ATMEGA16 = 50016 + let UC_CPU_AVR_ATMEGA32 = 50032 + let UC_CPU_AVR_ATMEGA64 = 50064 + let UC_CPU_AVR_ATMEGA128 = 51128 + let UC_CPU_AVR_ATMEGA128RFR2 = 51129 + let UC_CPU_AVR_ATMEGA1280 = 51130 + let UC_CPU_AVR_ATMEGA256 = 60256 + let UC_CPU_AVR_ATMEGA256RFR2 = 60257 + let UC_CPU_AVR_ATMEGA2560 = 60258 + + // AVR memory + let UC_AVR_MEM_FLASH = 134217728 + + // AVR registers + + let UC_AVR_REG_INVALID = 0 + let UC_AVR_REG_R0 = 1 + let UC_AVR_REG_R1 = 2 + let UC_AVR_REG_R2 = 3 + let UC_AVR_REG_R3 = 4 + let UC_AVR_REG_R4 = 5 + let UC_AVR_REG_R5 = 6 + let UC_AVR_REG_R6 = 7 + let UC_AVR_REG_R7 = 8 + let UC_AVR_REG_R8 = 9 + let UC_AVR_REG_R9 = 10 + let UC_AVR_REG_R10 = 11 + let UC_AVR_REG_R11 = 12 + let UC_AVR_REG_R12 = 13 + let UC_AVR_REG_R13 = 14 + let UC_AVR_REG_R14 = 15 + let UC_AVR_REG_R15 = 16 + let UC_AVR_REG_R16 = 17 + let UC_AVR_REG_R17 = 18 + let UC_AVR_REG_R18 = 19 + let UC_AVR_REG_R19 = 20 + let UC_AVR_REG_R20 = 21 + let UC_AVR_REG_R21 = 22 + let UC_AVR_REG_R22 = 23 + let UC_AVR_REG_R23 = 24 + let UC_AVR_REG_R24 = 25 + let UC_AVR_REG_R25 = 26 + let UC_AVR_REG_R26 = 27 + let UC_AVR_REG_R27 = 28 + let UC_AVR_REG_R28 = 29 + let UC_AVR_REG_R29 = 30 + let UC_AVR_REG_R30 = 31 + let UC_AVR_REG_R31 = 32 + let UC_AVR_REG_PC = 33 + let UC_AVR_REG_SP = 34 + let UC_AVR_REG_RAMPD = 57 + let UC_AVR_REG_RAMPX = 58 + let UC_AVR_REG_RAMPY = 59 + let UC_AVR_REG_RAMPZ = 60 + let UC_AVR_REG_EIND = 61 + let UC_AVR_REG_SPL = 62 + let UC_AVR_REG_SPH = 63 + let UC_AVR_REG_SREG = 64 + + // 16-bit coalesced registers + let UC_AVR_REG_R0W = 65 + let UC_AVR_REG_R1W = 66 + let UC_AVR_REG_R2W = 67 + let UC_AVR_REG_R3W = 68 + let UC_AVR_REG_R4W = 69 + let UC_AVR_REG_R5W = 70 + let UC_AVR_REG_R6W = 71 + let UC_AVR_REG_R7W = 72 + let UC_AVR_REG_R8W = 73 + let UC_AVR_REG_R9W = 74 + let UC_AVR_REG_R10W = 75 + let UC_AVR_REG_R11W = 76 + let UC_AVR_REG_R12W = 77 + let UC_AVR_REG_R13W = 78 + let UC_AVR_REG_R14W = 79 + let 
UC_AVR_REG_R15W = 80 + let UC_AVR_REG_R16W = 81 + let UC_AVR_REG_R17W = 82 + let UC_AVR_REG_R18W = 83 + let UC_AVR_REG_R19W = 84 + let UC_AVR_REG_R20W = 85 + let UC_AVR_REG_R21W = 86 + let UC_AVR_REG_R22W = 87 + let UC_AVR_REG_R23W = 88 + let UC_AVR_REG_R24W = 89 + let UC_AVR_REG_R25W = 90 + let UC_AVR_REG_R26W = 91 + let UC_AVR_REG_R27W = 92 + let UC_AVR_REG_R28W = 93 + let UC_AVR_REG_R29W = 94 + let UC_AVR_REG_R30W = 95 + + // 32-bit coalesced registers + let UC_AVR_REG_R0D = 97 + let UC_AVR_REG_R1D = 98 + let UC_AVR_REG_R2D = 99 + let UC_AVR_REG_R3D = 100 + let UC_AVR_REG_R4D = 101 + let UC_AVR_REG_R5D = 102 + let UC_AVR_REG_R6D = 103 + let UC_AVR_REG_R7D = 104 + let UC_AVR_REG_R8D = 105 + let UC_AVR_REG_R9D = 106 + let UC_AVR_REG_R10D = 107 + let UC_AVR_REG_R11D = 108 + let UC_AVR_REG_R12D = 109 + let UC_AVR_REG_R13D = 110 + let UC_AVR_REG_R14D = 111 + let UC_AVR_REG_R15D = 112 + let UC_AVR_REG_R16D = 113 + let UC_AVR_REG_R17D = 114 + let UC_AVR_REG_R18D = 115 + let UC_AVR_REG_R19D = 116 + let UC_AVR_REG_R20D = 117 + let UC_AVR_REG_R21D = 118 + let UC_AVR_REG_R22D = 119 + let UC_AVR_REG_R23D = 120 + let UC_AVR_REG_R24D = 121 + let UC_AVR_REG_R25D = 122 + let UC_AVR_REG_R26D = 123 + let UC_AVR_REG_R27D = 124 + let UC_AVR_REG_R28D = 125 + + // Alias registers + let UC_AVR_REG_Xhi = 28 + let UC_AVR_REG_Xlo = 27 + let UC_AVR_REG_Yhi = 30 + let UC_AVR_REG_Ylo = 29 + let UC_AVR_REG_Zhi = 32 + let UC_AVR_REG_Zlo = 31 + let UC_AVR_REG_X = 91 + let UC_AVR_REG_Y = 93 + let UC_AVR_REG_Z = 95 + diff --git a/bindings/dotnet/UnicornEngine/Const/Common.fs b/bindings/dotnet/UnicornEngine/Const/Common.fs index addce7b7c6..cdd6ce767a 100644 --- a/bindings/dotnet/UnicornEngine/Const/Common.fs +++ b/bindings/dotnet/UnicornEngine/Const/Common.fs @@ -26,7 +26,9 @@ module Common = let UC_ARCH_RISCV = 8 let UC_ARCH_S390X = 9 let UC_ARCH_TRICORE = 10 - let UC_ARCH_MAX = 11 + let UC_ARCH_AVR = 11 + let UC_ARCH_RH850 = 12 + let UC_ARCH_MAX = 13 let UC_MODE_LITTLE_ENDIAN = 0 let UC_MODE_BIG_ENDIAN = 1073741824 @@ -53,6 +55,7 @@ module Common = let UC_MODE_SPARC32 = 4 let UC_MODE_SPARC64 = 8 let UC_MODE_V9 = 16 + let UC_MODE_RH850 = 4 let UC_MODE_RISCV32 = 4 let UC_MODE_RISCV64 = 8 diff --git a/bindings/dotnet/UnicornEngine/Const/Rh850.fs b/bindings/dotnet/UnicornEngine/Const/Rh850.fs new file mode 100644 index 0000000000..073f10416f --- /dev/null +++ b/bindings/dotnet/UnicornEngine/Const/Rh850.fs @@ -0,0 +1,99 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +namespace UnicornManaged.Const + +open System + +[] +module Rh850 = + let UC_RH850_SYSREG_SELID0 = 32 + let UC_RH850_SYSREG_SELID1 = 64 + let UC_RH850_SYSREG_SELID2 = 96 + let UC_RH850_SYSREG_SELID3 = 128 + let UC_RH850_SYSREG_SELID4 = 160 + let UC_RH850_SYSREG_SELID5 = 192 + let UC_RH850_SYSREG_SELID6 = 224 + let UC_RH850_SYSREG_SELID7 = 256 + + // RH850 global purpose registers + + let UC_RH850_REG_R0 = 0 + let UC_RH850_REG_R1 = 1 + let UC_RH850_REG_R2 = 2 + let UC_RH850_REG_R3 = 3 + let UC_RH850_REG_R4 = 4 + let UC_RH850_REG_R5 = 5 + let UC_RH850_REG_R6 = 6 + let UC_RH850_REG_R7 = 7 + let UC_RH850_REG_R8 = 8 + let UC_RH850_REG_R9 = 9 + let UC_RH850_REG_R10 = 10 + let UC_RH850_REG_R11 = 11 + let UC_RH850_REG_R12 = 12 + let UC_RH850_REG_R13 = 13 + let UC_RH850_REG_R14 = 14 + let UC_RH850_REG_R15 = 15 + let UC_RH850_REG_R16 = 16 + let UC_RH850_REG_R17 = 17 + let UC_RH850_REG_R18 = 18 + let UC_RH850_REG_R19 = 19 + let UC_RH850_REG_R20 = 20 + let UC_RH850_REG_R21 = 21 + let UC_RH850_REG_R22 = 22 + let UC_RH850_REG_R23 = 23 + let UC_RH850_REG_R24 = 24 + let UC_RH850_REG_R25 = 25 + let UC_RH850_REG_R26 = 26 + let UC_RH850_REG_R27 = 27 + let UC_RH850_REG_R28 = 28 + let UC_RH850_REG_R29 = 29 + let UC_RH850_REG_R30 = 30 + let UC_RH850_REG_R31 = 31 + + // RH850 system registers, selection ID 0 + let UC_RH850_REG_EIPC = 32 + let UC_RH850_REG_EIPSW = 33 + let UC_RH850_REG_FEPC = 34 + let UC_RH850_REG_FEPSW = 35 + let UC_RH850_REG_ECR = 36 + let UC_RH850_REG_PSW = 37 + let UC_RH850_REG_FPSR = 38 + let UC_RH850_REG_FPEPC = 39 + let UC_RH850_REG_FPST = 40 + let UC_RH850_REG_FPCC = 41 + let UC_RH850_REG_FPCFG = 42 + let UC_RH850_REG_FPEC = 43 + let UC_RH850_REG_EIIC = 45 + let UC_RH850_REG_FEIC = 46 + let UC_RH850_REG_CTPC = 48 + let UC_RH850_REG_CTPSW = 49 + let UC_RH850_REG_CTBP = 52 + let UC_RH850_REG_EIWR = 60 + let UC_RH850_REG_FEWR = 61 + let UC_RH850_REG_BSEL = 63 + + // RH850 system regusters, selection ID 1 + let UC_RH850_REG_MCFG0 = 64 + let UC_RH850_REG_RBASE = 65 + let UC_RH850_REG_EBASE = 66 + let UC_RH850_REG_INTBP = 67 + let UC_RH850_REG_MCTL = 68 + let UC_RH850_REG_PID = 69 + let UC_RH850_REG_SCCFG = 75 + let UC_RH850_REG_SCBP = 76 + + // RH850 system registers, selection ID 2 + let UC_RH850_REG_HTCFG0 = 96 + let UC_RH850_REG_MEA = 102 + let UC_RH850_REG_ASID = 103 + let UC_RH850_REG_MEI = 104 + let UC_RH850_REG_PC = 288 + let UC_RH850_REG_ENDING = 289 + + // RH8509 Registers aliases. + + let UC_RH850_REG_ZERO = 0 + let UC_RH850_REG_SP = 3 + let UC_RH850_REG_EP = 30 + let UC_RH850_REG_LP = 31 + diff --git a/bindings/go/unicorn/avr_const.go b/bindings/go/unicorn/avr_const.go new file mode 100644 index 0000000000..985bf7d009 --- /dev/null +++ b/bindings/go/unicorn/avr_const.go @@ -0,0 +1,150 @@ +package unicorn +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [avr_const.go] +const ( + +// AVR architectures + AVR_ARCH_AVR1 = 10 + AVR_ARCH_AVR2 = 20 + AVR_ARCH_AVR25 = 25 + AVR_ARCH_AVR3 = 30 + AVR_ARCH_AVR4 = 40 + AVR_ARCH_AVR5 = 50 + AVR_ARCH_AVR51 = 51 + AVR_ARCH_AVR6 = 60 + CPU_AVR_ARCH = 1000 + +// AVR CPU + CPU_AVR_ATMEGA16 = 50016 + CPU_AVR_ATMEGA32 = 50032 + CPU_AVR_ATMEGA64 = 50064 + CPU_AVR_ATMEGA128 = 51128 + CPU_AVR_ATMEGA128RFR2 = 51129 + CPU_AVR_ATMEGA1280 = 51130 + CPU_AVR_ATMEGA256 = 60256 + CPU_AVR_ATMEGA256RFR2 = 60257 + CPU_AVR_ATMEGA2560 = 60258 + +// AVR memory + AVR_MEM_FLASH = 134217728 + +// AVR registers + + AVR_REG_INVALID = 0 + AVR_REG_R0 = 1 + AVR_REG_R1 = 2 + AVR_REG_R2 = 3 + AVR_REG_R3 = 4 + AVR_REG_R4 = 5 + AVR_REG_R5 = 6 + AVR_REG_R6 = 7 + AVR_REG_R7 = 8 + AVR_REG_R8 = 9 + AVR_REG_R9 = 10 + AVR_REG_R10 = 11 + AVR_REG_R11 = 12 + AVR_REG_R12 = 13 + AVR_REG_R13 = 14 + AVR_REG_R14 = 15 + AVR_REG_R15 = 16 + AVR_REG_R16 = 17 + AVR_REG_R17 = 18 + AVR_REG_R18 = 19 + AVR_REG_R19 = 20 + AVR_REG_R20 = 21 + AVR_REG_R21 = 22 + AVR_REG_R22 = 23 + AVR_REG_R23 = 24 + AVR_REG_R24 = 25 + AVR_REG_R25 = 26 + AVR_REG_R26 = 27 + AVR_REG_R27 = 28 + AVR_REG_R28 = 29 + AVR_REG_R29 = 30 + AVR_REG_R30 = 31 + AVR_REG_R31 = 32 + AVR_REG_PC = 33 + AVR_REG_SP = 34 + AVR_REG_RAMPD = 57 + AVR_REG_RAMPX = 58 + AVR_REG_RAMPY = 59 + AVR_REG_RAMPZ = 60 + AVR_REG_EIND = 61 + AVR_REG_SPL = 62 + AVR_REG_SPH = 63 + AVR_REG_SREG = 64 + +// 16-bit coalesced registers + AVR_REG_R0W = 65 + AVR_REG_R1W = 66 + AVR_REG_R2W = 67 + AVR_REG_R3W = 68 + AVR_REG_R4W = 69 + AVR_REG_R5W = 70 + AVR_REG_R6W = 71 + AVR_REG_R7W = 72 + AVR_REG_R8W = 73 + AVR_REG_R9W = 74 + AVR_REG_R10W = 75 + AVR_REG_R11W = 76 + AVR_REG_R12W = 77 + AVR_REG_R13W = 78 + AVR_REG_R14W = 79 + AVR_REG_R15W = 80 + AVR_REG_R16W = 81 + AVR_REG_R17W = 82 + AVR_REG_R18W = 83 + AVR_REG_R19W = 84 + AVR_REG_R20W = 85 + AVR_REG_R21W = 86 + AVR_REG_R22W = 87 + AVR_REG_R23W = 88 + AVR_REG_R24W = 89 + AVR_REG_R25W = 90 + AVR_REG_R26W = 91 + AVR_REG_R27W = 92 + AVR_REG_R28W = 93 + AVR_REG_R29W = 94 + AVR_REG_R30W = 95 + +// 32-bit coalesced registers + AVR_REG_R0D = 97 + AVR_REG_R1D = 98 + AVR_REG_R2D = 99 + AVR_REG_R3D = 100 + AVR_REG_R4D = 101 + AVR_REG_R5D = 102 + AVR_REG_R6D = 103 + AVR_REG_R7D = 104 + AVR_REG_R8D = 105 + AVR_REG_R9D = 106 + AVR_REG_R10D = 107 + AVR_REG_R11D = 108 + AVR_REG_R12D = 109 + AVR_REG_R13D = 110 + AVR_REG_R14D = 111 + AVR_REG_R15D = 112 + AVR_REG_R16D = 113 + AVR_REG_R17D = 114 + AVR_REG_R18D = 115 + AVR_REG_R19D = 116 + AVR_REG_R20D = 117 + AVR_REG_R21D = 118 + AVR_REG_R22D = 119 + AVR_REG_R23D = 120 + AVR_REG_R24D = 121 + AVR_REG_R25D = 122 + AVR_REG_R26D = 123 + AVR_REG_R27D = 124 + AVR_REG_R28D = 125 + +// Alias registers + AVR_REG_Xhi = 28 + AVR_REG_Xlo = 27 + AVR_REG_Yhi = 30 + AVR_REG_Ylo = 29 + AVR_REG_Zhi = 32 + AVR_REG_Zlo = 31 + AVR_REG_X = 91 + AVR_REG_Y = 93 + AVR_REG_Z = 95 +) \ No newline at end of file diff --git a/bindings/go/unicorn/rh850_const.go b/bindings/go/unicorn/rh850_const.go new file mode 100644 index 0000000000..72ad301628 --- /dev/null +++ b/bindings/go/unicorn/rh850_const.go @@ -0,0 +1,94 @@ +package unicorn +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [rh850_const.go] +const ( + RH850_SYSREG_SELID0 = 32 + RH850_SYSREG_SELID1 = 64 + RH850_SYSREG_SELID2 = 96 + RH850_SYSREG_SELID3 = 128 + RH850_SYSREG_SELID4 = 160 + RH850_SYSREG_SELID5 = 192 + RH850_SYSREG_SELID6 = 224 + RH850_SYSREG_SELID7 = 256 + +// RH850 global purpose registers + + RH850_REG_R0 = 0 + RH850_REG_R1 = 1 + RH850_REG_R2 = 2 + RH850_REG_R3 = 3 + RH850_REG_R4 = 4 + RH850_REG_R5 = 5 + RH850_REG_R6 = 6 + RH850_REG_R7 = 7 + RH850_REG_R8 = 8 + RH850_REG_R9 = 9 + RH850_REG_R10 = 10 + RH850_REG_R11 = 11 + RH850_REG_R12 = 12 + RH850_REG_R13 = 13 + RH850_REG_R14 = 14 + RH850_REG_R15 = 15 + RH850_REG_R16 = 16 + RH850_REG_R17 = 17 + RH850_REG_R18 = 18 + RH850_REG_R19 = 19 + RH850_REG_R20 = 20 + RH850_REG_R21 = 21 + RH850_REG_R22 = 22 + RH850_REG_R23 = 23 + RH850_REG_R24 = 24 + RH850_REG_R25 = 25 + RH850_REG_R26 = 26 + RH850_REG_R27 = 27 + RH850_REG_R28 = 28 + RH850_REG_R29 = 29 + RH850_REG_R30 = 30 + RH850_REG_R31 = 31 + +// RH850 system registers, selection ID 0 + RH850_REG_EIPC = 32 + RH850_REG_EIPSW = 33 + RH850_REG_FEPC = 34 + RH850_REG_FEPSW = 35 + RH850_REG_ECR = 36 + RH850_REG_PSW = 37 + RH850_REG_FPSR = 38 + RH850_REG_FPEPC = 39 + RH850_REG_FPST = 40 + RH850_REG_FPCC = 41 + RH850_REG_FPCFG = 42 + RH850_REG_FPEC = 43 + RH850_REG_EIIC = 45 + RH850_REG_FEIC = 46 + RH850_REG_CTPC = 48 + RH850_REG_CTPSW = 49 + RH850_REG_CTBP = 52 + RH850_REG_EIWR = 60 + RH850_REG_FEWR = 61 + RH850_REG_BSEL = 63 + +// RH850 system regusters, selection ID 1 + RH850_REG_MCFG0 = 64 + RH850_REG_RBASE = 65 + RH850_REG_EBASE = 66 + RH850_REG_INTBP = 67 + RH850_REG_MCTL = 68 + RH850_REG_PID = 69 + RH850_REG_SCCFG = 75 + RH850_REG_SCBP = 76 + +// RH850 system registers, selection ID 2 + RH850_REG_HTCFG0 = 96 + RH850_REG_MEA = 102 + RH850_REG_ASID = 103 + RH850_REG_MEI = 104 + RH850_REG_PC = 288 + RH850_REG_ENDING = 289 + +// RH8509 Registers aliases. + + RH850_REG_ZERO = 0 + RH850_REG_SP = 3 + RH850_REG_EP = 30 + RH850_REG_LP = 31 +) \ No newline at end of file diff --git a/bindings/go/unicorn/unicorn_const.go b/bindings/go/unicorn/unicorn_const.go index f005f652dd..06e445b7b6 100644 --- a/bindings/go/unicorn/unicorn_const.go +++ b/bindings/go/unicorn/unicorn_const.go @@ -21,7 +21,9 @@ const ( ARCH_RISCV = 8 ARCH_S390X = 9 ARCH_TRICORE = 10 - ARCH_MAX = 11 + ARCH_AVR = 11 + ARCH_RH850 = 12 + ARCH_MAX = 13 MODE_LITTLE_ENDIAN = 0 MODE_BIG_ENDIAN = 1073741824 @@ -48,6 +50,7 @@ const ( MODE_SPARC32 = 4 MODE_SPARC64 = 8 MODE_V9 = 16 + MODE_RH850 = 4 MODE_RISCV32 = 4 MODE_RISCV64 = 8 @@ -149,4 +152,4 @@ const ( PROT_ALL = 7 CTL_CONTEXT_CPU = 1 CTL_CONTEXT_MEMORY = 2 -) \ No newline at end of file +) diff --git a/bindings/java/src/main/java/unicorn/AVRConst.java b/bindings/java/src/main/java/unicorn/AVRConst.java new file mode 100644 index 0000000000..066fd97774 --- /dev/null +++ b/bindings/java/src/main/java/unicorn/AVRConst.java @@ -0,0 +1,153 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +package unicorn; + +public interface AVRConst { + + // AVR architectures + public static final int UC_AVR_ARCH_AVR1 = 10; + public static final int UC_AVR_ARCH_AVR2 = 20; + public static final int UC_AVR_ARCH_AVR25 = 25; + public static final int UC_AVR_ARCH_AVR3 = 30; + public static final int UC_AVR_ARCH_AVR4 = 40; + public static final int UC_AVR_ARCH_AVR5 = 50; + public static final int UC_AVR_ARCH_AVR51 = 51; + public static final int UC_AVR_ARCH_AVR6 = 60; + public static final int UC_CPU_AVR_ARCH = 1000; + + // AVR CPU + public static final int UC_CPU_AVR_ATMEGA16 = 50016; + public static final int UC_CPU_AVR_ATMEGA32 = 50032; + public static final int UC_CPU_AVR_ATMEGA64 = 50064; + public static final int UC_CPU_AVR_ATMEGA128 = 51128; + public static final int UC_CPU_AVR_ATMEGA128RFR2 = 51129; + public static final int UC_CPU_AVR_ATMEGA1280 = 51130; + public static final int UC_CPU_AVR_ATMEGA256 = 60256; + public static final int UC_CPU_AVR_ATMEGA256RFR2 = 60257; + public static final int UC_CPU_AVR_ATMEGA2560 = 60258; + + // AVR memory + public static final int UC_AVR_MEM_FLASH = 134217728; + + // AVR registers + + public static final int UC_AVR_REG_INVALID = 0; + public static final int UC_AVR_REG_R0 = 1; + public static final int UC_AVR_REG_R1 = 2; + public static final int UC_AVR_REG_R2 = 3; + public static final int UC_AVR_REG_R3 = 4; + public static final int UC_AVR_REG_R4 = 5; + public static final int UC_AVR_REG_R5 = 6; + public static final int UC_AVR_REG_R6 = 7; + public static final int UC_AVR_REG_R7 = 8; + public static final int UC_AVR_REG_R8 = 9; + public static final int UC_AVR_REG_R9 = 10; + public static final int UC_AVR_REG_R10 = 11; + public static final int UC_AVR_REG_R11 = 12; + public static final int UC_AVR_REG_R12 = 13; + public static final int UC_AVR_REG_R13 = 14; + public static final int UC_AVR_REG_R14 = 15; + public static final int UC_AVR_REG_R15 = 16; + public static final int UC_AVR_REG_R16 = 17; + public static final int UC_AVR_REG_R17 = 18; + public static final int UC_AVR_REG_R18 = 19; + public static final int UC_AVR_REG_R19 = 20; + public static final int UC_AVR_REG_R20 = 21; + public static final int UC_AVR_REG_R21 = 22; + public static final int UC_AVR_REG_R22 = 23; + public static final int UC_AVR_REG_R23 = 24; + public static final int UC_AVR_REG_R24 = 25; + public static final int UC_AVR_REG_R25 = 26; + public static final int UC_AVR_REG_R26 = 27; + public static final int UC_AVR_REG_R27 = 28; + public static final int UC_AVR_REG_R28 = 29; + public static final int UC_AVR_REG_R29 = 30; + public static final int UC_AVR_REG_R30 = 31; + public static final int UC_AVR_REG_R31 = 32; + public static final int UC_AVR_REG_PC = 33; + public static final int UC_AVR_REG_SP = 34; + public static final int UC_AVR_REG_RAMPD = 57; + public static final int UC_AVR_REG_RAMPX = 58; + public static final int UC_AVR_REG_RAMPY = 59; + public static final int UC_AVR_REG_RAMPZ = 60; + public static final int UC_AVR_REG_EIND = 61; + public static final int UC_AVR_REG_SPL = 62; + public static final int UC_AVR_REG_SPH = 63; + public static final int UC_AVR_REG_SREG = 64; + + // 16-bit coalesced registers + public static final int UC_AVR_REG_R0W = 65; + public static final int UC_AVR_REG_R1W = 66; + public static final int UC_AVR_REG_R2W = 67; + public static final int UC_AVR_REG_R3W = 68; + public static final int UC_AVR_REG_R4W = 69; + public static final int UC_AVR_REG_R5W = 70; + public static final int UC_AVR_REG_R6W = 71; + 
public static final int UC_AVR_REG_R7W = 72; + public static final int UC_AVR_REG_R8W = 73; + public static final int UC_AVR_REG_R9W = 74; + public static final int UC_AVR_REG_R10W = 75; + public static final int UC_AVR_REG_R11W = 76; + public static final int UC_AVR_REG_R12W = 77; + public static final int UC_AVR_REG_R13W = 78; + public static final int UC_AVR_REG_R14W = 79; + public static final int UC_AVR_REG_R15W = 80; + public static final int UC_AVR_REG_R16W = 81; + public static final int UC_AVR_REG_R17W = 82; + public static final int UC_AVR_REG_R18W = 83; + public static final int UC_AVR_REG_R19W = 84; + public static final int UC_AVR_REG_R20W = 85; + public static final int UC_AVR_REG_R21W = 86; + public static final int UC_AVR_REG_R22W = 87; + public static final int UC_AVR_REG_R23W = 88; + public static final int UC_AVR_REG_R24W = 89; + public static final int UC_AVR_REG_R25W = 90; + public static final int UC_AVR_REG_R26W = 91; + public static final int UC_AVR_REG_R27W = 92; + public static final int UC_AVR_REG_R28W = 93; + public static final int UC_AVR_REG_R29W = 94; + public static final int UC_AVR_REG_R30W = 95; + + // 32-bit coalesced registers + public static final int UC_AVR_REG_R0D = 97; + public static final int UC_AVR_REG_R1D = 98; + public static final int UC_AVR_REG_R2D = 99; + public static final int UC_AVR_REG_R3D = 100; + public static final int UC_AVR_REG_R4D = 101; + public static final int UC_AVR_REG_R5D = 102; + public static final int UC_AVR_REG_R6D = 103; + public static final int UC_AVR_REG_R7D = 104; + public static final int UC_AVR_REG_R8D = 105; + public static final int UC_AVR_REG_R9D = 106; + public static final int UC_AVR_REG_R10D = 107; + public static final int UC_AVR_REG_R11D = 108; + public static final int UC_AVR_REG_R12D = 109; + public static final int UC_AVR_REG_R13D = 110; + public static final int UC_AVR_REG_R14D = 111; + public static final int UC_AVR_REG_R15D = 112; + public static final int UC_AVR_REG_R16D = 113; + public static final int UC_AVR_REG_R17D = 114; + public static final int UC_AVR_REG_R18D = 115; + public static final int UC_AVR_REG_R19D = 116; + public static final int UC_AVR_REG_R20D = 117; + public static final int UC_AVR_REG_R21D = 118; + public static final int UC_AVR_REG_R22D = 119; + public static final int UC_AVR_REG_R23D = 120; + public static final int UC_AVR_REG_R24D = 121; + public static final int UC_AVR_REG_R25D = 122; + public static final int UC_AVR_REG_R26D = 123; + public static final int UC_AVR_REG_R27D = 124; + public static final int UC_AVR_REG_R28D = 125; + + // Alias registers + public static final int UC_AVR_REG_Xhi = 28; + public static final int UC_AVR_REG_Xlo = 27; + public static final int UC_AVR_REG_Yhi = 30; + public static final int UC_AVR_REG_Ylo = 29; + public static final int UC_AVR_REG_Zhi = 32; + public static final int UC_AVR_REG_Zlo = 31; + public static final int UC_AVR_REG_X = 91; + public static final int UC_AVR_REG_Y = 93; + public static final int UC_AVR_REG_Z = 95; + +} diff --git a/bindings/java/src/main/java/unicorn/Rh850Const.java b/bindings/java/src/main/java/unicorn/Rh850Const.java new file mode 100644 index 0000000000..098eea41b5 --- /dev/null +++ b/bindings/java/src/main/java/unicorn/Rh850Const.java @@ -0,0 +1,97 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +package unicorn; + +public interface Rh850Const { + public static final int UC_RH850_SYSREG_SELID0 = 32; + public static final int UC_RH850_SYSREG_SELID1 = 64; + public static final int UC_RH850_SYSREG_SELID2 = 96; + public static final int UC_RH850_SYSREG_SELID3 = 128; + public static final int UC_RH850_SYSREG_SELID4 = 160; + public static final int UC_RH850_SYSREG_SELID5 = 192; + public static final int UC_RH850_SYSREG_SELID6 = 224; + public static final int UC_RH850_SYSREG_SELID7 = 256; + +// RH850 global purpose registers + + public static final int UC_RH850_REG_R0 = 0; + public static final int UC_RH850_REG_R1 = 1; + public static final int UC_RH850_REG_R2 = 2; + public static final int UC_RH850_REG_R3 = 3; + public static final int UC_RH850_REG_R4 = 4; + public static final int UC_RH850_REG_R5 = 5; + public static final int UC_RH850_REG_R6 = 6; + public static final int UC_RH850_REG_R7 = 7; + public static final int UC_RH850_REG_R8 = 8; + public static final int UC_RH850_REG_R9 = 9; + public static final int UC_RH850_REG_R10 = 10; + public static final int UC_RH850_REG_R11 = 11; + public static final int UC_RH850_REG_R12 = 12; + public static final int UC_RH850_REG_R13 = 13; + public static final int UC_RH850_REG_R14 = 14; + public static final int UC_RH850_REG_R15 = 15; + public static final int UC_RH850_REG_R16 = 16; + public static final int UC_RH850_REG_R17 = 17; + public static final int UC_RH850_REG_R18 = 18; + public static final int UC_RH850_REG_R19 = 19; + public static final int UC_RH850_REG_R20 = 20; + public static final int UC_RH850_REG_R21 = 21; + public static final int UC_RH850_REG_R22 = 22; + public static final int UC_RH850_REG_R23 = 23; + public static final int UC_RH850_REG_R24 = 24; + public static final int UC_RH850_REG_R25 = 25; + public static final int UC_RH850_REG_R26 = 26; + public static final int UC_RH850_REG_R27 = 27; + public static final int UC_RH850_REG_R28 = 28; + public static final int UC_RH850_REG_R29 = 29; + public static final int UC_RH850_REG_R30 = 30; + public static final int UC_RH850_REG_R31 = 31; + +// RH850 system registers, selection ID 0 + public static final int UC_RH850_REG_EIPC = 32; + public static final int UC_RH850_REG_EIPSW = 33; + public static final int UC_RH850_REG_FEPC = 34; + public static final int UC_RH850_REG_FEPSW = 35; + public static final int UC_RH850_REG_ECR = 36; + public static final int UC_RH850_REG_PSW = 37; + public static final int UC_RH850_REG_FPSR = 38; + public static final int UC_RH850_REG_FPEPC = 39; + public static final int UC_RH850_REG_FPST = 40; + public static final int UC_RH850_REG_FPCC = 41; + public static final int UC_RH850_REG_FPCFG = 42; + public static final int UC_RH850_REG_FPEC = 43; + public static final int UC_RH850_REG_EIIC = 45; + public static final int UC_RH850_REG_FEIC = 46; + public static final int UC_RH850_REG_CTPC = 48; + public static final int UC_RH850_REG_CTPSW = 49; + public static final int UC_RH850_REG_CTBP = 52; + public static final int UC_RH850_REG_EIWR = 60; + public static final int UC_RH850_REG_FEWR = 61; + public static final int UC_RH850_REG_BSEL = 63; + +// RH850 system regusters, selection ID 1 + public static final int UC_RH850_REG_MCFG0 = 64; + public static final int UC_RH850_REG_RBASE = 65; + public static final int UC_RH850_REG_EBASE = 66; + public static final int UC_RH850_REG_INTBP = 67; + public static final int UC_RH850_REG_MCTL = 68; + public static final int UC_RH850_REG_PID = 69; + public static final int 
UC_RH850_REG_SCCFG = 75; + public static final int UC_RH850_REG_SCBP = 76; + +// RH850 system registers, selection ID 2 + public static final int UC_RH850_REG_HTCFG0 = 96; + public static final int UC_RH850_REG_MEA = 102; + public static final int UC_RH850_REG_ASID = 103; + public static final int UC_RH850_REG_MEI = 104; + public static final int UC_RH850_REG_PC = 288; + public static final int UC_RH850_REG_ENDING = 289; + +// RH8509 Registers aliases. + + public static final int UC_RH850_REG_ZERO = 0; + public static final int UC_RH850_REG_SP = 3; + public static final int UC_RH850_REG_EP = 30; + public static final int UC_RH850_REG_LP = 31; + +} diff --git a/bindings/java/src/main/java/unicorn/UnicornConst.java b/bindings/java/src/main/java/unicorn/UnicornConst.java index 2fca78c336..fc06f9b5c2 100644 --- a/bindings/java/src/main/java/unicorn/UnicornConst.java +++ b/bindings/java/src/main/java/unicorn/UnicornConst.java @@ -23,7 +23,9 @@ public interface UnicornConst { public static final int UC_ARCH_RISCV = 8; public static final int UC_ARCH_S390X = 9; public static final int UC_ARCH_TRICORE = 10; - public static final int UC_ARCH_MAX = 11; + public static final int UC_ARCH_AVR = 11; + public static final int UC_ARCH_RH850 = 12; + public static final int UC_ARCH_MAX = 13; public static final int UC_MODE_LITTLE_ENDIAN = 0; public static final int UC_MODE_BIG_ENDIAN = 1073741824; @@ -50,6 +52,7 @@ public interface UnicornConst { public static final int UC_MODE_SPARC32 = 4; public static final int UC_MODE_SPARC64 = 8; public static final int UC_MODE_V9 = 16; + public static final int UC_MODE_RH850 = 4; public static final int UC_MODE_RISCV32 = 4; public static final int UC_MODE_RISCV64 = 8; diff --git a/bindings/pascal/unicorn/AVRConst.pas b/bindings/pascal/unicorn/AVRConst.pas new file mode 100644 index 0000000000..c607d54ac9 --- /dev/null +++ b/bindings/pascal/unicorn/AVRConst.pas @@ -0,0 +1,155 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +unit AVRConst; + +interface + +const +// AVR architectures + UC_AVR_ARCH_AVR1 = 10; + UC_AVR_ARCH_AVR2 = 20; + UC_AVR_ARCH_AVR25 = 25; + UC_AVR_ARCH_AVR3 = 30; + UC_AVR_ARCH_AVR4 = 40; + UC_AVR_ARCH_AVR5 = 50; + UC_AVR_ARCH_AVR51 = 51; + UC_AVR_ARCH_AVR6 = 60; + UC_CPU_AVR_ARCH = 1000; + +// AVR CPU + UC_CPU_AVR_ATMEGA16 = 50016; + UC_CPU_AVR_ATMEGA32 = 50032; + UC_CPU_AVR_ATMEGA64 = 50064; + UC_CPU_AVR_ATMEGA128 = 51128; + UC_CPU_AVR_ATMEGA128RFR2 = 51129; + UC_CPU_AVR_ATMEGA1280 = 51130; + UC_CPU_AVR_ATMEGA256 = 60256; + UC_CPU_AVR_ATMEGA256RFR2 = 60257; + UC_CPU_AVR_ATMEGA2560 = 60258; + +// AVR memory + UC_AVR_MEM_FLASH = 134217728; + +// AVR registers + + UC_AVR_REG_INVALID = 0; + UC_AVR_REG_R0 = 1; + UC_AVR_REG_R1 = 2; + UC_AVR_REG_R2 = 3; + UC_AVR_REG_R3 = 4; + UC_AVR_REG_R4 = 5; + UC_AVR_REG_R5 = 6; + UC_AVR_REG_R6 = 7; + UC_AVR_REG_R7 = 8; + UC_AVR_REG_R8 = 9; + UC_AVR_REG_R9 = 10; + UC_AVR_REG_R10 = 11; + UC_AVR_REG_R11 = 12; + UC_AVR_REG_R12 = 13; + UC_AVR_REG_R13 = 14; + UC_AVR_REG_R14 = 15; + UC_AVR_REG_R15 = 16; + UC_AVR_REG_R16 = 17; + UC_AVR_REG_R17 = 18; + UC_AVR_REG_R18 = 19; + UC_AVR_REG_R19 = 20; + UC_AVR_REG_R20 = 21; + UC_AVR_REG_R21 = 22; + UC_AVR_REG_R22 = 23; + UC_AVR_REG_R23 = 24; + UC_AVR_REG_R24 = 25; + UC_AVR_REG_R25 = 26; + UC_AVR_REG_R26 = 27; + UC_AVR_REG_R27 = 28; + UC_AVR_REG_R28 = 29; + UC_AVR_REG_R29 = 30; + UC_AVR_REG_R30 = 31; + UC_AVR_REG_R31 = 32; + UC_AVR_REG_PC = 33; + UC_AVR_REG_SP = 34; + UC_AVR_REG_RAMPD = 57; + UC_AVR_REG_RAMPX = 58; + UC_AVR_REG_RAMPY = 59; + UC_AVR_REG_RAMPZ = 60; + UC_AVR_REG_EIND = 61; + UC_AVR_REG_SPL = 62; + UC_AVR_REG_SPH = 63; + UC_AVR_REG_SREG = 64; + +// 16-bit coalesced registers + UC_AVR_REG_R0W = 65; + UC_AVR_REG_R1W = 66; + UC_AVR_REG_R2W = 67; + UC_AVR_REG_R3W = 68; + UC_AVR_REG_R4W = 69; + UC_AVR_REG_R5W = 70; + UC_AVR_REG_R6W = 71; + UC_AVR_REG_R7W = 72; + UC_AVR_REG_R8W = 73; + UC_AVR_REG_R9W = 74; + UC_AVR_REG_R10W = 75; + UC_AVR_REG_R11W = 76; + UC_AVR_REG_R12W = 77; + UC_AVR_REG_R13W = 78; + UC_AVR_REG_R14W = 79; + UC_AVR_REG_R15W = 80; + UC_AVR_REG_R16W = 81; + UC_AVR_REG_R17W = 82; + UC_AVR_REG_R18W = 83; + UC_AVR_REG_R19W = 84; + UC_AVR_REG_R20W = 85; + UC_AVR_REG_R21W = 86; + UC_AVR_REG_R22W = 87; + UC_AVR_REG_R23W = 88; + UC_AVR_REG_R24W = 89; + UC_AVR_REG_R25W = 90; + UC_AVR_REG_R26W = 91; + UC_AVR_REG_R27W = 92; + UC_AVR_REG_R28W = 93; + UC_AVR_REG_R29W = 94; + UC_AVR_REG_R30W = 95; + +// 32-bit coalesced registers + UC_AVR_REG_R0D = 97; + UC_AVR_REG_R1D = 98; + UC_AVR_REG_R2D = 99; + UC_AVR_REG_R3D = 100; + UC_AVR_REG_R4D = 101; + UC_AVR_REG_R5D = 102; + UC_AVR_REG_R6D = 103; + UC_AVR_REG_R7D = 104; + UC_AVR_REG_R8D = 105; + UC_AVR_REG_R9D = 106; + UC_AVR_REG_R10D = 107; + UC_AVR_REG_R11D = 108; + UC_AVR_REG_R12D = 109; + UC_AVR_REG_R13D = 110; + UC_AVR_REG_R14D = 111; + UC_AVR_REG_R15D = 112; + UC_AVR_REG_R16D = 113; + UC_AVR_REG_R17D = 114; + UC_AVR_REG_R18D = 115; + UC_AVR_REG_R19D = 116; + UC_AVR_REG_R20D = 117; + UC_AVR_REG_R21D = 118; + UC_AVR_REG_R22D = 119; + UC_AVR_REG_R23D = 120; + UC_AVR_REG_R24D = 121; + UC_AVR_REG_R25D = 122; + UC_AVR_REG_R26D = 123; + UC_AVR_REG_R27D = 124; + UC_AVR_REG_R28D = 125; + +// Alias registers + UC_AVR_REG_Xhi = 28; + UC_AVR_REG_Xlo = 27; + UC_AVR_REG_Yhi = 30; + UC_AVR_REG_Ylo = 29; + UC_AVR_REG_Zhi = 32; + UC_AVR_REG_Zlo = 31; + UC_AVR_REG_X = 91; + UC_AVR_REG_Y = 93; + UC_AVR_REG_Z = 95; + +implementation +end. 
\ No newline at end of file diff --git a/bindings/pascal/unicorn/Rh850Const.pas b/bindings/pascal/unicorn/Rh850Const.pas new file mode 100644 index 0000000000..c405b3c464 --- /dev/null +++ b/bindings/pascal/unicorn/Rh850Const.pas @@ -0,0 +1,99 @@ +// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT + +unit Rh850Const; + +interface + +const UC_RH850_SYSREG_SELID0 = 32; + UC_RH850_SYSREG_SELID1 = 64; + UC_RH850_SYSREG_SELID2 = 96; + UC_RH850_SYSREG_SELID3 = 128; + UC_RH850_SYSREG_SELID4 = 160; + UC_RH850_SYSREG_SELID5 = 192; + UC_RH850_SYSREG_SELID6 = 224; + UC_RH850_SYSREG_SELID7 = 256; + +// RH850 global purpose registers + + UC_RH850_REG_R0 = 0; + UC_RH850_REG_R1 = 1; + UC_RH850_REG_R2 = 2; + UC_RH850_REG_R3 = 3; + UC_RH850_REG_R4 = 4; + UC_RH850_REG_R5 = 5; + UC_RH850_REG_R6 = 6; + UC_RH850_REG_R7 = 7; + UC_RH850_REG_R8 = 8; + UC_RH850_REG_R9 = 9; + UC_RH850_REG_R10 = 10; + UC_RH850_REG_R11 = 11; + UC_RH850_REG_R12 = 12; + UC_RH850_REG_R13 = 13; + UC_RH850_REG_R14 = 14; + UC_RH850_REG_R15 = 15; + UC_RH850_REG_R16 = 16; + UC_RH850_REG_R17 = 17; + UC_RH850_REG_R18 = 18; + UC_RH850_REG_R19 = 19; + UC_RH850_REG_R20 = 20; + UC_RH850_REG_R21 = 21; + UC_RH850_REG_R22 = 22; + UC_RH850_REG_R23 = 23; + UC_RH850_REG_R24 = 24; + UC_RH850_REG_R25 = 25; + UC_RH850_REG_R26 = 26; + UC_RH850_REG_R27 = 27; + UC_RH850_REG_R28 = 28; + UC_RH850_REG_R29 = 29; + UC_RH850_REG_R30 = 30; + UC_RH850_REG_R31 = 31; + +// RH850 system registers, selection ID 0 + UC_RH850_REG_EIPC = 32; + UC_RH850_REG_EIPSW = 33; + UC_RH850_REG_FEPC = 34; + UC_RH850_REG_FEPSW = 35; + UC_RH850_REG_ECR = 36; + UC_RH850_REG_PSW = 37; + UC_RH850_REG_FPSR = 38; + UC_RH850_REG_FPEPC = 39; + UC_RH850_REG_FPST = 40; + UC_RH850_REG_FPCC = 41; + UC_RH850_REG_FPCFG = 42; + UC_RH850_REG_FPEC = 43; + UC_RH850_REG_EIIC = 45; + UC_RH850_REG_FEIC = 46; + UC_RH850_REG_CTPC = 48; + UC_RH850_REG_CTPSW = 49; + UC_RH850_REG_CTBP = 52; + UC_RH850_REG_EIWR = 60; + UC_RH850_REG_FEWR = 61; + UC_RH850_REG_BSEL = 63; + +// RH850 system regusters, selection ID 1 + UC_RH850_REG_MCFG0 = 64; + UC_RH850_REG_RBASE = 65; + UC_RH850_REG_EBASE = 66; + UC_RH850_REG_INTBP = 67; + UC_RH850_REG_MCTL = 68; + UC_RH850_REG_PID = 69; + UC_RH850_REG_SCCFG = 75; + UC_RH850_REG_SCBP = 76; + +// RH850 system registers, selection ID 2 + UC_RH850_REG_HTCFG0 = 96; + UC_RH850_REG_MEA = 102; + UC_RH850_REG_ASID = 103; + UC_RH850_REG_MEI = 104; + UC_RH850_REG_PC = 288; + UC_RH850_REG_ENDING = 289; + +// RH8509 Registers aliases. + + UC_RH850_REG_ZERO = 0; + UC_RH850_REG_SP = 3; + UC_RH850_REG_EP = 30; + UC_RH850_REG_LP = 31; + +implementation +end. \ No newline at end of file diff --git a/bindings/pascal/unicorn/UnicornConst.pas b/bindings/pascal/unicorn/UnicornConst.pas index 0716ec3e4e..ce19c20386 100644 --- a/bindings/pascal/unicorn/UnicornConst.pas +++ b/bindings/pascal/unicorn/UnicornConst.pas @@ -24,7 +24,9 @@ interface UC_ARCH_RISCV = 8; UC_ARCH_S390X = 9; UC_ARCH_TRICORE = 10; - UC_ARCH_MAX = 11; + UC_ARCH_AVR = 11; + UC_ARCH_RH850 = 12; + UC_ARCH_MAX = 13; UC_MODE_LITTLE_ENDIAN = 0; UC_MODE_BIG_ENDIAN = 1073741824; @@ -51,6 +53,7 @@ interface UC_MODE_SPARC32 = 4; UC_MODE_SPARC64 = 8; UC_MODE_V9 = 16; + UC_MODE_RH850 = 4; UC_MODE_RISCV32 = 4; UC_MODE_RISCV64 = 8; @@ -154,4 +157,4 @@ interface UC_CTL_CONTEXT_MEMORY = 2; implementation -end. \ No newline at end of file +end. 
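The Python sample that follows exercises the new RH850 backend; for the AVR side, a comparable minimal sketch is shown here (illustrative only, not part of the patch). It assumes the Python binding exposes ctl_set_cpu_model() for selecting one of the UC_CPU_AVR_* models added above, that guest code is mapped at the UC_AVR_MEM_FLASH base, and that the byte string encodes two AVR nop instructions (opcode 0x0000):

    #!/usr/bin/env python
    # Minimal AVR sketch for the new UC_ARCH_AVR support (assumptions noted above).
    from unicorn import *
    from unicorn.avr_const import *

    AVR_NOP_CODE = b"\x00\x00" * 2          # two 16-bit nop opcodes

    def test_avr():
        try:
            mu = Uc(UC_ARCH_AVR, 0)
            mu.ctl_set_cpu_model(UC_CPU_AVR_ATMEGA128)   # AVR5.1-class core

            # map one flash page and place the code at the flash base
            mu.mem_map(UC_AVR_MEM_FLASH, 4 * 1024)
            mu.mem_write(UC_AVR_MEM_FLASH, AVR_NOP_CODE)

            # pre-load a general purpose register, then run both instructions
            mu.reg_write(UC_AVR_REG_R16, 0x2a)
            mu.emu_start(UC_AVR_MEM_FLASH, UC_AVR_MEM_FLASH + len(AVR_NOP_CODE))

            print(">>> R16 = 0x%x" % mu.reg_read(UC_AVR_REG_R16))
            print(">>> PC  = 0x%x" % mu.reg_read(UC_AVR_REG_PC))
        except UcError as e:
            print("ERROR: %s" % e)

    if __name__ == '__main__':
        test_avr()

The same register-read/write and emu_start pattern applies to the RH850 constants introduced in this patch; the in-tree sample_rh850.py below is the authoritative version.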
diff --git a/bindings/python/sample_rh850.py b/bindings/python/sample_rh850.py new file mode 100644 index 0000000000..294d76ff2b --- /dev/null +++ b/bindings/python/sample_rh850.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# Sample code for RH850 of Unicorn. Damien Cauquil +# + +from __future__ import print_function +from unicorn import * +from unicorn.rh850_const import * + + +''' + 0 01 0e 06 addi 6, r1, r1 + 4 00 c1 11 add r1, r2 +''' +RH850_CODE = b"\x01\x0e\x06\x00\xc1\x11" + +# memory address where emulation starts +ADDRESS = 0x10000 + + +# callback for tracing basic blocks +def hook_block(uc, address, size, user_data): + print(">>> Tracing basic block at 0x%x, block size = 0x%x" %(address, size)) + + +# callback for tracing instructions +def hook_code(uc, address, size, user_data): + print(">>> Tracing instruction at 0x%x, instruction size = 0x%x" %(address, size)) + + +# Test RH850 +def test_rh850(): + print("Emulate RH850 code") + try: + # Initialize emulator in RISCV32 mode + mu = Uc(UC_ARCH_RH850, 0) + + # map 2MB memory for this emulation + mu.mem_map(ADDRESS, 2 * 1024 * 1024) + + # write machine code to be emulated to memory + mu.mem_write(ADDRESS, RH850_CODE) + + # initialize machine registers + mu.reg_write(UC_RH850_REG_R1, 0x1234) + mu.reg_write(UC_RH850_REG_R2, 0x7890) + + # tracing all basic blocks with customized callback + mu.hook_add(UC_HOOK_BLOCK, hook_block) + + # tracing all instructions with customized callback + mu.hook_add(UC_HOOK_CODE, hook_code) + + # emulate machine code in infinite time + mu.emu_start(ADDRESS, ADDRESS + len(RH850_CODE)) + + # now print out some registers + print(">>> Emulation done. Below is the CPU context") + + r1 = mu.reg_read(UC_RH850_REG_R1) + r2 = mu.reg_read(UC_RH850_REG_R2) + print(">>> R1 = 0x%x" % r1) + print(">>> R2 = 0x%x" % r2) + + except UcError as e: + print("ERROR: %s" % e) + + +if __name__ == '__main__': + test_rh850() + diff --git a/bindings/python/unicorn/__init__.py b/bindings/python/unicorn/__init__.py index a93e12a05c..04894a8e2b 100644 --- a/bindings/python/unicorn/__init__.py +++ b/bindings/python/unicorn/__init__.py @@ -1,4 +1,4 @@ # Forwarding defs for compatibility -from . import arm_const, arm64_const, mips_const, sparc_const, m68k_const, x86_const, riscv_const, s390x_const, tricore_const +from . import arm_const, arm64_const, avr_const, mips_const, sparc_const, m68k_const, x86_const, riscv_const, s390x_const, tricore_const from .unicorn_const import * from .unicorn import Uc, ucsubclass, uc_version, uc_arch_supported, version_bind, debug, UcError, __version__ diff --git a/bindings/python/unicorn/avr_const.py b/bindings/python/unicorn/avr_const.py new file mode 100644 index 0000000000..1bf80a3fa5 --- /dev/null +++ b/bindings/python/unicorn/avr_const.py @@ -0,0 +1,147 @@ +# For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [avr_const.py] + +# AVR architectures +UC_AVR_ARCH_AVR1 = 10 +UC_AVR_ARCH_AVR2 = 20 +UC_AVR_ARCH_AVR25 = 25 +UC_AVR_ARCH_AVR3 = 30 +UC_AVR_ARCH_AVR4 = 40 +UC_AVR_ARCH_AVR5 = 50 +UC_AVR_ARCH_AVR51 = 51 +UC_AVR_ARCH_AVR6 = 60 +UC_CPU_AVR_ARCH = 1000 + +# AVR CPU +UC_CPU_AVR_ATMEGA16 = 50016 +UC_CPU_AVR_ATMEGA32 = 50032 +UC_CPU_AVR_ATMEGA64 = 50064 +UC_CPU_AVR_ATMEGA128 = 51128 +UC_CPU_AVR_ATMEGA128RFR2 = 51129 +UC_CPU_AVR_ATMEGA1280 = 51130 +UC_CPU_AVR_ATMEGA256 = 60256 +UC_CPU_AVR_ATMEGA256RFR2 = 60257 +UC_CPU_AVR_ATMEGA2560 = 60258 + +# AVR memory +UC_AVR_MEM_FLASH = 134217728 + +# AVR registers + +UC_AVR_REG_INVALID = 0 +UC_AVR_REG_R0 = 1 +UC_AVR_REG_R1 = 2 +UC_AVR_REG_R2 = 3 +UC_AVR_REG_R3 = 4 +UC_AVR_REG_R4 = 5 +UC_AVR_REG_R5 = 6 +UC_AVR_REG_R6 = 7 +UC_AVR_REG_R7 = 8 +UC_AVR_REG_R8 = 9 +UC_AVR_REG_R9 = 10 +UC_AVR_REG_R10 = 11 +UC_AVR_REG_R11 = 12 +UC_AVR_REG_R12 = 13 +UC_AVR_REG_R13 = 14 +UC_AVR_REG_R14 = 15 +UC_AVR_REG_R15 = 16 +UC_AVR_REG_R16 = 17 +UC_AVR_REG_R17 = 18 +UC_AVR_REG_R18 = 19 +UC_AVR_REG_R19 = 20 +UC_AVR_REG_R20 = 21 +UC_AVR_REG_R21 = 22 +UC_AVR_REG_R22 = 23 +UC_AVR_REG_R23 = 24 +UC_AVR_REG_R24 = 25 +UC_AVR_REG_R25 = 26 +UC_AVR_REG_R26 = 27 +UC_AVR_REG_R27 = 28 +UC_AVR_REG_R28 = 29 +UC_AVR_REG_R29 = 30 +UC_AVR_REG_R30 = 31 +UC_AVR_REG_R31 = 32 +UC_AVR_REG_PC = 33 +UC_AVR_REG_SP = 34 +UC_AVR_REG_RAMPD = 57 +UC_AVR_REG_RAMPX = 58 +UC_AVR_REG_RAMPY = 59 +UC_AVR_REG_RAMPZ = 60 +UC_AVR_REG_EIND = 61 +UC_AVR_REG_SPL = 62 +UC_AVR_REG_SPH = 63 +UC_AVR_REG_SREG = 64 + +# 16-bit coalesced registers +UC_AVR_REG_R0W = 65 +UC_AVR_REG_R1W = 66 +UC_AVR_REG_R2W = 67 +UC_AVR_REG_R3W = 68 +UC_AVR_REG_R4W = 69 +UC_AVR_REG_R5W = 70 +UC_AVR_REG_R6W = 71 +UC_AVR_REG_R7W = 72 +UC_AVR_REG_R8W = 73 +UC_AVR_REG_R9W = 74 +UC_AVR_REG_R10W = 75 +UC_AVR_REG_R11W = 76 +UC_AVR_REG_R12W = 77 +UC_AVR_REG_R13W = 78 +UC_AVR_REG_R14W = 79 +UC_AVR_REG_R15W = 80 +UC_AVR_REG_R16W = 81 +UC_AVR_REG_R17W = 82 +UC_AVR_REG_R18W = 83 +UC_AVR_REG_R19W = 84 +UC_AVR_REG_R20W = 85 +UC_AVR_REG_R21W = 86 +UC_AVR_REG_R22W = 87 +UC_AVR_REG_R23W = 88 +UC_AVR_REG_R24W = 89 +UC_AVR_REG_R25W = 90 +UC_AVR_REG_R26W = 91 +UC_AVR_REG_R27W = 92 +UC_AVR_REG_R28W = 93 +UC_AVR_REG_R29W = 94 +UC_AVR_REG_R30W = 95 + +# 32-bit coalesced registers +UC_AVR_REG_R0D = 97 +UC_AVR_REG_R1D = 98 +UC_AVR_REG_R2D = 99 +UC_AVR_REG_R3D = 100 +UC_AVR_REG_R4D = 101 +UC_AVR_REG_R5D = 102 +UC_AVR_REG_R6D = 103 +UC_AVR_REG_R7D = 104 +UC_AVR_REG_R8D = 105 +UC_AVR_REG_R9D = 106 +UC_AVR_REG_R10D = 107 +UC_AVR_REG_R11D = 108 +UC_AVR_REG_R12D = 109 +UC_AVR_REG_R13D = 110 +UC_AVR_REG_R14D = 111 +UC_AVR_REG_R15D = 112 +UC_AVR_REG_R16D = 113 +UC_AVR_REG_R17D = 114 +UC_AVR_REG_R18D = 115 +UC_AVR_REG_R19D = 116 +UC_AVR_REG_R20D = 117 +UC_AVR_REG_R21D = 118 +UC_AVR_REG_R22D = 119 +UC_AVR_REG_R23D = 120 +UC_AVR_REG_R24D = 121 +UC_AVR_REG_R25D = 122 +UC_AVR_REG_R26D = 123 +UC_AVR_REG_R27D = 124 +UC_AVR_REG_R28D = 125 + +# Alias registers +UC_AVR_REG_Xhi = 28 +UC_AVR_REG_Xlo = 27 +UC_AVR_REG_Yhi = 30 +UC_AVR_REG_Ylo = 29 +UC_AVR_REG_Zhi = 32 +UC_AVR_REG_Zlo = 31 +UC_AVR_REG_X = 91 +UC_AVR_REG_Y = 93 +UC_AVR_REG_Z = 95 diff --git a/bindings/python/unicorn/rh850_const.py b/bindings/python/unicorn/rh850_const.py new file mode 100644 index 0000000000..6985d85e4f --- /dev/null +++ b/bindings/python/unicorn/rh850_const.py @@ -0,0 +1,91 @@ +# For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [rh850_const.py] +UC_RH850_SYSREG_SELID0 = 32 +UC_RH850_SYSREG_SELID1 = 64 +UC_RH850_SYSREG_SELID2 = 96 +UC_RH850_SYSREG_SELID3 = 128 +UC_RH850_SYSREG_SELID4 = 160 +UC_RH850_SYSREG_SELID5 = 192 +UC_RH850_SYSREG_SELID6 = 224 +UC_RH850_SYSREG_SELID7 = 256 + +# RH850 global purpose registers + +UC_RH850_REG_R0 = 0 +UC_RH850_REG_R1 = 1 +UC_RH850_REG_R2 = 2 +UC_RH850_REG_R3 = 3 +UC_RH850_REG_R4 = 4 +UC_RH850_REG_R5 = 5 +UC_RH850_REG_R6 = 6 +UC_RH850_REG_R7 = 7 +UC_RH850_REG_R8 = 8 +UC_RH850_REG_R9 = 9 +UC_RH850_REG_R10 = 10 +UC_RH850_REG_R11 = 11 +UC_RH850_REG_R12 = 12 +UC_RH850_REG_R13 = 13 +UC_RH850_REG_R14 = 14 +UC_RH850_REG_R15 = 15 +UC_RH850_REG_R16 = 16 +UC_RH850_REG_R17 = 17 +UC_RH850_REG_R18 = 18 +UC_RH850_REG_R19 = 19 +UC_RH850_REG_R20 = 20 +UC_RH850_REG_R21 = 21 +UC_RH850_REG_R22 = 22 +UC_RH850_REG_R23 = 23 +UC_RH850_REG_R24 = 24 +UC_RH850_REG_R25 = 25 +UC_RH850_REG_R26 = 26 +UC_RH850_REG_R27 = 27 +UC_RH850_REG_R28 = 28 +UC_RH850_REG_R29 = 29 +UC_RH850_REG_R30 = 30 +UC_RH850_REG_R31 = 31 + +# RH850 system registers, selection ID 0 +UC_RH850_REG_EIPC = 32 +UC_RH850_REG_EIPSW = 33 +UC_RH850_REG_FEPC = 34 +UC_RH850_REG_FEPSW = 35 +UC_RH850_REG_ECR = 36 +UC_RH850_REG_PSW = 37 +UC_RH850_REG_FPSR = 38 +UC_RH850_REG_FPEPC = 39 +UC_RH850_REG_FPST = 40 +UC_RH850_REG_FPCC = 41 +UC_RH850_REG_FPCFG = 42 +UC_RH850_REG_FPEC = 43 +UC_RH850_REG_EIIC = 45 +UC_RH850_REG_FEIC = 46 +UC_RH850_REG_CTPC = 48 +UC_RH850_REG_CTPSW = 49 +UC_RH850_REG_CTBP = 52 +UC_RH850_REG_EIWR = 60 +UC_RH850_REG_FEWR = 61 +UC_RH850_REG_BSEL = 63 + +# RH850 system regusters, selection ID 1 +UC_RH850_REG_MCFG0 = 64 +UC_RH850_REG_RBASE = 65 +UC_RH850_REG_EBASE = 66 +UC_RH850_REG_INTBP = 67 +UC_RH850_REG_MCTL = 68 +UC_RH850_REG_PID = 69 +UC_RH850_REG_SCCFG = 75 +UC_RH850_REG_SCBP = 76 + +# RH850 system registers, selection ID 2 +UC_RH850_REG_HTCFG0 = 96 +UC_RH850_REG_MEA = 102 +UC_RH850_REG_ASID = 103 +UC_RH850_REG_MEI = 104 +UC_RH850_REG_PC = 288 +UC_RH850_REG_ENDING = 289 + +# RH8509 Registers aliases. + +UC_RH850_REG_ZERO = 0 +UC_RH850_REG_SP = 3 +UC_RH850_REG_EP = 30 +UC_RH850_REG_LP = 31 diff --git a/bindings/python/unicorn/unicorn_const.py b/bindings/python/unicorn/unicorn_const.py index 8fab22415b..d3d72aa3a2 100644 --- a/bindings/python/unicorn/unicorn_const.py +++ b/bindings/python/unicorn/unicorn_const.py @@ -19,7 +19,9 @@ UC_ARCH_RISCV = 8 UC_ARCH_S390X = 9 UC_ARCH_TRICORE = 10 -UC_ARCH_MAX = 11 +UC_ARCH_AVR = 11 +UC_ARCH_RH850 = 12 +UC_ARCH_MAX = 13 UC_MODE_LITTLE_ENDIAN = 0 UC_MODE_BIG_ENDIAN = 1073741824 @@ -46,6 +48,7 @@ UC_MODE_SPARC32 = 4 UC_MODE_SPARC64 = 8 UC_MODE_V9 = 16 +UC_MODE_RH850 = 4 UC_MODE_RISCV32 = 4 UC_MODE_RISCV64 = 8 diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/avr_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/avr_const.rb new file mode 100644 index 0000000000..126ebd0c8f --- /dev/null +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/avr_const.rb @@ -0,0 +1,150 @@ +# For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [avr_const.rb] + +module UnicornEngine + +# AVR architectures + UC_AVR_ARCH_AVR1 = 10 + UC_AVR_ARCH_AVR2 = 20 + UC_AVR_ARCH_AVR25 = 25 + UC_AVR_ARCH_AVR3 = 30 + UC_AVR_ARCH_AVR4 = 40 + UC_AVR_ARCH_AVR5 = 50 + UC_AVR_ARCH_AVR51 = 51 + UC_AVR_ARCH_AVR6 = 60 + UC_CPU_AVR_ARCH = 1000 + +# AVR CPU + UC_CPU_AVR_ATMEGA16 = 50016 + UC_CPU_AVR_ATMEGA32 = 50032 + UC_CPU_AVR_ATMEGA64 = 50064 + UC_CPU_AVR_ATMEGA128 = 51128 + UC_CPU_AVR_ATMEGA128RFR2 = 51129 + UC_CPU_AVR_ATMEGA1280 = 51130 + UC_CPU_AVR_ATMEGA256 = 60256 + UC_CPU_AVR_ATMEGA256RFR2 = 60257 + UC_CPU_AVR_ATMEGA2560 = 60258 + +# AVR memory + UC_AVR_MEM_FLASH = 134217728 + +# AVR registers + + UC_AVR_REG_INVALID = 0 + UC_AVR_REG_R0 = 1 + UC_AVR_REG_R1 = 2 + UC_AVR_REG_R2 = 3 + UC_AVR_REG_R3 = 4 + UC_AVR_REG_R4 = 5 + UC_AVR_REG_R5 = 6 + UC_AVR_REG_R6 = 7 + UC_AVR_REG_R7 = 8 + UC_AVR_REG_R8 = 9 + UC_AVR_REG_R9 = 10 + UC_AVR_REG_R10 = 11 + UC_AVR_REG_R11 = 12 + UC_AVR_REG_R12 = 13 + UC_AVR_REG_R13 = 14 + UC_AVR_REG_R14 = 15 + UC_AVR_REG_R15 = 16 + UC_AVR_REG_R16 = 17 + UC_AVR_REG_R17 = 18 + UC_AVR_REG_R18 = 19 + UC_AVR_REG_R19 = 20 + UC_AVR_REG_R20 = 21 + UC_AVR_REG_R21 = 22 + UC_AVR_REG_R22 = 23 + UC_AVR_REG_R23 = 24 + UC_AVR_REG_R24 = 25 + UC_AVR_REG_R25 = 26 + UC_AVR_REG_R26 = 27 + UC_AVR_REG_R27 = 28 + UC_AVR_REG_R28 = 29 + UC_AVR_REG_R29 = 30 + UC_AVR_REG_R30 = 31 + UC_AVR_REG_R31 = 32 + UC_AVR_REG_PC = 33 + UC_AVR_REG_SP = 34 + UC_AVR_REG_RAMPD = 57 + UC_AVR_REG_RAMPX = 58 + UC_AVR_REG_RAMPY = 59 + UC_AVR_REG_RAMPZ = 60 + UC_AVR_REG_EIND = 61 + UC_AVR_REG_SPL = 62 + UC_AVR_REG_SPH = 63 + UC_AVR_REG_SREG = 64 + +# 16-bit coalesced registers + UC_AVR_REG_R0W = 65 + UC_AVR_REG_R1W = 66 + UC_AVR_REG_R2W = 67 + UC_AVR_REG_R3W = 68 + UC_AVR_REG_R4W = 69 + UC_AVR_REG_R5W = 70 + UC_AVR_REG_R6W = 71 + UC_AVR_REG_R7W = 72 + UC_AVR_REG_R8W = 73 + UC_AVR_REG_R9W = 74 + UC_AVR_REG_R10W = 75 + UC_AVR_REG_R11W = 76 + UC_AVR_REG_R12W = 77 + UC_AVR_REG_R13W = 78 + UC_AVR_REG_R14W = 79 + UC_AVR_REG_R15W = 80 + UC_AVR_REG_R16W = 81 + UC_AVR_REG_R17W = 82 + UC_AVR_REG_R18W = 83 + UC_AVR_REG_R19W = 84 + UC_AVR_REG_R20W = 85 + UC_AVR_REG_R21W = 86 + UC_AVR_REG_R22W = 87 + UC_AVR_REG_R23W = 88 + UC_AVR_REG_R24W = 89 + UC_AVR_REG_R25W = 90 + UC_AVR_REG_R26W = 91 + UC_AVR_REG_R27W = 92 + UC_AVR_REG_R28W = 93 + UC_AVR_REG_R29W = 94 + UC_AVR_REG_R30W = 95 + +# 32-bit coalesced registers + UC_AVR_REG_R0D = 97 + UC_AVR_REG_R1D = 98 + UC_AVR_REG_R2D = 99 + UC_AVR_REG_R3D = 100 + UC_AVR_REG_R4D = 101 + UC_AVR_REG_R5D = 102 + UC_AVR_REG_R6D = 103 + UC_AVR_REG_R7D = 104 + UC_AVR_REG_R8D = 105 + UC_AVR_REG_R9D = 106 + UC_AVR_REG_R10D = 107 + UC_AVR_REG_R11D = 108 + UC_AVR_REG_R12D = 109 + UC_AVR_REG_R13D = 110 + UC_AVR_REG_R14D = 111 + UC_AVR_REG_R15D = 112 + UC_AVR_REG_R16D = 113 + UC_AVR_REG_R17D = 114 + UC_AVR_REG_R18D = 115 + UC_AVR_REG_R19D = 116 + UC_AVR_REG_R20D = 117 + UC_AVR_REG_R21D = 118 + UC_AVR_REG_R22D = 119 + UC_AVR_REG_R23D = 120 + UC_AVR_REG_R24D = 121 + UC_AVR_REG_R25D = 122 + UC_AVR_REG_R26D = 123 + UC_AVR_REG_R27D = 124 + UC_AVR_REG_R28D = 125 + +# Alias registers + UC_AVR_REG_Xhi = 28 + UC_AVR_REG_Xlo = 27 + UC_AVR_REG_Yhi = 30 + UC_AVR_REG_Ylo = 29 + UC_AVR_REG_Zhi = 32 + UC_AVR_REG_Zlo = 31 + UC_AVR_REG_X = 91 + UC_AVR_REG_Y = 93 + UC_AVR_REG_Z = 95 +end \ No newline at end of file diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/rh850_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/rh850_const.rb new file mode 100644 index 0000000000..40629b9883 --- /dev/null +++ 
b/bindings/ruby/unicorn_gem/lib/unicorn_engine/rh850_const.rb @@ -0,0 +1,94 @@ +# For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT [rh850_const.rb] + +module UnicornEngine + UC_RH850_SYSREG_SELID0 = 32 + UC_RH850_SYSREG_SELID1 = 64 + UC_RH850_SYSREG_SELID2 = 96 + UC_RH850_SYSREG_SELID3 = 128 + UC_RH850_SYSREG_SELID4 = 160 + UC_RH850_SYSREG_SELID5 = 192 + UC_RH850_SYSREG_SELID6 = 224 + UC_RH850_SYSREG_SELID7 = 256 + +# RH850 global purpose registers + + UC_RH850_REG_R0 = 0 + UC_RH850_REG_R1 = 1 + UC_RH850_REG_R2 = 2 + UC_RH850_REG_R3 = 3 + UC_RH850_REG_R4 = 4 + UC_RH850_REG_R5 = 5 + UC_RH850_REG_R6 = 6 + UC_RH850_REG_R7 = 7 + UC_RH850_REG_R8 = 8 + UC_RH850_REG_R9 = 9 + UC_RH850_REG_R10 = 10 + UC_RH850_REG_R11 = 11 + UC_RH850_REG_R12 = 12 + UC_RH850_REG_R13 = 13 + UC_RH850_REG_R14 = 14 + UC_RH850_REG_R15 = 15 + UC_RH850_REG_R16 = 16 + UC_RH850_REG_R17 = 17 + UC_RH850_REG_R18 = 18 + UC_RH850_REG_R19 = 19 + UC_RH850_REG_R20 = 20 + UC_RH850_REG_R21 = 21 + UC_RH850_REG_R22 = 22 + UC_RH850_REG_R23 = 23 + UC_RH850_REG_R24 = 24 + UC_RH850_REG_R25 = 25 + UC_RH850_REG_R26 = 26 + UC_RH850_REG_R27 = 27 + UC_RH850_REG_R28 = 28 + UC_RH850_REG_R29 = 29 + UC_RH850_REG_R30 = 30 + UC_RH850_REG_R31 = 31 + +# RH850 system registers, selection ID 0 + UC_RH850_REG_EIPC = 32 + UC_RH850_REG_EIPSW = 33 + UC_RH850_REG_FEPC = 34 + UC_RH850_REG_FEPSW = 35 + UC_RH850_REG_ECR = 36 + UC_RH850_REG_PSW = 37 + UC_RH850_REG_FPSR = 38 + UC_RH850_REG_FPEPC = 39 + UC_RH850_REG_FPST = 40 + UC_RH850_REG_FPCC = 41 + UC_RH850_REG_FPCFG = 42 + UC_RH850_REG_FPEC = 43 + UC_RH850_REG_EIIC = 45 + UC_RH850_REG_FEIC = 46 + UC_RH850_REG_CTPC = 48 + UC_RH850_REG_CTPSW = 49 + UC_RH850_REG_CTBP = 52 + UC_RH850_REG_EIWR = 60 + UC_RH850_REG_FEWR = 61 + UC_RH850_REG_BSEL = 63 + +# RH850 system regusters, selection ID 1 + UC_RH850_REG_MCFG0 = 64 + UC_RH850_REG_RBASE = 65 + UC_RH850_REG_EBASE = 66 + UC_RH850_REG_INTBP = 67 + UC_RH850_REG_MCTL = 68 + UC_RH850_REG_PID = 69 + UC_RH850_REG_SCCFG = 75 + UC_RH850_REG_SCBP = 76 + +# RH850 system registers, selection ID 2 + UC_RH850_REG_HTCFG0 = 96 + UC_RH850_REG_MEA = 102 + UC_RH850_REG_ASID = 103 + UC_RH850_REG_MEI = 104 + UC_RH850_REG_PC = 288 + UC_RH850_REG_ENDING = 289 + +# RH8509 Registers aliases. 
+ + UC_RH850_REG_ZERO = 0 + UC_RH850_REG_SP = 3 + UC_RH850_REG_EP = 30 + UC_RH850_REG_LP = 31 +end \ No newline at end of file diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/unicorn_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/unicorn_const.rb index c573a6f7c0..2f0a859740 100644 --- a/bindings/ruby/unicorn_gem/lib/unicorn_engine/unicorn_const.rb +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/unicorn_const.rb @@ -21,7 +21,9 @@ module UnicornEngine UC_ARCH_RISCV = 8 UC_ARCH_S390X = 9 UC_ARCH_TRICORE = 10 - UC_ARCH_MAX = 11 + UC_ARCH_AVR = 11 + UC_ARCH_RH850 = 12 + UC_ARCH_MAX = 13 UC_MODE_LITTLE_ENDIAN = 0 UC_MODE_BIG_ENDIAN = 1073741824 @@ -48,6 +50,7 @@ module UnicornEngine UC_MODE_SPARC32 = 4 UC_MODE_SPARC64 = 8 UC_MODE_V9 = 16 + UC_MODE_RH850 = 4 UC_MODE_RISCV32 = 4 UC_MODE_RISCV64 = 8 @@ -149,4 +152,4 @@ module UnicornEngine UC_PROT_ALL = 7 UC_CTL_CONTEXT_CPU = 1 UC_CTL_CONTEXT_MEMORY = 2 -end \ No newline at end of file +end diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs index 821367d39b..f91a37a51d 100644 --- a/bindings/rust/build.rs +++ b/bindings/rust/build.rs @@ -116,6 +116,12 @@ fn build_with_cmake() { if std::env::var("CARGO_FEATURE_ARCH_TRICORE").is_ok() { archs.push_str("tricore;"); } + if std::env::var("CARGO_FEATURE_ARCH_AVR").is_ok() { + archs.push_str("avr;"); + } + if std::env::var("CARGO_FEATURE_ARCH_RH850").is_ok() { + archs.push_str("rh850;"); + } if !archs.is_empty() { archs.pop(); diff --git a/bindings/rust/src/avr.rs b/bindings/rust/src/avr.rs new file mode 100644 index 0000000000..1660933497 --- /dev/null +++ b/bindings/rust/src/avr.rs @@ -0,0 +1,211 @@ +#![allow(non_camel_case_types)] +// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT + +#[repr(C)] +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum RegisterAVR { + INVALID = 0, + + // General purpose registers (GPR) + R0 = 1, + R1 = 2, + R2 = 3, + R3 = 4, + R4 = 5, + R5 = 6, + R6 = 7, + R7 = 8, + R8 = 9, + R9 = 10, + R10 = 11, + R11 = 12, + R12 = 13, + R13 = 14, + R14 = 15, + R15 = 16, + R16 = 17, + R17 = 18, + R18 = 19, + R19 = 20, + R20 = 21, + R21 = 22, + R22 = 23, + R23 = 24, + R24 = 25, + R25 = 26, + R26 = 27, + R27 = 28, + R28 = 29, + R29 = 30, + R30 = 31, + R31 = 32, + + PC = 33, + SP = 34, + + RAMPD = 57, + RAMPX = 58, + RAMPY = 59, + RAMPZ = 60, + EIND = 61, + SPL = 62, + SPH = 63, + SREG = 64, + + // 16-bit coalesced registers + R0W = 65, + R1W = 66, + R2W = 67, + R3W = 68, + R4W = 69, + R5W = 70, + R6W = 71, + R7W = 72, + R8W = 73, + R9W = 74, + R10W = 75, + R11W = 76, + R12W = 77, + R13W = 78, + R14W = 79, + R15W = 80, + R16W = 81, + R17W = 82, + R18W = 83, + R19W = 84, + R20W = 85, + R21W = 86, + R22W = 87, + R23W = 88, + R24W = 89, + R25W = 90, + R26W = 91, + R27W = 92, + R28W = 93, + R29W = 94, + R30W = 95, + + // 32-bit coalesced registers + R0D = 97, + R1D = 98, + R2D = 99, + R3D = 100, + R4D = 101, + R5D = 102, + R6D = 103, + R7D = 104, + R8D = 105, + R9D = 106, + R10D = 107, + R11D = 108, + R12D = 109, + R13D = 110, + R14D = 111, + R15D = 112, + R16D = 113, + R17D = 114, + R18D = 115, + R19D = 116, + R20D = 117, + R21D = 118, + R22D = 119, + R23D = 120, + R24D = 121, + R25D = 122, + R26D = 123, + R27D = 124, + R28D = 125, +} + +impl RegisterAVR { + // alias registers + // (assoc) Xhi = 28 + // (assoc) Xlo = 27 + // (assoc) Yhi = 30 + // (assoc) Ylo = 29 + // (assoc) Zhi = 32 + // (assoc) Zlo = 31 + pub const XHI: RegisterAVR = RegisterAVR::R27; + pub const XLO: RegisterAVR = RegisterAVR::R26; + pub const YHI: RegisterAVR = RegisterAVR::R29; + pub 
const YLO: RegisterAVR = RegisterAVR::R28; + pub const ZHI: RegisterAVR = RegisterAVR::R31; + pub const ZLO: RegisterAVR = RegisterAVR::R30; + + // (assoc) X = 91 + // (assoc) Y = 93 + // (assoc) Z = 95 + pub const X: RegisterAVR = RegisterAVR::R26W; + pub const Y: RegisterAVR = RegisterAVR::R28W; + pub const Z: RegisterAVR = RegisterAVR::R30W; +} + +impl From for i32 { + fn from(r: RegisterAVR) -> Self { + r as i32 + } +} + +#[repr(C)] +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum AvrArch { + UC_AVR_ARCH_AVR1 = 10, + UC_AVR_ARCH_AVR2 = 20, + UC_AVR_ARCH_AVR25 = 25, + UC_AVR_ARCH_AVR3 = 30, + UC_AVR_ARCH_AVR4 = 40, + UC_AVR_ARCH_AVR5 = 50, + UC_AVR_ARCH_AVR51 = 51, + UC_AVR_ARCH_AVR6 = 60, +} + +impl From for i32 { + fn from(value: AvrArch) -> Self { + value as i32 + } +} + +impl From<&AvrArch> for i32 { + fn from(value: &AvrArch) -> Self { + *value as i32 + } +} + +#[repr(C)] +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum AvrCpuModel { + UC_CPU_AVR_ATMEGA16 = 50016, + UC_CPU_AVR_ATMEGA32 = 50032, + UC_CPU_AVR_ATMEGA64 = 50064, + UC_CPU_AVR_ATMEGA128 = 51128, + UC_CPU_AVR_ATMEGA128RFR2 = 51129, + UC_CPU_AVR_ATMEGA1280 = 51130, + UC_CPU_AVR_ATMEGA256 = 60256, + UC_CPU_AVR_ATMEGA256RFR2 = 60257, + UC_CPU_AVR_ATMEGA2560 = 60258, +} + +impl From for i32 { + fn from(value: AvrCpuModel) -> Self { + value as i32 + } +} + +impl From<&AvrCpuModel> for i32 { + fn from(value: &AvrCpuModel) -> Self { + *value as i32 + } +} + +#[repr(i32)] +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum AvrMem { + // Flash program memory (code) + FLASH = 0x08000000, +} + +impl From for i32 { + fn from(r: AvrMem) -> Self { + r as i32 + } +} diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs index 7cc95a3946..09b5b75281 100644 --- a/bindings/rust/src/lib.rs +++ b/bindings/rust/src/lib.rs @@ -59,6 +59,12 @@ mod arm64; #[cfg(feature = "arch_aarch64")] pub use crate::arm64::*; +// include avr support if conditionally compiled in +#[cfg(feature = "arch_avr")] +mod avr; +#[cfg(feature = "arch_avr")] +pub use crate::avr::*; + // include m68k support if conditionally compiled in #[cfg(feature = "arch_m68k")] mod m68k; @@ -95,6 +101,12 @@ mod sparc; #[cfg(feature = "arch_sparc")] pub use crate::sparc::*; +// include rh850 support if conditionally compiled in +#[cfg(feature = "arch_rh850")] +mod rh850; +#[cfg(feature = "arch_rh850")] +pub use crate::rh850::*; + // include tricore support if conditionally compiled in #[cfg(feature = "arch_tricore")] mod tricore; @@ -1052,6 +1064,10 @@ impl<'a, D> Unicorn<'a, D> { Arch::S390X => Ok(RegisterS390X::PC as i32), #[cfg(feature = "arch_tricore")] Arch::TRICORE => Ok(RegisterTRICORE::PC as i32), + #[cfg(feature = "arch_avr")] + Arch::AVR => Ok(RegisterAVR::PC as i32), + #[cfg(feature = "arch_rh850")] + Arch::RH850 => Ok(RegisterRH850::PC as i32), // returns `uc_error::ARCH` for `Arch::MAX`, and any // other architecture that are not compiled in _ => Err(uc_error::ARCH), diff --git a/bindings/rust/src/rh850.rs b/bindings/rust/src/rh850.rs new file mode 100644 index 0000000000..92e8a16a4a --- /dev/null +++ b/bindings/rust/src/rh850.rs @@ -0,0 +1,119 @@ +#![allow(non_camel_case_types)] + +// RH850 registers +#[repr(C)] +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum RegisterRH850 { + INVALID = -1, + + // General purpose registers + R0 = 0, + R1 = 1, + R2 = 2, + R3 = 3, + R4 = 4, + R5 = 5, + R6 = 6, + R7 = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, + R16 = 16, + R17 = 17, + R18 = 18, + R19 = 19, + R20 = 
20, + R21 = 21, + R22 = 22, + R23 = 23, + R24 = 24, + R25 = 25, + R26 = 26, + R27 = 27, + R28 = 28, + R29 = 29, + R30 = 30, + R31 = 31, + + // System registers + EIPC = 32, + EIPSW = 33, + FEPC = 34, + FEPSW = 35, + ECR = 36, + PSW = 37, + FPSR = 38, + FPEPC = 39, + FPST = 40, + FPCC = 41, + FPCFG = 42, + FPEC = 43, + EIIC = 45, + FEIC = 46, + CTPC = 48, + CTPSW = 49, + CTBP = 52, + EIWR = 60, + FEWR = 61, + BSEL = 63, + + // system registers, selection ID 1 + MCFG0 = 64, + RBASE = 65, + EBASE = 66, + INTBP = 67, + MCTL = 68, + PID = 69, + SCCFG = 75, + SCBP = 76, + + // system registers, selection ID 2 + HTCFG0 = 96, + MEA = 102, + ASID = 103, + MEI = 104, + PC = 288, + + ENDING = 289, +} + +impl RegisterRH850 { + // Alias registers + // (assoc) ZERO = 0, + // (assoc) SP = 3, + // (assoc) EP = 30, + // (assoc) LP = 31, + pub const ZERO: RegisterRH850 = RegisterRH850::R0; + pub const SP: RegisterRH850 = RegisterRH850::R3; + pub const EP: RegisterRH850 = RegisterRH850::R30; + pub const LP: RegisterRH850 = RegisterRH850::R31; +} + +impl From for i32 { + fn from(r: RegisterRH850) -> Self { + r as i32 + } +} + +#[repr(i32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum RH850CpuModel { + UC_CPU_RH850_ANY = 0, +} + +impl From for i32 { + fn from(value: RH850CpuModel) -> Self { + value as i32 + } +} + +impl From<&RH850CpuModel> for i32 { + fn from(value: &RH850CpuModel) -> Self { + (*value) as i32 + } +} \ No newline at end of file diff --git a/bindings/rust/src/unicorn_const.rs b/bindings/rust/src/unicorn_const.rs index e5a7d0c015..8b2883ebeb 100644 --- a/bindings/rust/src/unicorn_const.rs +++ b/bindings/rust/src/unicorn_const.rs @@ -193,7 +193,9 @@ pub enum Arch { RISCV = 8, S390X = 9, TRICORE = 10, - MAX = 11, + AVR = 11, + RH850 = 12, + MAX = 13, } impl TryFrom for Arch { @@ -211,6 +213,7 @@ impl TryFrom for Arch { x if x == Self::RISCV as usize => Ok(Self::RISCV), x if x == Self::S390X as usize => Ok(Self::S390X), x if x == Self::TRICORE as usize => Ok(Self::TRICORE), + x if x == Self::RH850 as usize => Ok(Self::RH850), x if x == Self::MAX as usize => Ok(Self::MAX), _ => Err(uc_error::ARCH), } @@ -246,6 +249,7 @@ bitflags! { const SPARC32 = Self::MIPS32.bits(); const SPARC64 = Self::MIPS64.bits(); const V9 = Self::THUMB.bits(); + const RH850 = 4; const RISCV32 = Self::MIPS32.bits(); const RISCV64 = Self::MIPS64.bits(); } diff --git a/bindings/zig/unicorn/AVR_const.zig b/bindings/zig/unicorn/AVR_const.zig new file mode 100644 index 0000000000..e408f18e01 --- /dev/null +++ b/bindings/zig/unicorn/AVR_const.zig @@ -0,0 +1,151 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +pub const AVRConst = enum(c_int) { + +// AVR architectures + AVR_ARCH_AVR1 = 10, + AVR_ARCH_AVR2 = 20, + AVR_ARCH_AVR25 = 25, + AVR_ARCH_AVR3 = 30, + AVR_ARCH_AVR4 = 40, + AVR_ARCH_AVR5 = 50, + AVR_ARCH_AVR51 = 51, + AVR_ARCH_AVR6 = 60, + CPU_AVR_ARCH = 1000, + +// AVR CPU + CPU_AVR_ATMEGA16 = 50016, + CPU_AVR_ATMEGA32 = 50032, + CPU_AVR_ATMEGA64 = 50064, + CPU_AVR_ATMEGA128 = 51128, + CPU_AVR_ATMEGA128RFR2 = 51129, + CPU_AVR_ATMEGA1280 = 51130, + CPU_AVR_ATMEGA256 = 60256, + CPU_AVR_ATMEGA256RFR2 = 60257, + CPU_AVR_ATMEGA2560 = 60258, + +// AVR memory + AVR_MEM_FLASH = 134217728, + +// AVR registers + + AVR_REG_INVALID = 0, + AVR_REG_R0 = 1, + AVR_REG_R1 = 2, + AVR_REG_R2 = 3, + AVR_REG_R3 = 4, + AVR_REG_R4 = 5, + AVR_REG_R5 = 6, + AVR_REG_R6 = 7, + AVR_REG_R7 = 8, + AVR_REG_R8 = 9, + AVR_REG_R9 = 10, + AVR_REG_R10 = 11, + AVR_REG_R11 = 12, + AVR_REG_R12 = 13, + AVR_REG_R13 = 14, + AVR_REG_R14 = 15, + AVR_REG_R15 = 16, + AVR_REG_R16 = 17, + AVR_REG_R17 = 18, + AVR_REG_R18 = 19, + AVR_REG_R19 = 20, + AVR_REG_R20 = 21, + AVR_REG_R21 = 22, + AVR_REG_R22 = 23, + AVR_REG_R23 = 24, + AVR_REG_R24 = 25, + AVR_REG_R25 = 26, + AVR_REG_R26 = 27, + AVR_REG_R27 = 28, + AVR_REG_R28 = 29, + AVR_REG_R29 = 30, + AVR_REG_R30 = 31, + AVR_REG_R31 = 32, + AVR_REG_PC = 33, + AVR_REG_SP = 34, + AVR_REG_RAMPD = 57, + AVR_REG_RAMPX = 58, + AVR_REG_RAMPY = 59, + AVR_REG_RAMPZ = 60, + AVR_REG_EIND = 61, + AVR_REG_SPL = 62, + AVR_REG_SPH = 63, + AVR_REG_SREG = 64, + +// 16-bit coalesced registers + AVR_REG_R0W = 65, + AVR_REG_R1W = 66, + AVR_REG_R2W = 67, + AVR_REG_R3W = 68, + AVR_REG_R4W = 69, + AVR_REG_R5W = 70, + AVR_REG_R6W = 71, + AVR_REG_R7W = 72, + AVR_REG_R8W = 73, + AVR_REG_R9W = 74, + AVR_REG_R10W = 75, + AVR_REG_R11W = 76, + AVR_REG_R12W = 77, + AVR_REG_R13W = 78, + AVR_REG_R14W = 79, + AVR_REG_R15W = 80, + AVR_REG_R16W = 81, + AVR_REG_R17W = 82, + AVR_REG_R18W = 83, + AVR_REG_R19W = 84, + AVR_REG_R20W = 85, + AVR_REG_R21W = 86, + AVR_REG_R22W = 87, + AVR_REG_R23W = 88, + AVR_REG_R24W = 89, + AVR_REG_R25W = 90, + AVR_REG_R26W = 91, + AVR_REG_R27W = 92, + AVR_REG_R28W = 93, + AVR_REG_R29W = 94, + AVR_REG_R30W = 95, + +// 32-bit coalesced registers + AVR_REG_R0D = 97, + AVR_REG_R1D = 98, + AVR_REG_R2D = 99, + AVR_REG_R3D = 100, + AVR_REG_R4D = 101, + AVR_REG_R5D = 102, + AVR_REG_R6D = 103, + AVR_REG_R7D = 104, + AVR_REG_R8D = 105, + AVR_REG_R9D = 106, + AVR_REG_R10D = 107, + AVR_REG_R11D = 108, + AVR_REG_R12D = 109, + AVR_REG_R13D = 110, + AVR_REG_R14D = 111, + AVR_REG_R15D = 112, + AVR_REG_R16D = 113, + AVR_REG_R17D = 114, + AVR_REG_R18D = 115, + AVR_REG_R19D = 116, + AVR_REG_R20D = 117, + AVR_REG_R21D = 118, + AVR_REG_R22D = 119, + AVR_REG_R23D = 120, + AVR_REG_R24D = 121, + AVR_REG_R25D = 122, + AVR_REG_R26D = 123, + AVR_REG_R27D = 124, + AVR_REG_R28D = 125, + +// Alias registers + AVR_REG_Xhi = 28, + AVR_REG_Xlo = 27, + AVR_REG_Yhi = 30, + AVR_REG_Ylo = 29, + AVR_REG_Zhi = 32, + AVR_REG_Zlo = 31, + AVR_REG_X = 91, + AVR_REG_Y = 93, + AVR_REG_Z = 95, + +}; diff --git a/bindings/zig/unicorn/rh850_const.zig b/bindings/zig/unicorn/rh850_const.zig new file mode 100644 index 0000000000..3b965c6333 --- /dev/null +++ b/bindings/zig/unicorn/rh850_const.zig @@ -0,0 +1,95 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +pub const rh850Const = enum(c_int) { + RH850_SYSREG_SELID0 = 32, + RH850_SYSREG_SELID1 = 64, + RH850_SYSREG_SELID2 = 96, + RH850_SYSREG_SELID3 = 128, + RH850_SYSREG_SELID4 = 160, + RH850_SYSREG_SELID5 = 192, + RH850_SYSREG_SELID6 = 224, + RH850_SYSREG_SELID7 = 256, + +// RH850 global purpose registers + + RH850_REG_R0 = 0, + RH850_REG_R1 = 1, + RH850_REG_R2 = 2, + RH850_REG_R3 = 3, + RH850_REG_R4 = 4, + RH850_REG_R5 = 5, + RH850_REG_R6 = 6, + RH850_REG_R7 = 7, + RH850_REG_R8 = 8, + RH850_REG_R9 = 9, + RH850_REG_R10 = 10, + RH850_REG_R11 = 11, + RH850_REG_R12 = 12, + RH850_REG_R13 = 13, + RH850_REG_R14 = 14, + RH850_REG_R15 = 15, + RH850_REG_R16 = 16, + RH850_REG_R17 = 17, + RH850_REG_R18 = 18, + RH850_REG_R19 = 19, + RH850_REG_R20 = 20, + RH850_REG_R21 = 21, + RH850_REG_R22 = 22, + RH850_REG_R23 = 23, + RH850_REG_R24 = 24, + RH850_REG_R25 = 25, + RH850_REG_R26 = 26, + RH850_REG_R27 = 27, + RH850_REG_R28 = 28, + RH850_REG_R29 = 29, + RH850_REG_R30 = 30, + RH850_REG_R31 = 31, + +// RH850 system registers, selection ID 0 + RH850_REG_EIPC = 32, + RH850_REG_EIPSW = 33, + RH850_REG_FEPC = 34, + RH850_REG_FEPSW = 35, + RH850_REG_ECR = 36, + RH850_REG_PSW = 37, + RH850_REG_FPSR = 38, + RH850_REG_FPEPC = 39, + RH850_REG_FPST = 40, + RH850_REG_FPCC = 41, + RH850_REG_FPCFG = 42, + RH850_REG_FPEC = 43, + RH850_REG_EIIC = 45, + RH850_REG_FEIC = 46, + RH850_REG_CTPC = 48, + RH850_REG_CTPSW = 49, + RH850_REG_CTBP = 52, + RH850_REG_EIWR = 60, + RH850_REG_FEWR = 61, + RH850_REG_BSEL = 63, + +// RH850 system registers, selection ID 1 + RH850_REG_MCFG0 = 64, + RH850_REG_RBASE = 65, + RH850_REG_EBASE = 66, + RH850_REG_INTBP = 67, + RH850_REG_MCTL = 68, + RH850_REG_PID = 69, + RH850_REG_SCCFG = 75, + RH850_REG_SCBP = 76, + +// RH850 system registers, selection ID 2 + RH850_REG_HTCFG0 = 96, + RH850_REG_MEA = 102, + RH850_REG_ASID = 103, + RH850_REG_MEI = 104, + RH850_REG_PC = 288, + RH850_REG_ENDING = 289, + +// RH850 register aliases.
+ + RH850_REG_ZERO = 0, + RH850_REG_SP = 3, + RH850_REG_EP = 30, + RH850_REG_LP = 31, + +}; diff --git a/bindings/zig/unicorn/unicorn_const.zig b/bindings/zig/unicorn/unicorn_const.zig index 7991ff9783..2c3250b6be 100644 --- a/bindings/zig/unicorn/unicorn_const.zig +++ b/bindings/zig/unicorn/unicorn_const.zig @@ -21,7 +21,9 @@ pub const unicornConst = enum(c_int) { ARCH_RISCV = 8, ARCH_S390X = 9, ARCH_TRICORE = 10, - ARCH_MAX = 11, + ARCH_AVR = 11, + ARCH_RH850 = 12, + ARCH_MAX = 13, MODE_LITTLE_ENDIAN = 0, MODE_BIG_ENDIAN = 1073741824, @@ -48,6 +50,7 @@ pub const unicornConst = enum(c_int) { MODE_SPARC32 = 4, MODE_SPARC64 = 8, MODE_V9 = 16, + MODE_RH850 = 4, MODE_RISCV32 = 4, MODE_RISCV64 = 8, diff --git a/build.zig b/build.zig index e1eae62470..ca5f905bfb 100644 --- a/build.zig +++ b/build.zig @@ -74,6 +74,7 @@ pub fn build(b: *std.Build) void { .{ .file_type = .zig, .root_file_path = "bindings/zig/sample/sample_riscv_zig.zig" }, .{ .file_type = .c, .root_file_path = "samples/sample_arm.c" }, .{ .file_type = .c, .root_file_path = "samples/sample_arm64.c" }, + .{ .file_type = .c, .root_file_path = "samples/sample_avr.c" }, .{ .file_type = .c, .root_file_path = "samples/sample_ctl.c" }, .{ .file_type = .c, .root_file_path = "samples/sample_batch_reg.c" }, .{ .file_type = .c, .root_file_path = "samples/sample_m68k.c" }, diff --git a/format.sh b/format.sh old mode 100644 new mode 100755 diff --git a/include/uc_priv.h b/include/uc_priv.h index 484fc53e3f..f3a1c045e7 100644 --- a/include/uc_priv.h +++ b/include/uc_priv.h @@ -35,6 +35,7 @@ (UC_MODE_RISCV32 | UC_MODE_RISCV64 | UC_MODE_LITTLE_ENDIAN) #define UC_MODE_S390X_MASK (UC_MODE_BIG_ENDIAN) #define UC_MODE_TRICORE_MASK (UC_MODE_LITTLE_ENDIAN) +#define UC_MODE_AVR_MASK (UC_MODE_LITTLE_ENDIAN) #define ARR_SIZE(a) (sizeof(a) / sizeof(a[0])) diff --git a/include/unicorn/avr.h b/include/unicorn/avr.h new file mode 100644 index 0000000000..0487d3fd09 --- /dev/null +++ b/include/unicorn/avr.h @@ -0,0 +1,189 @@ +/* This file is released under LGPL2. + See COPYING.LGPL2 in root directory for more details +*/ + +/* + Created for Unicorn Engine by Glenn Baker , 2024 +*/ + +#ifndef UNICORN_AVR_H +#define UNICORN_AVR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +#pragma warning(disable : 4201) +#endif + +//> AVR architectures +typedef enum uc_avr_arch { + UC_AVR_ARCH_AVR1 = 10, + UC_AVR_ARCH_AVR2 = 20, + UC_AVR_ARCH_AVR25 = 25, + UC_AVR_ARCH_AVR3 = 30, + UC_AVR_ARCH_AVR4 = 40, + UC_AVR_ARCH_AVR5 = 50, + UC_AVR_ARCH_AVR51 = 51, + UC_AVR_ARCH_AVR6 = 60, +} uc_avr_arch; + +#define UC_CPU_AVR_ARCH 1000 + +//> AVR CPU +typedef enum uc_cpu_avr { + // Enhanced Core with 16K up to 64K of program memory ("AVR5") + UC_CPU_AVR_ATMEGA16 = UC_AVR_ARCH_AVR5*UC_CPU_AVR_ARCH + 16, + UC_CPU_AVR_ATMEGA32 = UC_AVR_ARCH_AVR5*UC_CPU_AVR_ARCH + 32, + UC_CPU_AVR_ATMEGA64 = UC_AVR_ARCH_AVR5*UC_CPU_AVR_ARCH + 64, + + // Enhanced Core with 128K of program memory ("AVR5.1") + UC_CPU_AVR_ATMEGA128 = UC_AVR_ARCH_AVR51*UC_CPU_AVR_ARCH + 128, + UC_CPU_AVR_ATMEGA128RFR2, + UC_CPU_AVR_ATMEGA1280, + + // Enhanced Core with 128K+ of program memory, i.e. 
3-byte PC ("AVR6") + UC_CPU_AVR_ATMEGA256 = UC_AVR_ARCH_AVR6*UC_CPU_AVR_ARCH + 256, + UC_CPU_AVR_ATMEGA256RFR2, + UC_CPU_AVR_ATMEGA2560, +} uc_cpu_avr; + +//> AVR memory +typedef enum uc_avr_mem { + // Flash program memory (code) + UC_AVR_MEM_FLASH = 0x08000000, +} uc_avr_mem; + +//> AVR registers +typedef enum uc_avr_reg { + UC_AVR_REG_INVALID = 0, + + // General purpose registers (GPR) + UC_AVR_REG_R0 = 1, + UC_AVR_REG_R1, + UC_AVR_REG_R2, + UC_AVR_REG_R3, + UC_AVR_REG_R4, + UC_AVR_REG_R5, + UC_AVR_REG_R6, + UC_AVR_REG_R7, + UC_AVR_REG_R8, + UC_AVR_REG_R9, + UC_AVR_REG_R10, + UC_AVR_REG_R11, + UC_AVR_REG_R12, + UC_AVR_REG_R13, + UC_AVR_REG_R14, + UC_AVR_REG_R15, + UC_AVR_REG_R16, + UC_AVR_REG_R17, + UC_AVR_REG_R18, + UC_AVR_REG_R19, + UC_AVR_REG_R20, + UC_AVR_REG_R21, + UC_AVR_REG_R22, + UC_AVR_REG_R23, + UC_AVR_REG_R24, + UC_AVR_REG_R25, + UC_AVR_REG_R26, + UC_AVR_REG_R27, + UC_AVR_REG_R28, + UC_AVR_REG_R29, + UC_AVR_REG_R30, + UC_AVR_REG_R31, + + UC_AVR_REG_PC, + UC_AVR_REG_SP, + + UC_AVR_REG_RAMPD = UC_AVR_REG_PC + 16 + 8, + UC_AVR_REG_RAMPX, + UC_AVR_REG_RAMPY, + UC_AVR_REG_RAMPZ, + UC_AVR_REG_EIND, + UC_AVR_REG_SPL, + UC_AVR_REG_SPH, + UC_AVR_REG_SREG, + + //> 16-bit coalesced registers + UC_AVR_REG_R0W = UC_AVR_REG_PC + 32, + UC_AVR_REG_R1W, + UC_AVR_REG_R2W, + UC_AVR_REG_R3W, + UC_AVR_REG_R4W, + UC_AVR_REG_R5W, + UC_AVR_REG_R6W, + UC_AVR_REG_R7W, + UC_AVR_REG_R8W, + UC_AVR_REG_R9W, + UC_AVR_REG_R10W, + UC_AVR_REG_R11W, + UC_AVR_REG_R12W, + UC_AVR_REG_R13W, + UC_AVR_REG_R14W, + UC_AVR_REG_R15W, + UC_AVR_REG_R16W, + UC_AVR_REG_R17W, + UC_AVR_REG_R18W, + UC_AVR_REG_R19W, + UC_AVR_REG_R20W, + UC_AVR_REG_R21W, + UC_AVR_REG_R22W, + UC_AVR_REG_R23W, + UC_AVR_REG_R24W, + UC_AVR_REG_R25W, + UC_AVR_REG_R26W, + UC_AVR_REG_R27W, + UC_AVR_REG_R28W, + UC_AVR_REG_R29W, + UC_AVR_REG_R30W, + + //> 32-bit coalesced registers + UC_AVR_REG_R0D = UC_AVR_REG_PC + 64, + UC_AVR_REG_R1D, + UC_AVR_REG_R2D, + UC_AVR_REG_R3D, + UC_AVR_REG_R4D, + UC_AVR_REG_R5D, + UC_AVR_REG_R6D, + UC_AVR_REG_R7D, + UC_AVR_REG_R8D, + UC_AVR_REG_R9D, + UC_AVR_REG_R10D, + UC_AVR_REG_R11D, + UC_AVR_REG_R12D, + UC_AVR_REG_R13D, + UC_AVR_REG_R14D, + UC_AVR_REG_R15D, + UC_AVR_REG_R16D, + UC_AVR_REG_R17D, + UC_AVR_REG_R18D, + UC_AVR_REG_R19D, + UC_AVR_REG_R20D, + UC_AVR_REG_R21D, + UC_AVR_REG_R22D, + UC_AVR_REG_R23D, + UC_AVR_REG_R24D, + UC_AVR_REG_R25D, + UC_AVR_REG_R26D, + UC_AVR_REG_R27D, + UC_AVR_REG_R28D, + + //> Alias registers + UC_AVR_REG_Xhi = UC_AVR_REG_R27, + UC_AVR_REG_Xlo = UC_AVR_REG_R26, + UC_AVR_REG_Yhi = UC_AVR_REG_R29, + UC_AVR_REG_Ylo = UC_AVR_REG_R28, + UC_AVR_REG_Zhi = UC_AVR_REG_R31, + UC_AVR_REG_Zlo = UC_AVR_REG_R30, + + UC_AVR_REG_X = UC_AVR_REG_R26W, + UC_AVR_REG_Y = UC_AVR_REG_R28W, + UC_AVR_REG_Z = UC_AVR_REG_R30W, +} uc_avr_reg; + +#ifdef __cplusplus +} +#endif + +#endif /* UNICORN_AVR_H */ diff --git a/include/unicorn/rh850.h b/include/unicorn/rh850.h new file mode 100644 index 0000000000..963e0bc042 --- /dev/null +++ b/include/unicorn/rh850.h @@ -0,0 +1,111 @@ +/* Unicorn Engine */ +/* By Damien Cauquil , 2023 */ + +#ifndef UNICORN_RH850_H +#define UNICORN_RH850_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +#pragma warning(disable : 4201) +#endif + +#define UC_RH850_SYSREG_SELID0 32 +#define UC_RH850_SYSREG_SELID1 64 +#define UC_RH850_SYSREG_SELID2 96 +#define UC_RH850_SYSREG_SELID3 128 +#define UC_RH850_SYSREG_SELID4 160 +#define UC_RH850_SYSREG_SELID5 192 +#define UC_RH850_SYSREG_SELID6 224 +#define UC_RH850_SYSREG_SELID7 256 + +//> RH850 global purpose registers +typedef enum 
uc_rh850_reg { + UC_RH850_REG_R0 = 0, + UC_RH850_REG_R1, + UC_RH850_REG_R2, + UC_RH850_REG_R3, + UC_RH850_REG_R4, + UC_RH850_REG_R5, + UC_RH850_REG_R6, + UC_RH850_REG_R7, + UC_RH850_REG_R8, + UC_RH850_REG_R9, + UC_RH850_REG_R10, + UC_RH850_REG_R11, + UC_RH850_REG_R12, + UC_RH850_REG_R13, + UC_RH850_REG_R14, + UC_RH850_REG_R15, + UC_RH850_REG_R16, + UC_RH850_REG_R17, + UC_RH850_REG_R18, + UC_RH850_REG_R19, + UC_RH850_REG_R20, + UC_RH850_REG_R21, + UC_RH850_REG_R22, + UC_RH850_REG_R23, + UC_RH850_REG_R24, + UC_RH850_REG_R25, + UC_RH850_REG_R26, + UC_RH850_REG_R27, + UC_RH850_REG_R28, + UC_RH850_REG_R29, + UC_RH850_REG_R30, + UC_RH850_REG_R31, + + //> RH850 system registers, selection ID 0 + UC_RH850_REG_EIPC = UC_RH850_SYSREG_SELID0, + UC_RH850_REG_EIPSW, + UC_RH850_REG_FEPC, + UC_RH850_REG_FEPSW, + UC_RH850_REG_ECR, + UC_RH850_REG_PSW, + UC_RH850_REG_FPSR, + UC_RH850_REG_FPEPC, + UC_RH850_REG_FPST, + UC_RH850_REG_FPCC, + UC_RH850_REG_FPCFG, + UC_RH850_REG_FPEC, + UC_RH850_REG_EIIC = UC_RH850_SYSREG_SELID0 + 13, + UC_RH850_REG_FEIC, + UC_RH850_REG_CTPC = UC_RH850_SYSREG_SELID0 + 16, + UC_RH850_REG_CTPSW, + UC_RH850_REG_CTBP = UC_RH850_SYSREG_SELID0 + 20, + UC_RH850_REG_EIWR = UC_RH850_SYSREG_SELID0 + 28, + UC_RH850_REG_FEWR = UC_RH850_SYSREG_SELID0 + 29, + UC_RH850_REG_BSEL = UC_RH850_SYSREG_SELID0 + 31, + + //> RH850 system registers, selection ID 1 + UC_RH850_REG_MCFG0 = UC_RH850_SYSREG_SELID1, + UC_RH850_REG_RBASE, + UC_RH850_REG_EBASE, + UC_RH850_REG_INTBP, + UC_RH850_REG_MCTL, + UC_RH850_REG_PID, + UC_RH850_REG_SCCFG = UC_RH850_SYSREG_SELID1 + 11, + UC_RH850_REG_SCBP, + + //> RH850 system registers, selection ID 2 + UC_RH850_REG_HTCFG0 = UC_RH850_SYSREG_SELID2, + UC_RH850_REG_MEA = UC_RH850_SYSREG_SELID2 + 6, + UC_RH850_REG_ASID, + UC_RH850_REG_MEI, + + UC_RH850_REG_PC = UC_RH850_SYSREG_SELID7 + 32, + UC_RH850_REG_ENDING +} uc_cpu_rh850; + +//> RH850 register aliases.
+#define UC_RH850_REG_ZERO UC_RH850_REG_R0 +#define UC_RH850_REG_SP UC_RH850_REG_R3 +#define UC_RH850_REG_EP UC_RH850_REG_R30 +#define UC_RH850_REG_LP UC_RH850_REG_R31 + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/include/unicorn/unicorn.h b/include/unicorn/unicorn.h index faac1378c5..9c9d77b6ce 100644 --- a/include/unicorn/unicorn.h +++ b/include/unicorn/unicorn.h @@ -33,9 +33,11 @@ typedef size_t uc_hook; #include "mips.h" #include "sparc.h" #include "ppc.h" +#include "rh850.h" #include "riscv.h" #include "s390x.h" #include "tricore.h" +#include "avr.h" #ifdef __GNUC__ #define DEFAULT_VISIBILITY __attribute__((visibility("default"))) @@ -118,6 +120,8 @@ typedef enum uc_arch { UC_ARCH_RISCV, // RISCV architecture UC_ARCH_S390X, // S390X architecture UC_ARCH_TRICORE, // TriCore architecture + UC_ARCH_AVR, // AVR architecture + UC_ARCH_RH850, // Renesas RH850 architecture (V850e3v2) UC_ARCH_MAX, } uc_arch; @@ -164,6 +168,9 @@ typedef enum uc_mode { UC_MODE_SPARC64 = 1 << 3, // 64-bit mode UC_MODE_V9 = 1 << 4, // SparcV9 mode (currently unsupported) + // rh850 + UC_MODE_RH850 = 1 << 2, // 32-bit mode + // riscv UC_MODE_RISCV32 = 1 << 2, // 32-bit mode UC_MODE_RISCV64 = 1 << 3, // 64-bit mode diff --git a/msvc/avr-softmmu/config-target.h b/msvc/avr-softmmu/config-target.h new file mode 100644 index 0000000000..3afeec6c7a --- /dev/null +++ b/msvc/avr-softmmu/config-target.h @@ -0,0 +1,5 @@ +/* Automatically generated by create_config - do not modify */ +#define TARGET_AVR 1 +#define TARGET_NAME "avr" +#define TARGET_AVR 1 +#define CONFIG_SOFTMMU 1 diff --git a/msvc/rh850-softmmu/config-target.h b/msvc/rh850-softmmu/config-target.h new file mode 100644 index 0000000000..69d3c14d97 --- /dev/null +++ b/msvc/rh850-softmmu/config-target.h @@ -0,0 +1,6 @@ +/* Automatically generated by create_config - do not modify */ +#define TARGET_RH850 1 +#define TARGET_NAME "rh850" +#define TARGET_RH850 1 +#define TARGET_SYSTBL_ABI common,nospu,64 +#define CONFIG_SOFTMMU 1 diff --git a/qemu/MAINTAINERS b/qemu/MAINTAINERS index 8cbc1fac2b..ed80410291 100644 --- a/qemu/MAINTAINERS +++ b/qemu/MAINTAINERS @@ -952,6 +952,15 @@ F: include/hw/*/nrf51*.h F: include/hw/*/microbit*.h F: tests/qtest/microbit-test.c +AVR Machines +------------- + +AVR MCUs +M: Michael Rolnik +R: Sarah Harris +S: Maintained +F: default-configs/avr-softmmu.mak + CRIS Machines ------------- Axis Dev88 diff --git a/qemu/avr.h b/qemu/avr.h new file mode 100644 index 0000000000..bb37176913 --- /dev/null +++ b/qemu/avr.h @@ -0,0 +1,1297 @@ +/* Autogen header for Unicorn Engine - DONOT MODIFY */ +#ifndef UNICORN_AUTOGEN_avr_H +#define UNICORN_AUTOGEN_avr_H +#ifndef UNICORN_ARCH_POSTFIX +#define UNICORN_ARCH_POSTFIX _avr +#endif +#define uc_add_inline_hook uc_add_inline_hook_avr +#define uc_del_inline_hook uc_del_inline_hook_avr +#define tb_invalidate_phys_range tb_invalidate_phys_range_avr +#define use_idiv_instructions use_idiv_instructions_avr +#define arm_arch arm_arch_avr +#define tb_target_set_jmp_target tb_target_set_jmp_target_avr +#define have_bmi1 have_bmi1_avr +#define have_popcnt have_popcnt_avr +#define have_avx1 have_avx1_avr +#define have_avx2 have_avx2_avr +#define have_isa have_isa_avr +#define have_altivec have_altivec_avr +#define have_vsx have_vsx_avr +#define flush_icache_range flush_icache_range_avr +#define s390_facilities s390_facilities_avr +#define tcg_dump_op tcg_dump_op_avr +#define tcg_dump_ops tcg_dump_ops_avr +#define tcg_gen_and_i64 tcg_gen_and_i64_avr +#define 
tcg_gen_discard_i64 tcg_gen_discard_i64_avr +#define tcg_gen_ld16s_i64 tcg_gen_ld16s_i64_avr +#define tcg_gen_ld16u_i64 tcg_gen_ld16u_i64_avr +#define tcg_gen_ld32s_i64 tcg_gen_ld32s_i64_avr +#define tcg_gen_ld32u_i64 tcg_gen_ld32u_i64_avr +#define tcg_gen_ld8s_i64 tcg_gen_ld8s_i64_avr +#define tcg_gen_ld8u_i64 tcg_gen_ld8u_i64_avr +#define tcg_gen_ld_i64 tcg_gen_ld_i64_avr +#define tcg_gen_mov_i64 tcg_gen_mov_i64_avr +#define tcg_gen_movi_i64 tcg_gen_movi_i64_avr +#define tcg_gen_mul_i64 tcg_gen_mul_i64_avr +#define tcg_gen_or_i64 tcg_gen_or_i64_avr +#define tcg_gen_sar_i64 tcg_gen_sar_i64_avr +#define tcg_gen_shl_i64 tcg_gen_shl_i64_avr +#define tcg_gen_shr_i64 tcg_gen_shr_i64_avr +#define tcg_gen_st_i64 tcg_gen_st_i64_avr +#define tcg_gen_xor_i64 tcg_gen_xor_i64_avr +#define cpu_icount_to_ns cpu_icount_to_ns_avr +#define cpu_is_stopped cpu_is_stopped_avr +#define cpu_get_ticks cpu_get_ticks_avr +#define cpu_get_clock cpu_get_clock_avr +#define cpu_resume cpu_resume_avr +#define qemu_init_vcpu qemu_init_vcpu_avr +#define cpu_stop_current cpu_stop_current_avr +#define resume_all_vcpus resume_all_vcpus_avr +#define vm_start vm_start_avr +#define address_space_dispatch_compact address_space_dispatch_compact_avr +#define flatview_translate flatview_translate_avr +#define flatview_copy flatview_copy_avr +#define address_space_translate_for_iotlb address_space_translate_for_iotlb_avr +#define qemu_get_cpu qemu_get_cpu_avr +#define cpu_address_space_init cpu_address_space_init_avr +#define cpu_get_address_space cpu_get_address_space_avr +#define cpu_exec_unrealizefn cpu_exec_unrealizefn_avr +#define cpu_exec_initfn cpu_exec_initfn_avr +#define cpu_exec_realizefn cpu_exec_realizefn_avr +#define tb_invalidate_phys_addr tb_invalidate_phys_addr_avr +#define cpu_watchpoint_insert cpu_watchpoint_insert_avr +#define cpu_watchpoint_remove_by_ref cpu_watchpoint_remove_by_ref_avr +#define cpu_watchpoint_remove_all cpu_watchpoint_remove_all_avr +#define cpu_watchpoint_address_matches cpu_watchpoint_address_matches_avr +#define cpu_breakpoint_insert cpu_breakpoint_insert_avr +#define cpu_breakpoint_remove cpu_breakpoint_remove_avr +#define cpu_breakpoint_remove_by_ref cpu_breakpoint_remove_by_ref_avr +#define cpu_breakpoint_remove_all cpu_breakpoint_remove_all_avr +#define cpu_abort cpu_abort_avr +#define cpu_physical_memory_test_and_clear_dirty cpu_physical_memory_test_and_clear_dirty_avr +#define memory_region_section_get_iotlb memory_region_section_get_iotlb_avr +#define flatview_add_to_dispatch flatview_add_to_dispatch_avr +#define qemu_ram_get_host_addr qemu_ram_get_host_addr_avr +#define qemu_ram_get_offset qemu_ram_get_offset_avr +#define qemu_ram_get_used_length qemu_ram_get_used_length_avr +#define qemu_ram_is_shared qemu_ram_is_shared_avr +#define qemu_ram_pagesize qemu_ram_pagesize_avr +#define qemu_ram_alloc_from_ptr qemu_ram_alloc_from_ptr_avr +#define qemu_ram_alloc qemu_ram_alloc_avr +#define qemu_ram_free qemu_ram_free_avr +#define qemu_map_ram_ptr qemu_map_ram_ptr_avr +#define qemu_ram_block_host_offset qemu_ram_block_host_offset_avr +#define qemu_ram_block_from_host qemu_ram_block_from_host_avr +#define qemu_ram_addr_from_host qemu_ram_addr_from_host_avr +#define cpu_check_watchpoint cpu_check_watchpoint_avr +#define iotlb_to_section iotlb_to_section_avr +#define address_space_dispatch_new address_space_dispatch_new_avr +#define address_space_dispatch_free address_space_dispatch_free_avr +#define address_space_dispatch_clear address_space_dispatch_clear_avr +#define flatview_read_continue 
flatview_read_continue_avr +#define address_space_read_full address_space_read_full_avr +#define address_space_write address_space_write_avr +#define address_space_rw address_space_rw_avr +#define cpu_physical_memory_rw cpu_physical_memory_rw_avr +#define address_space_write_rom address_space_write_rom_avr +#define cpu_flush_icache_range cpu_flush_icache_range_avr +#define cpu_exec_init_all cpu_exec_init_all_avr +#define address_space_access_valid address_space_access_valid_avr +#define address_space_map address_space_map_avr +#define address_space_unmap address_space_unmap_avr +#define cpu_physical_memory_map cpu_physical_memory_map_avr +#define cpu_physical_memory_unmap cpu_physical_memory_unmap_avr +#define cpu_memory_rw_debug cpu_memory_rw_debug_avr +#define qemu_target_page_size qemu_target_page_size_avr +#define qemu_target_page_bits qemu_target_page_bits_avr +#define qemu_target_page_bits_min qemu_target_page_bits_min_avr +#define target_words_bigendian target_words_bigendian_avr +#define cpu_physical_memory_is_io cpu_physical_memory_is_io_avr +#define ram_block_discard_range ram_block_discard_range_avr +#define ramblock_is_pmem ramblock_is_pmem_avr +#define page_size_init page_size_init_avr +#define set_preferred_target_page_bits set_preferred_target_page_bits_avr +#define finalize_target_page_bits finalize_target_page_bits_avr +#define cpu_outb cpu_outb_avr +#define cpu_outw cpu_outw_avr +#define cpu_outl cpu_outl_avr +#define cpu_inb cpu_inb_avr +#define cpu_inw cpu_inw_avr +#define cpu_inl cpu_inl_avr +#define memory_map memory_map_avr +#define memory_map_io memory_map_io_avr +#define memory_map_ptr memory_map_ptr_avr +#define memory_unmap memory_unmap_avr +#define memory_free memory_free_avr +#define flatview_unref flatview_unref_avr +#define address_space_get_flatview address_space_get_flatview_avr +#define memory_region_transaction_begin memory_region_transaction_begin_avr +#define memory_region_transaction_commit memory_region_transaction_commit_avr +#define memory_region_init memory_region_init_avr +#define memory_region_access_valid memory_region_access_valid_avr +#define memory_region_dispatch_read memory_region_dispatch_read_avr +#define memory_region_dispatch_write memory_region_dispatch_write_avr +#define memory_region_init_io memory_region_init_io_avr +#define memory_region_init_ram_ptr memory_region_init_ram_ptr_avr +#define memory_region_size memory_region_size_avr +#define memory_region_set_readonly memory_region_set_readonly_avr +#define memory_region_get_ram_ptr memory_region_get_ram_ptr_avr +#define memory_region_from_host memory_region_from_host_avr +#define memory_region_get_ram_addr memory_region_get_ram_addr_avr +#define memory_region_add_subregion memory_region_add_subregion_avr +#define memory_region_del_subregion memory_region_del_subregion_avr +#define memory_region_find memory_region_find_avr +#define memory_listener_register memory_listener_register_avr +#define memory_listener_unregister memory_listener_unregister_avr +#define address_space_remove_listeners address_space_remove_listeners_avr +#define address_space_init address_space_init_avr +#define address_space_destroy address_space_destroy_avr +#define memory_region_init_ram memory_region_init_ram_avr +#define memory_mapping_list_add_merge_sorted memory_mapping_list_add_merge_sorted_avr +#define exec_inline_op exec_inline_op_avr +#define floatx80_default_nan floatx80_default_nan_avr +#define float_raise float_raise_avr +#define float16_is_quiet_nan float16_is_quiet_nan_avr +#define 
float16_is_signaling_nan float16_is_signaling_nan_avr +#define float32_is_quiet_nan float32_is_quiet_nan_avr +#define float32_is_signaling_nan float32_is_signaling_nan_avr +#define float64_is_quiet_nan float64_is_quiet_nan_avr +#define float64_is_signaling_nan float64_is_signaling_nan_avr +#define floatx80_is_quiet_nan floatx80_is_quiet_nan_avr +#define floatx80_is_signaling_nan floatx80_is_signaling_nan_avr +#define floatx80_silence_nan floatx80_silence_nan_avr +#define propagateFloatx80NaN propagateFloatx80NaN_avr +#define float128_is_quiet_nan float128_is_quiet_nan_avr +#define float128_is_signaling_nan float128_is_signaling_nan_avr +#define float128_silence_nan float128_silence_nan_avr +#define float16_add float16_add_avr +#define float16_sub float16_sub_avr +#define float32_add float32_add_avr +#define float32_sub float32_sub_avr +#define float64_add float64_add_avr +#define float64_sub float64_sub_avr +#define float16_mul float16_mul_avr +#define float32_mul float32_mul_avr +#define float64_mul float64_mul_avr +#define float16_muladd float16_muladd_avr +#define float32_muladd float32_muladd_avr +#define float64_muladd float64_muladd_avr +#define float16_div float16_div_avr +#define float32_div float32_div_avr +#define float64_div float64_div_avr +#define float16_to_float32 float16_to_float32_avr +#define float16_to_float64 float16_to_float64_avr +#define float32_to_float16 float32_to_float16_avr +#define float32_to_float64 float32_to_float64_avr +#define float64_to_float16 float64_to_float16_avr +#define float64_to_float32 float64_to_float32_avr +#define float16_round_to_int float16_round_to_int_avr +#define float32_round_to_int float32_round_to_int_avr +#define float64_round_to_int float64_round_to_int_avr +#define float16_to_int16_scalbn float16_to_int16_scalbn_avr +#define float16_to_int32_scalbn float16_to_int32_scalbn_avr +#define float16_to_int64_scalbn float16_to_int64_scalbn_avr +#define float32_to_int16_scalbn float32_to_int16_scalbn_avr +#define float32_to_int32_scalbn float32_to_int32_scalbn_avr +#define float32_to_int64_scalbn float32_to_int64_scalbn_avr +#define float64_to_int16_scalbn float64_to_int16_scalbn_avr +#define float64_to_int32_scalbn float64_to_int32_scalbn_avr +#define float64_to_int64_scalbn float64_to_int64_scalbn_avr +#define float16_to_int16 float16_to_int16_avr +#define float16_to_int32 float16_to_int32_avr +#define float16_to_int64 float16_to_int64_avr +#define float32_to_int16 float32_to_int16_avr +#define float32_to_int32 float32_to_int32_avr +#define float32_to_int64 float32_to_int64_avr +#define float64_to_int16 float64_to_int16_avr +#define float64_to_int32 float64_to_int32_avr +#define float64_to_int64 float64_to_int64_avr +#define float16_to_int16_round_to_zero float16_to_int16_round_to_zero_avr +#define float16_to_int32_round_to_zero float16_to_int32_round_to_zero_avr +#define float16_to_int64_round_to_zero float16_to_int64_round_to_zero_avr +#define float32_to_int16_round_to_zero float32_to_int16_round_to_zero_avr +#define float32_to_int32_round_to_zero float32_to_int32_round_to_zero_avr +#define float32_to_int64_round_to_zero float32_to_int64_round_to_zero_avr +#define float64_to_int16_round_to_zero float64_to_int16_round_to_zero_avr +#define float64_to_int32_round_to_zero float64_to_int32_round_to_zero_avr +#define float64_to_int64_round_to_zero float64_to_int64_round_to_zero_avr +#define float16_to_uint16_scalbn float16_to_uint16_scalbn_avr +#define float16_to_uint32_scalbn float16_to_uint32_scalbn_avr +#define float16_to_uint64_scalbn 
float16_to_uint64_scalbn_avr +#define float32_to_uint16_scalbn float32_to_uint16_scalbn_avr +#define float32_to_uint32_scalbn float32_to_uint32_scalbn_avr +#define float32_to_uint64_scalbn float32_to_uint64_scalbn_avr +#define float64_to_uint16_scalbn float64_to_uint16_scalbn_avr +#define float64_to_uint32_scalbn float64_to_uint32_scalbn_avr +#define float64_to_uint64_scalbn float64_to_uint64_scalbn_avr +#define float16_to_uint16 float16_to_uint16_avr +#define float16_to_uint32 float16_to_uint32_avr +#define float16_to_uint64 float16_to_uint64_avr +#define float32_to_uint16 float32_to_uint16_avr +#define float32_to_uint32 float32_to_uint32_avr +#define float32_to_uint64 float32_to_uint64_avr +#define float64_to_uint16 float64_to_uint16_avr +#define float64_to_uint32 float64_to_uint32_avr +#define float64_to_uint64 float64_to_uint64_avr +#define float16_to_uint16_round_to_zero float16_to_uint16_round_to_zero_avr +#define float16_to_uint32_round_to_zero float16_to_uint32_round_to_zero_avr +#define float16_to_uint64_round_to_zero float16_to_uint64_round_to_zero_avr +#define float32_to_uint16_round_to_zero float32_to_uint16_round_to_zero_avr +#define float32_to_uint32_round_to_zero float32_to_uint32_round_to_zero_avr +#define float32_to_uint64_round_to_zero float32_to_uint64_round_to_zero_avr +#define float64_to_uint16_round_to_zero float64_to_uint16_round_to_zero_avr +#define float64_to_uint32_round_to_zero float64_to_uint32_round_to_zero_avr +#define float64_to_uint64_round_to_zero float64_to_uint64_round_to_zero_avr +#define int64_to_float16_scalbn int64_to_float16_scalbn_avr +#define int32_to_float16_scalbn int32_to_float16_scalbn_avr +#define int16_to_float16_scalbn int16_to_float16_scalbn_avr +#define int64_to_float16 int64_to_float16_avr +#define int32_to_float16 int32_to_float16_avr +#define int16_to_float16 int16_to_float16_avr +#define int64_to_float32_scalbn int64_to_float32_scalbn_avr +#define int32_to_float32_scalbn int32_to_float32_scalbn_avr +#define int16_to_float32_scalbn int16_to_float32_scalbn_avr +#define int64_to_float32 int64_to_float32_avr +#define int32_to_float32 int32_to_float32_avr +#define int16_to_float32 int16_to_float32_avr +#define int64_to_float64_scalbn int64_to_float64_scalbn_avr +#define int32_to_float64_scalbn int32_to_float64_scalbn_avr +#define int16_to_float64_scalbn int16_to_float64_scalbn_avr +#define int64_to_float64 int64_to_float64_avr +#define int32_to_float64 int32_to_float64_avr +#define int16_to_float64 int16_to_float64_avr +#define uint64_to_float16_scalbn uint64_to_float16_scalbn_avr +#define uint32_to_float16_scalbn uint32_to_float16_scalbn_avr +#define uint16_to_float16_scalbn uint16_to_float16_scalbn_avr +#define uint64_to_float16 uint64_to_float16_avr +#define uint32_to_float16 uint32_to_float16_avr +#define uint16_to_float16 uint16_to_float16_avr +#define uint64_to_float32_scalbn uint64_to_float32_scalbn_avr +#define uint32_to_float32_scalbn uint32_to_float32_scalbn_avr +#define uint16_to_float32_scalbn uint16_to_float32_scalbn_avr +#define uint64_to_float32 uint64_to_float32_avr +#define uint32_to_float32 uint32_to_float32_avr +#define uint16_to_float32 uint16_to_float32_avr +#define uint64_to_float64_scalbn uint64_to_float64_scalbn_avr +#define uint32_to_float64_scalbn uint32_to_float64_scalbn_avr +#define uint16_to_float64_scalbn uint16_to_float64_scalbn_avr +#define uint64_to_float64 uint64_to_float64_avr +#define uint32_to_float64 uint32_to_float64_avr +#define uint16_to_float64 uint16_to_float64_avr +#define float16_min 
float16_min_avr +#define float16_minnum float16_minnum_avr +#define float16_minnummag float16_minnummag_avr +#define float16_max float16_max_avr +#define float16_maxnum float16_maxnum_avr +#define float16_maxnummag float16_maxnummag_avr +#define float32_min float32_min_avr +#define float32_minnum float32_minnum_avr +#define float32_minnummag float32_minnummag_avr +#define float32_max float32_max_avr +#define float32_maxnum float32_maxnum_avr +#define float32_maxnummag float32_maxnummag_avr +#define float64_min float64_min_avr +#define float64_minnum float64_minnum_avr +#define float64_minnummag float64_minnummag_avr +#define float64_max float64_max_avr +#define float64_maxnum float64_maxnum_avr +#define float64_maxnummag float64_maxnummag_avr +#define float16_compare float16_compare_avr +#define float16_compare_quiet float16_compare_quiet_avr +#define float32_compare float32_compare_avr +#define float32_compare_quiet float32_compare_quiet_avr +#define float64_compare float64_compare_avr +#define float64_compare_quiet float64_compare_quiet_avr +#define float16_scalbn float16_scalbn_avr +#define float32_scalbn float32_scalbn_avr +#define float64_scalbn float64_scalbn_avr +#define float16_sqrt float16_sqrt_avr +#define float32_sqrt float32_sqrt_avr +#define float64_sqrt float64_sqrt_avr +#define float16_default_nan float16_default_nan_avr +#define float32_default_nan float32_default_nan_avr +#define float64_default_nan float64_default_nan_avr +#define float128_default_nan float128_default_nan_avr +#define float16_silence_nan float16_silence_nan_avr +#define float32_silence_nan float32_silence_nan_avr +#define float64_silence_nan float64_silence_nan_avr +#define float16_squash_input_denormal float16_squash_input_denormal_avr +#define float32_squash_input_denormal float32_squash_input_denormal_avr +#define float64_squash_input_denormal float64_squash_input_denormal_avr +#define normalizeFloatx80Subnormal normalizeFloatx80Subnormal_avr +#define roundAndPackFloatx80 roundAndPackFloatx80_avr +#define normalizeRoundAndPackFloatx80 normalizeRoundAndPackFloatx80_avr +#define int32_to_floatx80 int32_to_floatx80_avr +#define int32_to_float128 int32_to_float128_avr +#define int64_to_floatx80 int64_to_floatx80_avr +#define int64_to_float128 int64_to_float128_avr +#define uint64_to_float128 uint64_to_float128_avr +#define float32_to_floatx80 float32_to_floatx80_avr +#define float32_to_float128 float32_to_float128_avr +#define float32_rem float32_rem_avr +#define float32_exp2 float32_exp2_avr +#define float32_log2 float32_log2_avr +#define float32_eq float32_eq_avr +#define float32_le float32_le_avr +#define float32_lt float32_lt_avr +#define float32_unordered float32_unordered_avr +#define float32_eq_quiet float32_eq_quiet_avr +#define float32_le_quiet float32_le_quiet_avr +#define float32_lt_quiet float32_lt_quiet_avr +#define float32_unordered_quiet float32_unordered_quiet_avr +#define float64_to_floatx80 float64_to_floatx80_avr +#define float64_to_float128 float64_to_float128_avr +#define float64_rem float64_rem_avr +#define float64_log2 float64_log2_avr +#define float64_eq float64_eq_avr +#define float64_le float64_le_avr +#define float64_lt float64_lt_avr +#define float64_unordered float64_unordered_avr +#define float64_eq_quiet float64_eq_quiet_avr +#define float64_le_quiet float64_le_quiet_avr +#define float64_lt_quiet float64_lt_quiet_avr +#define float64_unordered_quiet float64_unordered_quiet_avr +#define floatx80_to_int32 floatx80_to_int32_avr +#define floatx80_to_int32_round_to_zero 
floatx80_to_int32_round_to_zero_avr +#define floatx80_to_int64 floatx80_to_int64_avr +#define floatx80_to_int64_round_to_zero floatx80_to_int64_round_to_zero_avr +#define floatx80_to_float32 floatx80_to_float32_avr +#define floatx80_to_float64 floatx80_to_float64_avr +#define floatx80_to_float128 floatx80_to_float128_avr +#define floatx80_round floatx80_round_avr +#define floatx80_round_to_int floatx80_round_to_int_avr +#define floatx80_add floatx80_add_avr +#define floatx80_sub floatx80_sub_avr +#define floatx80_mul floatx80_mul_avr +#define floatx80_div floatx80_div_avr +#define floatx80_rem floatx80_rem_avr +#define floatx80_sqrt floatx80_sqrt_avr +#define floatx80_eq floatx80_eq_avr +#define floatx80_le floatx80_le_avr +#define floatx80_lt floatx80_lt_avr +#define floatx80_unordered floatx80_unordered_avr +#define floatx80_eq_quiet floatx80_eq_quiet_avr +#define floatx80_le_quiet floatx80_le_quiet_avr +#define floatx80_lt_quiet floatx80_lt_quiet_avr +#define floatx80_unordered_quiet floatx80_unordered_quiet_avr +#define float128_to_int32 float128_to_int32_avr +#define float128_to_int32_round_to_zero float128_to_int32_round_to_zero_avr +#define float128_to_int64 float128_to_int64_avr +#define float128_to_int64_round_to_zero float128_to_int64_round_to_zero_avr +#define float128_to_uint64 float128_to_uint64_avr +#define float128_to_uint64_round_to_zero float128_to_uint64_round_to_zero_avr +#define float128_to_uint32_round_to_zero float128_to_uint32_round_to_zero_avr +#define float128_to_uint32 float128_to_uint32_avr +#define float128_to_float32 float128_to_float32_avr +#define float128_to_float64 float128_to_float64_avr +#define float128_to_floatx80 float128_to_floatx80_avr +#define float128_round_to_int float128_round_to_int_avr +#define float128_add float128_add_avr +#define float128_sub float128_sub_avr +#define float128_mul float128_mul_avr +#define float128_div float128_div_avr +#define float128_rem float128_rem_avr +#define float128_sqrt float128_sqrt_avr +#define float128_eq float128_eq_avr +#define float128_le float128_le_avr +#define float128_lt float128_lt_avr +#define float128_unordered float128_unordered_avr +#define float128_eq_quiet float128_eq_quiet_avr +#define float128_le_quiet float128_le_quiet_avr +#define float128_lt_quiet float128_lt_quiet_avr +#define float128_unordered_quiet float128_unordered_quiet_avr +#define floatx80_compare floatx80_compare_avr +#define floatx80_compare_quiet floatx80_compare_quiet_avr +#define float128_compare float128_compare_avr +#define float128_compare_quiet float128_compare_quiet_avr +#define floatx80_scalbn floatx80_scalbn_avr +#define float128_scalbn float128_scalbn_avr +#define softfloat_init softfloat_init_avr +#define tcg_optimize tcg_optimize_avr +#define gen_new_label gen_new_label_avr +#define tcg_can_emit_vec_op tcg_can_emit_vec_op_avr +#define tcg_expand_vec_op tcg_expand_vec_op_avr +#define tcg_register_jit tcg_register_jit_avr +#define tcg_tb_insert tcg_tb_insert_avr +#define tcg_tb_remove tcg_tb_remove_avr +#define tcg_tb_lookup tcg_tb_lookup_avr +#define tcg_tb_foreach tcg_tb_foreach_avr +#define tcg_nb_tbs tcg_nb_tbs_avr +#define tcg_region_reset_all tcg_region_reset_all_avr +#define tcg_region_init tcg_region_init_avr +#define tcg_code_size tcg_code_size_avr +#define tcg_code_capacity tcg_code_capacity_avr +#define tcg_tb_phys_invalidate_count tcg_tb_phys_invalidate_count_avr +#define tcg_malloc_internal tcg_malloc_internal_avr +#define tcg_pool_reset tcg_pool_reset_avr +#define tcg_context_init tcg_context_init_avr 
+#define tcg_tb_alloc tcg_tb_alloc_avr +#define tcg_prologue_init tcg_prologue_init_avr +#define tcg_func_start tcg_func_start_avr +#define tcg_set_frame tcg_set_frame_avr +#define tcg_global_mem_new_internal tcg_global_mem_new_internal_avr +#define tcg_temp_new_internal tcg_temp_new_internal_avr +#define tcg_temp_new_vec tcg_temp_new_vec_avr +#define tcg_temp_new_vec_matching tcg_temp_new_vec_matching_avr +#define tcg_temp_free_internal tcg_temp_free_internal_avr +#define tcg_const_i32 tcg_const_i32_avr +#define tcg_const_i64 tcg_const_i64_avr +#define tcg_const_local_i32 tcg_const_local_i32_avr +#define tcg_const_local_i64 tcg_const_local_i64_avr +#define tcg_op_supported tcg_op_supported_avr +#define tcg_gen_callN tcg_gen_callN_avr +#define tcg_op_remove tcg_op_remove_avr +#define tcg_emit_op tcg_emit_op_avr +#define tcg_op_insert_before tcg_op_insert_before_avr +#define tcg_op_insert_after tcg_op_insert_after_avr +#define tcg_cpu_exec_time tcg_cpu_exec_time_avr +#define tcg_gen_code tcg_gen_code_avr +#define tcg_gen_op1 tcg_gen_op1_avr +#define tcg_gen_op2 tcg_gen_op2_avr +#define tcg_gen_op3 tcg_gen_op3_avr +#define tcg_gen_op4 tcg_gen_op4_avr +#define tcg_gen_op5 tcg_gen_op5_avr +#define tcg_gen_op6 tcg_gen_op6_avr +#define tcg_gen_mb tcg_gen_mb_avr +#define tcg_gen_addi_i32 tcg_gen_addi_i32_avr +#define tcg_gen_subfi_i32 tcg_gen_subfi_i32_avr +#define tcg_gen_subi_i32 tcg_gen_subi_i32_avr +#define tcg_gen_andi_i32 tcg_gen_andi_i32_avr +#define tcg_gen_ori_i32 tcg_gen_ori_i32_avr +#define tcg_gen_xori_i32 tcg_gen_xori_i32_avr +#define tcg_gen_shli_i32 tcg_gen_shli_i32_avr +#define tcg_gen_shri_i32 tcg_gen_shri_i32_avr +#define tcg_gen_sari_i32 tcg_gen_sari_i32_avr +#define tcg_gen_brcond_i32 tcg_gen_brcond_i32_avr +#define tcg_gen_brcondi_i32 tcg_gen_brcondi_i32_avr +#define tcg_gen_setcond_i32 tcg_gen_setcond_i32_avr +#define tcg_gen_setcondi_i32 tcg_gen_setcondi_i32_avr +#define tcg_gen_muli_i32 tcg_gen_muli_i32_avr +#define tcg_gen_div_i32 tcg_gen_div_i32_avr +#define tcg_gen_rem_i32 tcg_gen_rem_i32_avr +#define tcg_gen_divu_i32 tcg_gen_divu_i32_avr +#define tcg_gen_remu_i32 tcg_gen_remu_i32_avr +#define tcg_gen_andc_i32 tcg_gen_andc_i32_avr +#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_avr +#define tcg_gen_nand_i32 tcg_gen_nand_i32_avr +#define tcg_gen_nor_i32 tcg_gen_nor_i32_avr +#define tcg_gen_orc_i32 tcg_gen_orc_i32_avr +#define tcg_gen_clz_i32 tcg_gen_clz_i32_avr +#define tcg_gen_clzi_i32 tcg_gen_clzi_i32_avr +#define tcg_gen_ctz_i32 tcg_gen_ctz_i32_avr +#define tcg_gen_ctzi_i32 tcg_gen_ctzi_i32_avr +#define tcg_gen_clrsb_i32 tcg_gen_clrsb_i32_avr +#define tcg_gen_ctpop_i32 tcg_gen_ctpop_i32_avr +#define tcg_gen_rotl_i32 tcg_gen_rotl_i32_avr +#define tcg_gen_rotli_i32 tcg_gen_rotli_i32_avr +#define tcg_gen_rotr_i32 tcg_gen_rotr_i32_avr +#define tcg_gen_rotri_i32 tcg_gen_rotri_i32_avr +#define tcg_gen_deposit_i32 tcg_gen_deposit_i32_avr +#define tcg_gen_deposit_z_i32 tcg_gen_deposit_z_i32_avr +#define tcg_gen_extract_i32 tcg_gen_extract_i32_avr +#define tcg_gen_sextract_i32 tcg_gen_sextract_i32_avr +#define tcg_gen_extract2_i32 tcg_gen_extract2_i32_avr +#define tcg_gen_movcond_i32 tcg_gen_movcond_i32_avr +#define tcg_gen_add2_i32 tcg_gen_add2_i32_avr +#define tcg_gen_sub2_i32 tcg_gen_sub2_i32_avr +#define tcg_gen_mulu2_i32 tcg_gen_mulu2_i32_avr +#define tcg_gen_muls2_i32 tcg_gen_muls2_i32_avr +#define tcg_gen_mulsu2_i32 tcg_gen_mulsu2_i32_avr +#define tcg_gen_ext8s_i32 tcg_gen_ext8s_i32_avr +#define tcg_gen_ext16s_i32 tcg_gen_ext16s_i32_avr +#define tcg_gen_ext8u_i32 
tcg_gen_ext8u_i32_avr +#define tcg_gen_ext16u_i32 tcg_gen_ext16u_i32_avr +#define tcg_gen_bswap16_i32 tcg_gen_bswap16_i32_avr +#define tcg_gen_bswap32_i32 tcg_gen_bswap32_i32_avr +#define tcg_gen_smin_i32 tcg_gen_smin_i32_avr +#define tcg_gen_umin_i32 tcg_gen_umin_i32_avr +#define tcg_gen_smax_i32 tcg_gen_smax_i32_avr +#define tcg_gen_umax_i32 tcg_gen_umax_i32_avr +#define tcg_gen_abs_i32 tcg_gen_abs_i32_avr +#define tcg_gen_addi_i64 tcg_gen_addi_i64_avr +#define tcg_gen_subfi_i64 tcg_gen_subfi_i64_avr +#define tcg_gen_subi_i64 tcg_gen_subi_i64_avr +#define tcg_gen_andi_i64 tcg_gen_andi_i64_avr +#define tcg_gen_ori_i64 tcg_gen_ori_i64_avr +#define tcg_gen_xori_i64 tcg_gen_xori_i64_avr +#define tcg_gen_shli_i64 tcg_gen_shli_i64_avr +#define tcg_gen_shri_i64 tcg_gen_shri_i64_avr +#define tcg_gen_sari_i64 tcg_gen_sari_i64_avr +#define tcg_gen_brcond_i64 tcg_gen_brcond_i64_avr +#define tcg_gen_brcondi_i64 tcg_gen_brcondi_i64_avr +#define tcg_gen_setcond_i64 tcg_gen_setcond_i64_avr +#define tcg_gen_setcondi_i64 tcg_gen_setcondi_i64_avr +#define tcg_gen_muli_i64 tcg_gen_muli_i64_avr +#define tcg_gen_div_i64 tcg_gen_div_i64_avr +#define tcg_gen_rem_i64 tcg_gen_rem_i64_avr +#define tcg_gen_divu_i64 tcg_gen_divu_i64_avr +#define tcg_gen_remu_i64 tcg_gen_remu_i64_avr +#define tcg_gen_ext8s_i64 tcg_gen_ext8s_i64_avr +#define tcg_gen_ext16s_i64 tcg_gen_ext16s_i64_avr +#define tcg_gen_ext32s_i64 tcg_gen_ext32s_i64_avr +#define tcg_gen_ext8u_i64 tcg_gen_ext8u_i64_avr +#define tcg_gen_ext16u_i64 tcg_gen_ext16u_i64_avr +#define tcg_gen_ext32u_i64 tcg_gen_ext32u_i64_avr +#define tcg_gen_bswap16_i64 tcg_gen_bswap16_i64_avr +#define tcg_gen_bswap32_i64 tcg_gen_bswap32_i64_avr +#define tcg_gen_bswap64_i64 tcg_gen_bswap64_i64_avr +#define tcg_gen_not_i64 tcg_gen_not_i64_avr +#define tcg_gen_andc_i64 tcg_gen_andc_i64_avr +#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_avr +#define tcg_gen_nand_i64 tcg_gen_nand_i64_avr +#define tcg_gen_nor_i64 tcg_gen_nor_i64_avr +#define tcg_gen_orc_i64 tcg_gen_orc_i64_avr +#define tcg_gen_clz_i64 tcg_gen_clz_i64_avr +#define tcg_gen_clzi_i64 tcg_gen_clzi_i64_avr +#define tcg_gen_ctz_i64 tcg_gen_ctz_i64_avr +#define tcg_gen_ctzi_i64 tcg_gen_ctzi_i64_avr +#define tcg_gen_clrsb_i64 tcg_gen_clrsb_i64_avr +#define tcg_gen_ctpop_i64 tcg_gen_ctpop_i64_avr +#define tcg_gen_rotl_i64 tcg_gen_rotl_i64_avr +#define tcg_gen_rotli_i64 tcg_gen_rotli_i64_avr +#define tcg_gen_rotr_i64 tcg_gen_rotr_i64_avr +#define tcg_gen_rotri_i64 tcg_gen_rotri_i64_avr +#define tcg_gen_deposit_i64 tcg_gen_deposit_i64_avr +#define tcg_gen_deposit_z_i64 tcg_gen_deposit_z_i64_avr +#define tcg_gen_extract_i64 tcg_gen_extract_i64_avr +#define tcg_gen_sextract_i64 tcg_gen_sextract_i64_avr +#define tcg_gen_extract2_i64 tcg_gen_extract2_i64_avr +#define tcg_gen_movcond_i64 tcg_gen_movcond_i64_avr +#define tcg_gen_add2_i64 tcg_gen_add2_i64_avr +#define tcg_gen_sub2_i64 tcg_gen_sub2_i64_avr +#define tcg_gen_mulu2_i64 tcg_gen_mulu2_i64_avr +#define tcg_gen_muls2_i64 tcg_gen_muls2_i64_avr +#define tcg_gen_mulsu2_i64 tcg_gen_mulsu2_i64_avr +#define tcg_gen_smin_i64 tcg_gen_smin_i64_avr +#define tcg_gen_umin_i64 tcg_gen_umin_i64_avr +#define tcg_gen_smax_i64 tcg_gen_smax_i64_avr +#define tcg_gen_umax_i64 tcg_gen_umax_i64_avr +#define tcg_gen_abs_i64 tcg_gen_abs_i64_avr +#define tcg_gen_extrl_i64_i32 tcg_gen_extrl_i64_i32_avr +#define tcg_gen_extrh_i64_i32 tcg_gen_extrh_i64_i32_avr +#define tcg_gen_extu_i32_i64 tcg_gen_extu_i32_i64_avr +#define tcg_gen_ext_i32_i64 tcg_gen_ext_i32_i64_avr +#define tcg_gen_concat_i32_i64 
tcg_gen_concat_i32_i64_avr +#define tcg_gen_extr_i64_i32 tcg_gen_extr_i64_i32_avr +#define tcg_gen_extr32_i64 tcg_gen_extr32_i64_avr +#define tcg_gen_exit_tb tcg_gen_exit_tb_avr +#define tcg_gen_goto_tb tcg_gen_goto_tb_avr +#define tcg_gen_lookup_and_goto_ptr tcg_gen_lookup_and_goto_ptr_avr +#define check_exit_request check_exit_request_avr +#define tcg_gen_qemu_ld_i32 tcg_gen_qemu_ld_i32_avr +#define tcg_gen_qemu_st_i32 tcg_gen_qemu_st_i32_avr +#define tcg_gen_qemu_ld_i64 tcg_gen_qemu_ld_i64_avr +#define tcg_gen_qemu_st_i64 tcg_gen_qemu_st_i64_avr +#define tcg_gen_atomic_cmpxchg_i32 tcg_gen_atomic_cmpxchg_i32_avr +#define tcg_gen_atomic_cmpxchg_i64 tcg_gen_atomic_cmpxchg_i64_avr +#define tcg_gen_atomic_fetch_add_i32 tcg_gen_atomic_fetch_add_i32_avr +#define tcg_gen_atomic_fetch_add_i64 tcg_gen_atomic_fetch_add_i64_avr +#define tcg_gen_atomic_fetch_and_i32 tcg_gen_atomic_fetch_and_i32_avr +#define tcg_gen_atomic_fetch_and_i64 tcg_gen_atomic_fetch_and_i64_avr +#define tcg_gen_atomic_fetch_or_i32 tcg_gen_atomic_fetch_or_i32_avr +#define tcg_gen_atomic_fetch_or_i64 tcg_gen_atomic_fetch_or_i64_avr +#define tcg_gen_atomic_fetch_xor_i32 tcg_gen_atomic_fetch_xor_i32_avr +#define tcg_gen_atomic_fetch_xor_i64 tcg_gen_atomic_fetch_xor_i64_avr +#define tcg_gen_atomic_fetch_smin_i32 tcg_gen_atomic_fetch_smin_i32_avr +#define tcg_gen_atomic_fetch_smin_i64 tcg_gen_atomic_fetch_smin_i64_avr +#define tcg_gen_atomic_fetch_umin_i32 tcg_gen_atomic_fetch_umin_i32_avr +#define tcg_gen_atomic_fetch_umin_i64 tcg_gen_atomic_fetch_umin_i64_avr +#define tcg_gen_atomic_fetch_smax_i32 tcg_gen_atomic_fetch_smax_i32_avr +#define tcg_gen_atomic_fetch_smax_i64 tcg_gen_atomic_fetch_smax_i64_avr +#define tcg_gen_atomic_fetch_umax_i32 tcg_gen_atomic_fetch_umax_i32_avr +#define tcg_gen_atomic_fetch_umax_i64 tcg_gen_atomic_fetch_umax_i64_avr +#define tcg_gen_atomic_add_fetch_i32 tcg_gen_atomic_add_fetch_i32_avr +#define tcg_gen_atomic_add_fetch_i64 tcg_gen_atomic_add_fetch_i64_avr +#define tcg_gen_atomic_and_fetch_i32 tcg_gen_atomic_and_fetch_i32_avr +#define tcg_gen_atomic_and_fetch_i64 tcg_gen_atomic_and_fetch_i64_avr +#define tcg_gen_atomic_or_fetch_i32 tcg_gen_atomic_or_fetch_i32_avr +#define tcg_gen_atomic_or_fetch_i64 tcg_gen_atomic_or_fetch_i64_avr +#define tcg_gen_atomic_xor_fetch_i32 tcg_gen_atomic_xor_fetch_i32_avr +#define tcg_gen_atomic_xor_fetch_i64 tcg_gen_atomic_xor_fetch_i64_avr +#define tcg_gen_atomic_smin_fetch_i32 tcg_gen_atomic_smin_fetch_i32_avr +#define tcg_gen_atomic_smin_fetch_i64 tcg_gen_atomic_smin_fetch_i64_avr +#define tcg_gen_atomic_umin_fetch_i32 tcg_gen_atomic_umin_fetch_i32_avr +#define tcg_gen_atomic_umin_fetch_i64 tcg_gen_atomic_umin_fetch_i64_avr +#define tcg_gen_atomic_smax_fetch_i32 tcg_gen_atomic_smax_fetch_i32_avr +#define tcg_gen_atomic_smax_fetch_i64 tcg_gen_atomic_smax_fetch_i64_avr +#define tcg_gen_atomic_umax_fetch_i32 tcg_gen_atomic_umax_fetch_i32_avr +#define tcg_gen_atomic_umax_fetch_i64 tcg_gen_atomic_umax_fetch_i64_avr +#define tcg_gen_atomic_xchg_i32 tcg_gen_atomic_xchg_i32_avr +#define tcg_gen_atomic_xchg_i64 tcg_gen_atomic_xchg_i64_avr +#define simd_desc simd_desc_avr +#define tcg_gen_gvec_2_ool tcg_gen_gvec_2_ool_avr +#define tcg_gen_gvec_2i_ool tcg_gen_gvec_2i_ool_avr +#define tcg_gen_gvec_3_ool tcg_gen_gvec_3_ool_avr +#define tcg_gen_gvec_4_ool tcg_gen_gvec_4_ool_avr +#define tcg_gen_gvec_5_ool tcg_gen_gvec_5_ool_avr +#define tcg_gen_gvec_2_ptr tcg_gen_gvec_2_ptr_avr +#define tcg_gen_gvec_3_ptr tcg_gen_gvec_3_ptr_avr +#define tcg_gen_gvec_4_ptr tcg_gen_gvec_4_ptr_avr 
+#define tcg_gen_gvec_5_ptr tcg_gen_gvec_5_ptr_avr +#define tcg_gen_gvec_2 tcg_gen_gvec_2_avr +#define tcg_gen_gvec_2i tcg_gen_gvec_2i_avr +#define tcg_gen_gvec_2s tcg_gen_gvec_2s_avr +#define tcg_gen_gvec_3 tcg_gen_gvec_3_avr +#define tcg_gen_gvec_3i tcg_gen_gvec_3i_avr +#define tcg_gen_gvec_4 tcg_gen_gvec_4_avr +#define tcg_gen_gvec_mov tcg_gen_gvec_mov_avr +#define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_avr +#define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_avr +#define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_avr +#define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_avr +#define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_avr +#define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_avr +#define tcg_gen_gvec_dup8i tcg_gen_gvec_dup8i_avr +#define tcg_gen_gvec_not tcg_gen_gvec_not_avr +#define tcg_gen_vec_add8_i64 tcg_gen_vec_add8_i64_avr +#define tcg_gen_vec_add16_i64 tcg_gen_vec_add16_i64_avr +#define tcg_gen_vec_add32_i64 tcg_gen_vec_add32_i64_avr +#define tcg_gen_gvec_add tcg_gen_gvec_add_avr +#define tcg_gen_gvec_adds tcg_gen_gvec_adds_avr +#define tcg_gen_gvec_addi tcg_gen_gvec_addi_avr +#define tcg_gen_gvec_subs tcg_gen_gvec_subs_avr +#define tcg_gen_vec_sub8_i64 tcg_gen_vec_sub8_i64_avr +#define tcg_gen_vec_sub16_i64 tcg_gen_vec_sub16_i64_avr +#define tcg_gen_vec_sub32_i64 tcg_gen_vec_sub32_i64_avr +#define tcg_gen_gvec_sub tcg_gen_gvec_sub_avr +#define tcg_gen_gvec_mul tcg_gen_gvec_mul_avr +#define tcg_gen_gvec_muls tcg_gen_gvec_muls_avr +#define tcg_gen_gvec_muli tcg_gen_gvec_muli_avr +#define tcg_gen_gvec_ssadd tcg_gen_gvec_ssadd_avr +#define tcg_gen_gvec_sssub tcg_gen_gvec_sssub_avr +#define tcg_gen_gvec_usadd tcg_gen_gvec_usadd_avr +#define tcg_gen_gvec_ussub tcg_gen_gvec_ussub_avr +#define tcg_gen_gvec_smin tcg_gen_gvec_smin_avr +#define tcg_gen_gvec_umin tcg_gen_gvec_umin_avr +#define tcg_gen_gvec_smax tcg_gen_gvec_smax_avr +#define tcg_gen_gvec_umax tcg_gen_gvec_umax_avr +#define tcg_gen_vec_neg8_i64 tcg_gen_vec_neg8_i64_avr +#define tcg_gen_vec_neg16_i64 tcg_gen_vec_neg16_i64_avr +#define tcg_gen_vec_neg32_i64 tcg_gen_vec_neg32_i64_avr +#define tcg_gen_gvec_neg tcg_gen_gvec_neg_avr +#define tcg_gen_gvec_abs tcg_gen_gvec_abs_avr +#define tcg_gen_gvec_and tcg_gen_gvec_and_avr +#define tcg_gen_gvec_or tcg_gen_gvec_or_avr +#define tcg_gen_gvec_xor tcg_gen_gvec_xor_avr +#define tcg_gen_gvec_andc tcg_gen_gvec_andc_avr +#define tcg_gen_gvec_orc tcg_gen_gvec_orc_avr +#define tcg_gen_gvec_nand tcg_gen_gvec_nand_avr +#define tcg_gen_gvec_nor tcg_gen_gvec_nor_avr +#define tcg_gen_gvec_eqv tcg_gen_gvec_eqv_avr +#define tcg_gen_gvec_ands tcg_gen_gvec_ands_avr +#define tcg_gen_gvec_andi tcg_gen_gvec_andi_avr +#define tcg_gen_gvec_xors tcg_gen_gvec_xors_avr +#define tcg_gen_gvec_xori tcg_gen_gvec_xori_avr +#define tcg_gen_gvec_ors tcg_gen_gvec_ors_avr +#define tcg_gen_gvec_ori tcg_gen_gvec_ori_avr +#define tcg_gen_vec_shl8i_i64 tcg_gen_vec_shl8i_i64_avr +#define tcg_gen_vec_shl16i_i64 tcg_gen_vec_shl16i_i64_avr +#define tcg_gen_gvec_shli tcg_gen_gvec_shli_avr +#define tcg_gen_vec_shr8i_i64 tcg_gen_vec_shr8i_i64_avr +#define tcg_gen_vec_shr16i_i64 tcg_gen_vec_shr16i_i64_avr +#define tcg_gen_gvec_shri tcg_gen_gvec_shri_avr +#define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_avr +#define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_avr +#define tcg_gen_gvec_sari tcg_gen_gvec_sari_avr +#define tcg_gen_gvec_shls tcg_gen_gvec_shls_avr +#define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_avr +#define tcg_gen_gvec_sars tcg_gen_gvec_sars_avr +#define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_avr +#define 
tcg_gen_gvec_shrv tcg_gen_gvec_shrv_avr +#define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_avr +#define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_avr +#define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_avr +#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_avr +#define vec_gen_2 vec_gen_2_avr +#define vec_gen_3 vec_gen_3_avr +#define vec_gen_4 vec_gen_4_avr +#define tcg_gen_mov_vec tcg_gen_mov_vec_avr +#define tcg_const_zeros_vec tcg_const_zeros_vec_avr +#define tcg_const_ones_vec tcg_const_ones_vec_avr +#define tcg_const_zeros_vec_matching tcg_const_zeros_vec_matching_avr +#define tcg_const_ones_vec_matching tcg_const_ones_vec_matching_avr +#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_avr +#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_avr +#define tcg_gen_dup16i_vec tcg_gen_dup16i_vec_avr +#define tcg_gen_dup8i_vec tcg_gen_dup8i_vec_avr +#define tcg_gen_dupi_vec tcg_gen_dupi_vec_avr +#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_avr +#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_avr +#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_avr +#define tcg_gen_ld_vec tcg_gen_ld_vec_avr +#define tcg_gen_st_vec tcg_gen_st_vec_avr +#define tcg_gen_stl_vec tcg_gen_stl_vec_avr +#define tcg_gen_and_vec tcg_gen_and_vec_avr +#define tcg_gen_or_vec tcg_gen_or_vec_avr +#define tcg_gen_xor_vec tcg_gen_xor_vec_avr +#define tcg_gen_andc_vec tcg_gen_andc_vec_avr +#define tcg_gen_orc_vec tcg_gen_orc_vec_avr +#define tcg_gen_nand_vec tcg_gen_nand_vec_avr +#define tcg_gen_nor_vec tcg_gen_nor_vec_avr +#define tcg_gen_eqv_vec tcg_gen_eqv_vec_avr +#define tcg_gen_not_vec tcg_gen_not_vec_avr +#define tcg_gen_neg_vec tcg_gen_neg_vec_avr +#define tcg_gen_abs_vec tcg_gen_abs_vec_avr +#define tcg_gen_shli_vec tcg_gen_shli_vec_avr +#define tcg_gen_shri_vec tcg_gen_shri_vec_avr +#define tcg_gen_sari_vec tcg_gen_sari_vec_avr +#define tcg_gen_cmp_vec tcg_gen_cmp_vec_avr +#define tcg_gen_add_vec tcg_gen_add_vec_avr +#define tcg_gen_sub_vec tcg_gen_sub_vec_avr +#define tcg_gen_mul_vec tcg_gen_mul_vec_avr +#define tcg_gen_ssadd_vec tcg_gen_ssadd_vec_avr +#define tcg_gen_usadd_vec tcg_gen_usadd_vec_avr +#define tcg_gen_sssub_vec tcg_gen_sssub_vec_avr +#define tcg_gen_ussub_vec tcg_gen_ussub_vec_avr +#define tcg_gen_smin_vec tcg_gen_smin_vec_avr +#define tcg_gen_umin_vec tcg_gen_umin_vec_avr +#define tcg_gen_smax_vec tcg_gen_smax_vec_avr +#define tcg_gen_umax_vec tcg_gen_umax_vec_avr +#define tcg_gen_shlv_vec tcg_gen_shlv_vec_avr +#define tcg_gen_shrv_vec tcg_gen_shrv_vec_avr +#define tcg_gen_sarv_vec tcg_gen_sarv_vec_avr +#define tcg_gen_shls_vec tcg_gen_shls_vec_avr +#define tcg_gen_shrs_vec tcg_gen_shrs_vec_avr +#define tcg_gen_sars_vec tcg_gen_sars_vec_avr +#define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_avr +#define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_avr +#define tb_htable_lookup tb_htable_lookup_avr +#define tb_set_jmp_target tb_set_jmp_target_avr +#define cpu_exec cpu_exec_avr +#define cpu_loop_exit_noexc cpu_loop_exit_noexc_avr +#define cpu_reloading_memory_map cpu_reloading_memory_map_avr +#define cpu_loop_exit cpu_loop_exit_avr +#define cpu_loop_exit_restore cpu_loop_exit_restore_avr +#define cpu_loop_exit_atomic cpu_loop_exit_atomic_avr +#define tlb_init tlb_init_avr +#define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_avr +#define tlb_flush tlb_flush_avr +#define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_avr +#define tlb_flush_all_cpus tlb_flush_all_cpus_avr +#define tlb_flush_by_mmuidx_all_cpus_synced tlb_flush_by_mmuidx_all_cpus_synced_avr +#define tlb_flush_all_cpus_synced tlb_flush_all_cpus_synced_avr 
+#define tlb_flush_page_by_mmuidx tlb_flush_page_by_mmuidx_avr +#define tlb_flush_page tlb_flush_page_avr +#define tlb_flush_page_by_mmuidx_all_cpus tlb_flush_page_by_mmuidx_all_cpus_avr +#define tlb_flush_page_all_cpus tlb_flush_page_all_cpus_avr +#define tlb_flush_page_by_mmuidx_all_cpus_synced tlb_flush_page_by_mmuidx_all_cpus_synced_avr +#define tlb_flush_page_all_cpus_synced tlb_flush_page_all_cpus_synced_avr +#define tlb_protect_code tlb_protect_code_avr +#define tlb_unprotect_code tlb_unprotect_code_avr +#define tlb_reset_dirty tlb_reset_dirty_avr +#define tlb_set_dirty tlb_set_dirty_avr +#define tlb_set_page_with_attrs tlb_set_page_with_attrs_avr +#define tlb_set_page tlb_set_page_avr +#define get_page_addr_code_hostp get_page_addr_code_hostp_avr +#define get_page_addr_code get_page_addr_code_avr +#define probe_access probe_access_avr +#define tlb_vaddr_to_host tlb_vaddr_to_host_avr +#define helper_ret_ldub_mmu helper_ret_ldub_mmu_avr +#define helper_le_lduw_mmu helper_le_lduw_mmu_avr +#define helper_be_lduw_mmu helper_be_lduw_mmu_avr +#define helper_le_ldul_mmu helper_le_ldul_mmu_avr +#define helper_be_ldul_mmu helper_be_ldul_mmu_avr +#define helper_le_ldq_mmu helper_le_ldq_mmu_avr +#define helper_be_ldq_mmu helper_be_ldq_mmu_avr +#define helper_ret_ldsb_mmu helper_ret_ldsb_mmu_avr +#define helper_le_ldsw_mmu helper_le_ldsw_mmu_avr +#define helper_be_ldsw_mmu helper_be_ldsw_mmu_avr +#define helper_le_ldsl_mmu helper_le_ldsl_mmu_avr +#define helper_be_ldsl_mmu helper_be_ldsl_mmu_avr +#define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_avr +#define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_avr +#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_avr +#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_avr +#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_avr +#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_avr +#define cpu_ldub_data_ra cpu_ldub_data_ra_avr +#define cpu_ldsb_data_ra cpu_ldsb_data_ra_avr +#define cpu_lduw_data_ra cpu_lduw_data_ra_avr +#define cpu_ldsw_data_ra cpu_ldsw_data_ra_avr +#define cpu_ldl_data_ra cpu_ldl_data_ra_avr +#define cpu_ldq_data_ra cpu_ldq_data_ra_avr +#define cpu_ldub_data cpu_ldub_data_avr +#define cpu_ldsb_data cpu_ldsb_data_avr +#define cpu_lduw_data cpu_lduw_data_avr +#define cpu_ldsw_data cpu_ldsw_data_avr +#define cpu_ldl_data cpu_ldl_data_avr +#define cpu_ldq_data cpu_ldq_data_avr +#define helper_ret_stb_mmu helper_ret_stb_mmu_avr +#define helper_le_stw_mmu helper_le_stw_mmu_avr +#define helper_be_stw_mmu helper_be_stw_mmu_avr +#define helper_le_stl_mmu helper_le_stl_mmu_avr +#define helper_be_stl_mmu helper_be_stl_mmu_avr +#define helper_le_stq_mmu helper_le_stq_mmu_avr +#define helper_be_stq_mmu helper_be_stq_mmu_avr +#define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_avr +#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_avr +#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_avr +#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_avr +#define cpu_stb_data_ra cpu_stb_data_ra_avr +#define cpu_stw_data_ra cpu_stw_data_ra_avr +#define cpu_stl_data_ra cpu_stl_data_ra_avr +#define cpu_stq_data_ra cpu_stq_data_ra_avr +#define cpu_stb_data cpu_stb_data_avr +#define cpu_stw_data cpu_stw_data_avr +#define cpu_stl_data cpu_stl_data_avr +#define cpu_stq_data cpu_stq_data_avr +#define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_avr +#define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_avr +#define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_avr +#define helper_atomic_fetch_andb_mmu helper_atomic_fetch_andb_mmu_avr +#define helper_atomic_fetch_orb_mmu helper_atomic_fetch_orb_mmu_avr 
+#define helper_atomic_fetch_xorb_mmu helper_atomic_fetch_xorb_mmu_avr +#define helper_atomic_add_fetchb_mmu helper_atomic_add_fetchb_mmu_avr +#define helper_atomic_and_fetchb_mmu helper_atomic_and_fetchb_mmu_avr +#define helper_atomic_or_fetchb_mmu helper_atomic_or_fetchb_mmu_avr +#define helper_atomic_xor_fetchb_mmu helper_atomic_xor_fetchb_mmu_avr +#define helper_atomic_fetch_sminb_mmu helper_atomic_fetch_sminb_mmu_avr +#define helper_atomic_fetch_uminb_mmu helper_atomic_fetch_uminb_mmu_avr +#define helper_atomic_fetch_smaxb_mmu helper_atomic_fetch_smaxb_mmu_avr +#define helper_atomic_fetch_umaxb_mmu helper_atomic_fetch_umaxb_mmu_avr +#define helper_atomic_smin_fetchb_mmu helper_atomic_smin_fetchb_mmu_avr +#define helper_atomic_umin_fetchb_mmu helper_atomic_umin_fetchb_mmu_avr +#define helper_atomic_smax_fetchb_mmu helper_atomic_smax_fetchb_mmu_avr +#define helper_atomic_umax_fetchb_mmu helper_atomic_umax_fetchb_mmu_avr +#define helper_atomic_cmpxchgw_le_mmu helper_atomic_cmpxchgw_le_mmu_avr +#define helper_atomic_xchgw_le_mmu helper_atomic_xchgw_le_mmu_avr +#define helper_atomic_fetch_addw_le_mmu helper_atomic_fetch_addw_le_mmu_avr +#define helper_atomic_fetch_andw_le_mmu helper_atomic_fetch_andw_le_mmu_avr +#define helper_atomic_fetch_orw_le_mmu helper_atomic_fetch_orw_le_mmu_avr +#define helper_atomic_fetch_xorw_le_mmu helper_atomic_fetch_xorw_le_mmu_avr +#define helper_atomic_add_fetchw_le_mmu helper_atomic_add_fetchw_le_mmu_avr +#define helper_atomic_and_fetchw_le_mmu helper_atomic_and_fetchw_le_mmu_avr +#define helper_atomic_or_fetchw_le_mmu helper_atomic_or_fetchw_le_mmu_avr +#define helper_atomic_xor_fetchw_le_mmu helper_atomic_xor_fetchw_le_mmu_avr +#define helper_atomic_fetch_sminw_le_mmu helper_atomic_fetch_sminw_le_mmu_avr +#define helper_atomic_fetch_uminw_le_mmu helper_atomic_fetch_uminw_le_mmu_avr +#define helper_atomic_fetch_smaxw_le_mmu helper_atomic_fetch_smaxw_le_mmu_avr +#define helper_atomic_fetch_umaxw_le_mmu helper_atomic_fetch_umaxw_le_mmu_avr +#define helper_atomic_smin_fetchw_le_mmu helper_atomic_smin_fetchw_le_mmu_avr +#define helper_atomic_umin_fetchw_le_mmu helper_atomic_umin_fetchw_le_mmu_avr +#define helper_atomic_smax_fetchw_le_mmu helper_atomic_smax_fetchw_le_mmu_avr +#define helper_atomic_umax_fetchw_le_mmu helper_atomic_umax_fetchw_le_mmu_avr +#define helper_atomic_cmpxchgw_be_mmu helper_atomic_cmpxchgw_be_mmu_avr +#define helper_atomic_xchgw_be_mmu helper_atomic_xchgw_be_mmu_avr +#define helper_atomic_fetch_andw_be_mmu helper_atomic_fetch_andw_be_mmu_avr +#define helper_atomic_fetch_orw_be_mmu helper_atomic_fetch_orw_be_mmu_avr +#define helper_atomic_fetch_xorw_be_mmu helper_atomic_fetch_xorw_be_mmu_avr +#define helper_atomic_and_fetchw_be_mmu helper_atomic_and_fetchw_be_mmu_avr +#define helper_atomic_or_fetchw_be_mmu helper_atomic_or_fetchw_be_mmu_avr +#define helper_atomic_xor_fetchw_be_mmu helper_atomic_xor_fetchw_be_mmu_avr +#define helper_atomic_fetch_sminw_be_mmu helper_atomic_fetch_sminw_be_mmu_avr +#define helper_atomic_fetch_uminw_be_mmu helper_atomic_fetch_uminw_be_mmu_avr +#define helper_atomic_fetch_smaxw_be_mmu helper_atomic_fetch_smaxw_be_mmu_avr +#define helper_atomic_fetch_umaxw_be_mmu helper_atomic_fetch_umaxw_be_mmu_avr +#define helper_atomic_smin_fetchw_be_mmu helper_atomic_smin_fetchw_be_mmu_avr +#define helper_atomic_umin_fetchw_be_mmu helper_atomic_umin_fetchw_be_mmu_avr +#define helper_atomic_smax_fetchw_be_mmu helper_atomic_smax_fetchw_be_mmu_avr +#define helper_atomic_umax_fetchw_be_mmu helper_atomic_umax_fetchw_be_mmu_avr 
+#define helper_atomic_fetch_addw_be_mmu helper_atomic_fetch_addw_be_mmu_avr +#define helper_atomic_add_fetchw_be_mmu helper_atomic_add_fetchw_be_mmu_avr +#define helper_atomic_cmpxchgl_le_mmu helper_atomic_cmpxchgl_le_mmu_avr +#define helper_atomic_xchgl_le_mmu helper_atomic_xchgl_le_mmu_avr +#define helper_atomic_fetch_addl_le_mmu helper_atomic_fetch_addl_le_mmu_avr +#define helper_atomic_fetch_andl_le_mmu helper_atomic_fetch_andl_le_mmu_avr +#define helper_atomic_fetch_orl_le_mmu helper_atomic_fetch_orl_le_mmu_avr +#define helper_atomic_fetch_xorl_le_mmu helper_atomic_fetch_xorl_le_mmu_avr +#define helper_atomic_add_fetchl_le_mmu helper_atomic_add_fetchl_le_mmu_avr +#define helper_atomic_and_fetchl_le_mmu helper_atomic_and_fetchl_le_mmu_avr +#define helper_atomic_or_fetchl_le_mmu helper_atomic_or_fetchl_le_mmu_avr +#define helper_atomic_xor_fetchl_le_mmu helper_atomic_xor_fetchl_le_mmu_avr +#define helper_atomic_fetch_sminl_le_mmu helper_atomic_fetch_sminl_le_mmu_avr +#define helper_atomic_fetch_uminl_le_mmu helper_atomic_fetch_uminl_le_mmu_avr +#define helper_atomic_fetch_smaxl_le_mmu helper_atomic_fetch_smaxl_le_mmu_avr +#define helper_atomic_fetch_umaxl_le_mmu helper_atomic_fetch_umaxl_le_mmu_avr +#define helper_atomic_smin_fetchl_le_mmu helper_atomic_smin_fetchl_le_mmu_avr +#define helper_atomic_umin_fetchl_le_mmu helper_atomic_umin_fetchl_le_mmu_avr +#define helper_atomic_smax_fetchl_le_mmu helper_atomic_smax_fetchl_le_mmu_avr +#define helper_atomic_umax_fetchl_le_mmu helper_atomic_umax_fetchl_le_mmu_avr +#define helper_atomic_cmpxchgl_be_mmu helper_atomic_cmpxchgl_be_mmu_avr +#define helper_atomic_xchgl_be_mmu helper_atomic_xchgl_be_mmu_avr +#define helper_atomic_fetch_andl_be_mmu helper_atomic_fetch_andl_be_mmu_avr +#define helper_atomic_fetch_orl_be_mmu helper_atomic_fetch_orl_be_mmu_avr +#define helper_atomic_fetch_xorl_be_mmu helper_atomic_fetch_xorl_be_mmu_avr +#define helper_atomic_and_fetchl_be_mmu helper_atomic_and_fetchl_be_mmu_avr +#define helper_atomic_or_fetchl_be_mmu helper_atomic_or_fetchl_be_mmu_avr +#define helper_atomic_xor_fetchl_be_mmu helper_atomic_xor_fetchl_be_mmu_avr +#define helper_atomic_fetch_sminl_be_mmu helper_atomic_fetch_sminl_be_mmu_avr +#define helper_atomic_fetch_uminl_be_mmu helper_atomic_fetch_uminl_be_mmu_avr +#define helper_atomic_fetch_smaxl_be_mmu helper_atomic_fetch_smaxl_be_mmu_avr +#define helper_atomic_fetch_umaxl_be_mmu helper_atomic_fetch_umaxl_be_mmu_avr +#define helper_atomic_smin_fetchl_be_mmu helper_atomic_smin_fetchl_be_mmu_avr +#define helper_atomic_umin_fetchl_be_mmu helper_atomic_umin_fetchl_be_mmu_avr +#define helper_atomic_smax_fetchl_be_mmu helper_atomic_smax_fetchl_be_mmu_avr +#define helper_atomic_umax_fetchl_be_mmu helper_atomic_umax_fetchl_be_mmu_avr +#define helper_atomic_fetch_addl_be_mmu helper_atomic_fetch_addl_be_mmu_avr +#define helper_atomic_add_fetchl_be_mmu helper_atomic_add_fetchl_be_mmu_avr +#define helper_atomic_cmpxchgq_le_mmu helper_atomic_cmpxchgq_le_mmu_avr +#define helper_atomic_xchgq_le_mmu helper_atomic_xchgq_le_mmu_avr +#define helper_atomic_fetch_addq_le_mmu helper_atomic_fetch_addq_le_mmu_avr +#define helper_atomic_fetch_andq_le_mmu helper_atomic_fetch_andq_le_mmu_avr +#define helper_atomic_fetch_orq_le_mmu helper_atomic_fetch_orq_le_mmu_avr +#define helper_atomic_fetch_xorq_le_mmu helper_atomic_fetch_xorq_le_mmu_avr +#define helper_atomic_add_fetchq_le_mmu helper_atomic_add_fetchq_le_mmu_avr +#define helper_atomic_and_fetchq_le_mmu helper_atomic_and_fetchq_le_mmu_avr +#define 
helper_atomic_or_fetchq_le_mmu helper_atomic_or_fetchq_le_mmu_avr +#define helper_atomic_xor_fetchq_le_mmu helper_atomic_xor_fetchq_le_mmu_avr +#define helper_atomic_fetch_sminq_le_mmu helper_atomic_fetch_sminq_le_mmu_avr +#define helper_atomic_fetch_uminq_le_mmu helper_atomic_fetch_uminq_le_mmu_avr +#define helper_atomic_fetch_smaxq_le_mmu helper_atomic_fetch_smaxq_le_mmu_avr +#define helper_atomic_fetch_umaxq_le_mmu helper_atomic_fetch_umaxq_le_mmu_avr +#define helper_atomic_smin_fetchq_le_mmu helper_atomic_smin_fetchq_le_mmu_avr +#define helper_atomic_umin_fetchq_le_mmu helper_atomic_umin_fetchq_le_mmu_avr +#define helper_atomic_smax_fetchq_le_mmu helper_atomic_smax_fetchq_le_mmu_avr +#define helper_atomic_umax_fetchq_le_mmu helper_atomic_umax_fetchq_le_mmu_avr +#define helper_atomic_cmpxchgq_be_mmu helper_atomic_cmpxchgq_be_mmu_avr +#define helper_atomic_xchgq_be_mmu helper_atomic_xchgq_be_mmu_avr +#define helper_atomic_fetch_andq_be_mmu helper_atomic_fetch_andq_be_mmu_avr +#define helper_atomic_fetch_orq_be_mmu helper_atomic_fetch_orq_be_mmu_avr +#define helper_atomic_fetch_xorq_be_mmu helper_atomic_fetch_xorq_be_mmu_avr +#define helper_atomic_and_fetchq_be_mmu helper_atomic_and_fetchq_be_mmu_avr +#define helper_atomic_or_fetchq_be_mmu helper_atomic_or_fetchq_be_mmu_avr +#define helper_atomic_xor_fetchq_be_mmu helper_atomic_xor_fetchq_be_mmu_avr +#define helper_atomic_fetch_sminq_be_mmu helper_atomic_fetch_sminq_be_mmu_avr +#define helper_atomic_fetch_uminq_be_mmu helper_atomic_fetch_uminq_be_mmu_avr +#define helper_atomic_fetch_smaxq_be_mmu helper_atomic_fetch_smaxq_be_mmu_avr +#define helper_atomic_fetch_umaxq_be_mmu helper_atomic_fetch_umaxq_be_mmu_avr +#define helper_atomic_smin_fetchq_be_mmu helper_atomic_smin_fetchq_be_mmu_avr +#define helper_atomic_umin_fetchq_be_mmu helper_atomic_umin_fetchq_be_mmu_avr +#define helper_atomic_smax_fetchq_be_mmu helper_atomic_smax_fetchq_be_mmu_avr +#define helper_atomic_umax_fetchq_be_mmu helper_atomic_umax_fetchq_be_mmu_avr +#define helper_atomic_fetch_addq_be_mmu helper_atomic_fetch_addq_be_mmu_avr +#define helper_atomic_add_fetchq_be_mmu helper_atomic_add_fetchq_be_mmu_avr +#define helper_atomic_cmpxchgb helper_atomic_cmpxchgb_avr +#define helper_atomic_xchgb helper_atomic_xchgb_avr +#define helper_atomic_fetch_addb helper_atomic_fetch_addb_avr +#define helper_atomic_fetch_andb helper_atomic_fetch_andb_avr +#define helper_atomic_fetch_orb helper_atomic_fetch_orb_avr +#define helper_atomic_fetch_xorb helper_atomic_fetch_xorb_avr +#define helper_atomic_add_fetchb helper_atomic_add_fetchb_avr +#define helper_atomic_and_fetchb helper_atomic_and_fetchb_avr +#define helper_atomic_or_fetchb helper_atomic_or_fetchb_avr +#define helper_atomic_xor_fetchb helper_atomic_xor_fetchb_avr +#define helper_atomic_fetch_sminb helper_atomic_fetch_sminb_avr +#define helper_atomic_fetch_uminb helper_atomic_fetch_uminb_avr +#define helper_atomic_fetch_smaxb helper_atomic_fetch_smaxb_avr +#define helper_atomic_fetch_umaxb helper_atomic_fetch_umaxb_avr +#define helper_atomic_smin_fetchb helper_atomic_smin_fetchb_avr +#define helper_atomic_umin_fetchb helper_atomic_umin_fetchb_avr +#define helper_atomic_smax_fetchb helper_atomic_smax_fetchb_avr +#define helper_atomic_umax_fetchb helper_atomic_umax_fetchb_avr +#define helper_atomic_cmpxchgw_le helper_atomic_cmpxchgw_le_avr +#define helper_atomic_xchgw_le helper_atomic_xchgw_le_avr +#define helper_atomic_fetch_addw_le helper_atomic_fetch_addw_le_avr +#define helper_atomic_fetch_andw_le helper_atomic_fetch_andw_le_avr 
+#define helper_atomic_fetch_orw_le helper_atomic_fetch_orw_le_avr +#define helper_atomic_fetch_xorw_le helper_atomic_fetch_xorw_le_avr +#define helper_atomic_add_fetchw_le helper_atomic_add_fetchw_le_avr +#define helper_atomic_and_fetchw_le helper_atomic_and_fetchw_le_avr +#define helper_atomic_or_fetchw_le helper_atomic_or_fetchw_le_avr +#define helper_atomic_xor_fetchw_le helper_atomic_xor_fetchw_le_avr +#define helper_atomic_fetch_sminw_le helper_atomic_fetch_sminw_le_avr +#define helper_atomic_fetch_uminw_le helper_atomic_fetch_uminw_le_avr +#define helper_atomic_fetch_smaxw_le helper_atomic_fetch_smaxw_le_avr +#define helper_atomic_fetch_umaxw_le helper_atomic_fetch_umaxw_le_avr +#define helper_atomic_smin_fetchw_le helper_atomic_smin_fetchw_le_avr +#define helper_atomic_umin_fetchw_le helper_atomic_umin_fetchw_le_avr +#define helper_atomic_smax_fetchw_le helper_atomic_smax_fetchw_le_avr +#define helper_atomic_umax_fetchw_le helper_atomic_umax_fetchw_le_avr +#define helper_atomic_cmpxchgw_be helper_atomic_cmpxchgw_be_avr +#define helper_atomic_xchgw_be helper_atomic_xchgw_be_avr +#define helper_atomic_fetch_andw_be helper_atomic_fetch_andw_be_avr +#define helper_atomic_fetch_orw_be helper_atomic_fetch_orw_be_avr +#define helper_atomic_fetch_xorw_be helper_atomic_fetch_xorw_be_avr +#define helper_atomic_and_fetchw_be helper_atomic_and_fetchw_be_avr +#define helper_atomic_or_fetchw_be helper_atomic_or_fetchw_be_avr +#define helper_atomic_xor_fetchw_be helper_atomic_xor_fetchw_be_avr +#define helper_atomic_fetch_sminw_be helper_atomic_fetch_sminw_be_avr +#define helper_atomic_fetch_uminw_be helper_atomic_fetch_uminw_be_avr +#define helper_atomic_fetch_smaxw_be helper_atomic_fetch_smaxw_be_avr +#define helper_atomic_fetch_umaxw_be helper_atomic_fetch_umaxw_be_avr +#define helper_atomic_smin_fetchw_be helper_atomic_smin_fetchw_be_avr +#define helper_atomic_umin_fetchw_be helper_atomic_umin_fetchw_be_avr +#define helper_atomic_smax_fetchw_be helper_atomic_smax_fetchw_be_avr +#define helper_atomic_umax_fetchw_be helper_atomic_umax_fetchw_be_avr +#define helper_atomic_fetch_addw_be helper_atomic_fetch_addw_be_avr +#define helper_atomic_add_fetchw_be helper_atomic_add_fetchw_be_avr +#define helper_atomic_cmpxchgl_le helper_atomic_cmpxchgl_le_avr +#define helper_atomic_xchgl_le helper_atomic_xchgl_le_avr +#define helper_atomic_fetch_addl_le helper_atomic_fetch_addl_le_avr +#define helper_atomic_fetch_andl_le helper_atomic_fetch_andl_le_avr +#define helper_atomic_fetch_orl_le helper_atomic_fetch_orl_le_avr +#define helper_atomic_fetch_xorl_le helper_atomic_fetch_xorl_le_avr +#define helper_atomic_add_fetchl_le helper_atomic_add_fetchl_le_avr +#define helper_atomic_and_fetchl_le helper_atomic_and_fetchl_le_avr +#define helper_atomic_or_fetchl_le helper_atomic_or_fetchl_le_avr +#define helper_atomic_xor_fetchl_le helper_atomic_xor_fetchl_le_avr +#define helper_atomic_fetch_sminl_le helper_atomic_fetch_sminl_le_avr +#define helper_atomic_fetch_uminl_le helper_atomic_fetch_uminl_le_avr +#define helper_atomic_fetch_smaxl_le helper_atomic_fetch_smaxl_le_avr +#define helper_atomic_fetch_umaxl_le helper_atomic_fetch_umaxl_le_avr +#define helper_atomic_smin_fetchl_le helper_atomic_smin_fetchl_le_avr +#define helper_atomic_umin_fetchl_le helper_atomic_umin_fetchl_le_avr +#define helper_atomic_smax_fetchl_le helper_atomic_smax_fetchl_le_avr +#define helper_atomic_umax_fetchl_le helper_atomic_umax_fetchl_le_avr +#define helper_atomic_cmpxchgl_be helper_atomic_cmpxchgl_be_avr +#define helper_atomic_xchgl_be 
helper_atomic_xchgl_be_avr +#define helper_atomic_fetch_andl_be helper_atomic_fetch_andl_be_avr +#define helper_atomic_fetch_orl_be helper_atomic_fetch_orl_be_avr +#define helper_atomic_fetch_xorl_be helper_atomic_fetch_xorl_be_avr +#define helper_atomic_and_fetchl_be helper_atomic_and_fetchl_be_avr +#define helper_atomic_or_fetchl_be helper_atomic_or_fetchl_be_avr +#define helper_atomic_xor_fetchl_be helper_atomic_xor_fetchl_be_avr +#define helper_atomic_fetch_sminl_be helper_atomic_fetch_sminl_be_avr +#define helper_atomic_fetch_uminl_be helper_atomic_fetch_uminl_be_avr +#define helper_atomic_fetch_smaxl_be helper_atomic_fetch_smaxl_be_avr +#define helper_atomic_fetch_umaxl_be helper_atomic_fetch_umaxl_be_avr +#define helper_atomic_smin_fetchl_be helper_atomic_smin_fetchl_be_avr +#define helper_atomic_umin_fetchl_be helper_atomic_umin_fetchl_be_avr +#define helper_atomic_smax_fetchl_be helper_atomic_smax_fetchl_be_avr +#define helper_atomic_umax_fetchl_be helper_atomic_umax_fetchl_be_avr +#define helper_atomic_fetch_addl_be helper_atomic_fetch_addl_be_avr +#define helper_atomic_add_fetchl_be helper_atomic_add_fetchl_be_avr +#define helper_atomic_cmpxchgq_le helper_atomic_cmpxchgq_le_avr +#define helper_atomic_xchgq_le helper_atomic_xchgq_le_avr +#define helper_atomic_fetch_addq_le helper_atomic_fetch_addq_le_avr +#define helper_atomic_fetch_andq_le helper_atomic_fetch_andq_le_avr +#define helper_atomic_fetch_orq_le helper_atomic_fetch_orq_le_avr +#define helper_atomic_fetch_xorq_le helper_atomic_fetch_xorq_le_avr +#define helper_atomic_add_fetchq_le helper_atomic_add_fetchq_le_avr +#define helper_atomic_and_fetchq_le helper_atomic_and_fetchq_le_avr +#define helper_atomic_or_fetchq_le helper_atomic_or_fetchq_le_avr +#define helper_atomic_xor_fetchq_le helper_atomic_xor_fetchq_le_avr +#define helper_atomic_fetch_sminq_le helper_atomic_fetch_sminq_le_avr +#define helper_atomic_fetch_uminq_le helper_atomic_fetch_uminq_le_avr +#define helper_atomic_fetch_smaxq_le helper_atomic_fetch_smaxq_le_avr +#define helper_atomic_fetch_umaxq_le helper_atomic_fetch_umaxq_le_avr +#define helper_atomic_smin_fetchq_le helper_atomic_smin_fetchq_le_avr +#define helper_atomic_umin_fetchq_le helper_atomic_umin_fetchq_le_avr +#define helper_atomic_smax_fetchq_le helper_atomic_smax_fetchq_le_avr +#define helper_atomic_umax_fetchq_le helper_atomic_umax_fetchq_le_avr +#define helper_atomic_cmpxchgq_be helper_atomic_cmpxchgq_be_avr +#define helper_atomic_xchgq_be helper_atomic_xchgq_be_avr +#define helper_atomic_fetch_andq_be helper_atomic_fetch_andq_be_avr +#define helper_atomic_fetch_orq_be helper_atomic_fetch_orq_be_avr +#define helper_atomic_fetch_xorq_be helper_atomic_fetch_xorq_be_avr +#define helper_atomic_and_fetchq_be helper_atomic_and_fetchq_be_avr +#define helper_atomic_or_fetchq_be helper_atomic_or_fetchq_be_avr +#define helper_atomic_xor_fetchq_be helper_atomic_xor_fetchq_be_avr +#define helper_atomic_fetch_sminq_be helper_atomic_fetch_sminq_be_avr +#define helper_atomic_fetch_uminq_be helper_atomic_fetch_uminq_be_avr +#define helper_atomic_fetch_smaxq_be helper_atomic_fetch_smaxq_be_avr +#define helper_atomic_fetch_umaxq_be helper_atomic_fetch_umaxq_be_avr +#define helper_atomic_smin_fetchq_be helper_atomic_smin_fetchq_be_avr +#define helper_atomic_umin_fetchq_be helper_atomic_umin_fetchq_be_avr +#define helper_atomic_smax_fetchq_be helper_atomic_smax_fetchq_be_avr +#define helper_atomic_umax_fetchq_be helper_atomic_umax_fetchq_be_avr +#define helper_atomic_fetch_addq_be helper_atomic_fetch_addq_be_avr 
+#define helper_atomic_add_fetchq_be helper_atomic_add_fetchq_be_avr +#define cpu_ldub_code cpu_ldub_code_avr +#define cpu_lduw_code cpu_lduw_code_avr +#define cpu_ldl_code cpu_ldl_code_avr +#define cpu_ldq_code cpu_ldq_code_avr +#define helper_div_i32 helper_div_i32_avr +#define helper_rem_i32 helper_rem_i32_avr +#define helper_divu_i32 helper_divu_i32_avr +#define helper_remu_i32 helper_remu_i32_avr +#define helper_shl_i64 helper_shl_i64_avr +#define helper_shr_i64 helper_shr_i64_avr +#define helper_sar_i64 helper_sar_i64_avr +#define helper_div_i64 helper_div_i64_avr +#define helper_rem_i64 helper_rem_i64_avr +#define helper_divu_i64 helper_divu_i64_avr +#define helper_remu_i64 helper_remu_i64_avr +#define helper_muluh_i64 helper_muluh_i64_avr +#define helper_mulsh_i64 helper_mulsh_i64_avr +#define helper_clz_i32 helper_clz_i32_avr +#define helper_ctz_i32 helper_ctz_i32_avr +#define helper_clz_i64 helper_clz_i64_avr +#define helper_ctz_i64 helper_ctz_i64_avr +#define helper_clrsb_i32 helper_clrsb_i32_avr +#define helper_clrsb_i64 helper_clrsb_i64_avr +#define helper_ctpop_i32 helper_ctpop_i32_avr +#define helper_ctpop_i64 helper_ctpop_i64_avr +#define helper_lookup_tb_ptr helper_lookup_tb_ptr_avr +#define helper_exit_atomic helper_exit_atomic_avr +#define helper_gvec_add8 helper_gvec_add8_avr +#define helper_gvec_add16 helper_gvec_add16_avr +#define helper_gvec_add32 helper_gvec_add32_avr +#define helper_gvec_add64 helper_gvec_add64_avr +#define helper_gvec_adds8 helper_gvec_adds8_avr +#define helper_gvec_adds16 helper_gvec_adds16_avr +#define helper_gvec_adds32 helper_gvec_adds32_avr +#define helper_gvec_adds64 helper_gvec_adds64_avr +#define helper_gvec_sub8 helper_gvec_sub8_avr +#define helper_gvec_sub16 helper_gvec_sub16_avr +#define helper_gvec_sub32 helper_gvec_sub32_avr +#define helper_gvec_sub64 helper_gvec_sub64_avr +#define helper_gvec_subs8 helper_gvec_subs8_avr +#define helper_gvec_subs16 helper_gvec_subs16_avr +#define helper_gvec_subs32 helper_gvec_subs32_avr +#define helper_gvec_subs64 helper_gvec_subs64_avr +#define helper_gvec_mul8 helper_gvec_mul8_avr +#define helper_gvec_mul16 helper_gvec_mul16_avr +#define helper_gvec_mul32 helper_gvec_mul32_avr +#define helper_gvec_mul64 helper_gvec_mul64_avr +#define helper_gvec_muls8 helper_gvec_muls8_avr +#define helper_gvec_muls16 helper_gvec_muls16_avr +#define helper_gvec_muls32 helper_gvec_muls32_avr +#define helper_gvec_muls64 helper_gvec_muls64_avr +#define helper_gvec_neg8 helper_gvec_neg8_avr +#define helper_gvec_neg16 helper_gvec_neg16_avr +#define helper_gvec_neg32 helper_gvec_neg32_avr +#define helper_gvec_neg64 helper_gvec_neg64_avr +#define helper_gvec_abs8 helper_gvec_abs8_avr +#define helper_gvec_abs16 helper_gvec_abs16_avr +#define helper_gvec_abs32 helper_gvec_abs32_avr +#define helper_gvec_abs64 helper_gvec_abs64_avr +#define helper_gvec_mov helper_gvec_mov_avr +#define helper_gvec_dup64 helper_gvec_dup64_avr +#define helper_gvec_dup32 helper_gvec_dup32_avr +#define helper_gvec_dup16 helper_gvec_dup16_avr +#define helper_gvec_dup8 helper_gvec_dup8_avr +#define helper_gvec_not helper_gvec_not_avr +#define helper_gvec_and helper_gvec_and_avr +#define helper_gvec_or helper_gvec_or_avr +#define helper_gvec_xor helper_gvec_xor_avr +#define helper_gvec_andc helper_gvec_andc_avr +#define helper_gvec_orc helper_gvec_orc_avr +#define helper_gvec_nand helper_gvec_nand_avr +#define helper_gvec_nor helper_gvec_nor_avr +#define helper_gvec_eqv helper_gvec_eqv_avr +#define helper_gvec_ands helper_gvec_ands_avr +#define 
helper_gvec_xors helper_gvec_xors_avr +#define helper_gvec_ors helper_gvec_ors_avr +#define helper_gvec_shl8i helper_gvec_shl8i_avr +#define helper_gvec_shl16i helper_gvec_shl16i_avr +#define helper_gvec_shl32i helper_gvec_shl32i_avr +#define helper_gvec_shl64i helper_gvec_shl64i_avr +#define helper_gvec_shr8i helper_gvec_shr8i_avr +#define helper_gvec_shr16i helper_gvec_shr16i_avr +#define helper_gvec_shr32i helper_gvec_shr32i_avr +#define helper_gvec_shr64i helper_gvec_shr64i_avr +#define helper_gvec_sar8i helper_gvec_sar8i_avr +#define helper_gvec_sar16i helper_gvec_sar16i_avr +#define helper_gvec_sar32i helper_gvec_sar32i_avr +#define helper_gvec_sar64i helper_gvec_sar64i_avr +#define helper_gvec_shl8v helper_gvec_shl8v_avr +#define helper_gvec_shl16v helper_gvec_shl16v_avr +#define helper_gvec_shl32v helper_gvec_shl32v_avr +#define helper_gvec_shl64v helper_gvec_shl64v_avr +#define helper_gvec_shr8v helper_gvec_shr8v_avr +#define helper_gvec_shr16v helper_gvec_shr16v_avr +#define helper_gvec_shr32v helper_gvec_shr32v_avr +#define helper_gvec_shr64v helper_gvec_shr64v_avr +#define helper_gvec_sar8v helper_gvec_sar8v_avr +#define helper_gvec_sar16v helper_gvec_sar16v_avr +#define helper_gvec_sar32v helper_gvec_sar32v_avr +#define helper_gvec_sar64v helper_gvec_sar64v_avr +#define helper_gvec_eq8 helper_gvec_eq8_avr +#define helper_gvec_ne8 helper_gvec_ne8_avr +#define helper_gvec_lt8 helper_gvec_lt8_avr +#define helper_gvec_le8 helper_gvec_le8_avr +#define helper_gvec_ltu8 helper_gvec_ltu8_avr +#define helper_gvec_leu8 helper_gvec_leu8_avr +#define helper_gvec_eq16 helper_gvec_eq16_avr +#define helper_gvec_ne16 helper_gvec_ne16_avr +#define helper_gvec_lt16 helper_gvec_lt16_avr +#define helper_gvec_le16 helper_gvec_le16_avr +#define helper_gvec_ltu16 helper_gvec_ltu16_avr +#define helper_gvec_leu16 helper_gvec_leu16_avr +#define helper_gvec_eq32 helper_gvec_eq32_avr +#define helper_gvec_ne32 helper_gvec_ne32_avr +#define helper_gvec_lt32 helper_gvec_lt32_avr +#define helper_gvec_le32 helper_gvec_le32_avr +#define helper_gvec_ltu32 helper_gvec_ltu32_avr +#define helper_gvec_leu32 helper_gvec_leu32_avr +#define helper_gvec_eq64 helper_gvec_eq64_avr +#define helper_gvec_ne64 helper_gvec_ne64_avr +#define helper_gvec_lt64 helper_gvec_lt64_avr +#define helper_gvec_le64 helper_gvec_le64_avr +#define helper_gvec_ltu64 helper_gvec_ltu64_avr +#define helper_gvec_leu64 helper_gvec_leu64_avr +#define helper_gvec_ssadd8 helper_gvec_ssadd8_avr +#define helper_gvec_ssadd16 helper_gvec_ssadd16_avr +#define helper_gvec_ssadd32 helper_gvec_ssadd32_avr +#define helper_gvec_ssadd64 helper_gvec_ssadd64_avr +#define helper_gvec_sssub8 helper_gvec_sssub8_avr +#define helper_gvec_sssub16 helper_gvec_sssub16_avr +#define helper_gvec_sssub32 helper_gvec_sssub32_avr +#define helper_gvec_sssub64 helper_gvec_sssub64_avr +#define helper_gvec_usadd8 helper_gvec_usadd8_avr +#define helper_gvec_usadd16 helper_gvec_usadd16_avr +#define helper_gvec_usadd32 helper_gvec_usadd32_avr +#define helper_gvec_usadd64 helper_gvec_usadd64_avr +#define helper_gvec_ussub8 helper_gvec_ussub8_avr +#define helper_gvec_ussub16 helper_gvec_ussub16_avr +#define helper_gvec_ussub32 helper_gvec_ussub32_avr +#define helper_gvec_ussub64 helper_gvec_ussub64_avr +#define helper_gvec_smin8 helper_gvec_smin8_avr +#define helper_gvec_smin16 helper_gvec_smin16_avr +#define helper_gvec_smin32 helper_gvec_smin32_avr +#define helper_gvec_smin64 helper_gvec_smin64_avr +#define helper_gvec_smax8 helper_gvec_smax8_avr +#define helper_gvec_smax16 
helper_gvec_smax16_avr +#define helper_gvec_smax32 helper_gvec_smax32_avr +#define helper_gvec_smax64 helper_gvec_smax64_avr +#define helper_gvec_umin8 helper_gvec_umin8_avr +#define helper_gvec_umin16 helper_gvec_umin16_avr +#define helper_gvec_umin32 helper_gvec_umin32_avr +#define helper_gvec_umin64 helper_gvec_umin64_avr +#define helper_gvec_umax8 helper_gvec_umax8_avr +#define helper_gvec_umax16 helper_gvec_umax16_avr +#define helper_gvec_umax32 helper_gvec_umax32_avr +#define helper_gvec_umax64 helper_gvec_umax64_avr +#define helper_gvec_bitsel helper_gvec_bitsel_avr +#define cpu_restore_state cpu_restore_state_avr +#define page_collection_lock page_collection_lock_avr +#define page_collection_unlock page_collection_unlock_avr +#define free_code_gen_buffer free_code_gen_buffer_avr +#define tcg_exec_init tcg_exec_init_avr +#define tb_cleanup tb_cleanup_avr +#define tb_flush tb_flush_avr +#define tb_phys_invalidate tb_phys_invalidate_avr +#define tb_gen_code tb_gen_code_avr +#define tb_exec_lock tb_exec_lock_avr +#define tb_exec_unlock tb_exec_unlock_avr +#define tb_invalidate_phys_page_range tb_invalidate_phys_page_range_avr +#define tb_invalidate_phys_range tb_invalidate_phys_range_avr +#define tb_invalidate_phys_page_fast tb_invalidate_phys_page_fast_avr +#define tb_check_watchpoint tb_check_watchpoint_avr +#define cpu_io_recompile cpu_io_recompile_avr +#define tb_flush_jmp_cache tb_flush_jmp_cache_avr +#define tcg_flush_softmmu_tlb tcg_flush_softmmu_tlb_avr +#define translator_loop_temp_check translator_loop_temp_check_avr +#define translator_loop translator_loop_avr +#define helper_atomic_cmpxchgo_le_mmu helper_atomic_cmpxchgo_le_mmu_avr +#define helper_atomic_cmpxchgo_be_mmu helper_atomic_cmpxchgo_be_mmu_avr +#define helper_atomic_ldo_le_mmu helper_atomic_ldo_le_mmu_avr +#define helper_atomic_ldo_be_mmu helper_atomic_ldo_be_mmu_avr +#define helper_atomic_sto_le_mmu helper_atomic_sto_le_mmu_avr +#define helper_atomic_sto_be_mmu helper_atomic_sto_be_mmu_avr +#define unassigned_mem_ops unassigned_mem_ops_avr +#define floatx80_infinity floatx80_infinity_avr +#define dup_const_func dup_const_func_avr +#define gen_helper_raise_exception gen_helper_raise_exception_avr +#define gen_helper_raise_interrupt gen_helper_raise_interrupt_avr +#define gen_helper_vfp_get_fpscr gen_helper_vfp_get_fpscr_avr +#define gen_helper_vfp_set_fpscr gen_helper_vfp_set_fpscr_avr +#define gen_helper_cpsr_read gen_helper_cpsr_read_avr +#define gen_helper_cpsr_write gen_helper_cpsr_write_avr +#define helper_sleep helper_sleep_avr +#define helper_unsupported helper_unsupported_avr +#define helper_debug helper_debug_avr +#define helper_break helper_break_avr +#define helper_inb helper_inb_avr +#define helper_outb helper_outb_avr +#define helper_fullrd helper_fullrd_avr +#define helper_fullwr helper_fullwr_avr +#define helper_wdr helper_wdr_avr +#define gen_intermediate_code gen_intermediate_code_avr +#define restore_state_to_opc restore_state_to_opc_avr + +#define reg_read reg_read_avr +#define reg_write reg_write_avr +#define uc_init uc_init_avr +#endif diff --git a/qemu/configure b/qemu/configure index f52b5b9531..cc5752292f 100755 --- a/qemu/configure +++ b/qemu/configure @@ -496,6 +496,8 @@ elif check_define __aarch64__ ; then cpu="aarch64" elif check_define __tricore__ ; then cpu="tricore" +elif check_define __AVR__ ; then + cpu="avr" else cpu=$(uname -m) fi @@ -539,6 +541,10 @@ case "$cpu" in cpu="tricore" supported_cpu="yes" ;; + avr) + cpu="avr" + supported_cpu="yes" + ;; *) # This will result in either 
an error or falling back to TCI later ARCH=unknown @@ -867,8 +873,8 @@ QEMU_CFLAGS="$CPU_CFLAGS $QEMU_CFLAGS" default_target_list="aarch64-softmmu \ arm-softmmu m68k-softmmu mips64el-softmmu mips64-softmmu mipsel-softmmu \ mips-softmmu ppc64-softmmu ppc-softmmu sparc64-softmmu sparc-softmmu \ - x86_64-softmmu riscv32-softmmu riscv64-softmmu s390x-softmmu \ - tricore-softmmu" + x86_64-softmmu rh850-softmmu riscv32-softmmu riscv64-softmmu s390x-softmmu \ + tricore-softmmu avr-softmmu" if test x"$show_help" = x"yes" ; then cat << EOF @@ -2645,7 +2651,7 @@ config_target_mak=$target_dir/config-target.mak target_name=$(echo $target | cut -d '-' -f 1) target_aligned_only="no" case "$target_name" in - alpha|hppa|mips64el|mips64|mipsel|mips|mipsn32|mipsn32el|sh4|sh4eb|sparc|sparc64|sparc32plus|xtensa|xtensaeb) + alpha|hppa|mips64el|mips64|mipsel|mips|mipsn32|mipsn32el|rh850|sh4|sh4eb|sparc|sparc64|sparc32plus|xtensa|xtensaeb) target_aligned_only="yes" ;; esac @@ -2761,6 +2767,11 @@ case "$target_name" in TARGET_SYSTBL_ABI=common,nospu,32 echo "TARGET_ABI32=y" >> $config_target_mak ;; + rh850) + TARGET_ARCH=rh850 + TARGET_ABI_DIR=rh850 + mttcg=no # system emulation is not supported for RH850 + ;; riscv32) TARGET_BASE_ARCH=riscv TARGET_ABI_DIR=riscv @@ -2803,6 +2814,10 @@ case "$target_name" in TARGET_ARCH=tricore TARGET_BASE_ARCH=tricore ;; + avr) + TARGET_ARCH=avr + TARGET_BASE_ARCH=avr + ;; unicore32) ;; xtensa|xtensaeb) diff --git a/qemu/include/tcg/tcg.h b/qemu/include/tcg/tcg.h index 695609df0c..f1559fcde0 100644 --- a/qemu/include/tcg/tcg.h +++ b/qemu/include/tcg/tcg.h @@ -725,7 +725,7 @@ struct TCGContext { void *tb_ret_addr; /* target/riscv/translate.c */ - TCGv cpu_gpr[32], cpu_pc, cpu_vl; // also target/mips/translate.c + TCGv cpu_gpr[32], cpu_pc, cpu_vl; // also target/mips/translate.c, target/avr/translate.c TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ TCGv load_res; TCGv load_val; @@ -820,6 +820,23 @@ struct TCGContext { char s390x_cpu_reg_names[16][4]; // renamed from original cpu_reg_names[][] to avoid name clash with m68k TCGv_i64 regs[16]; + + // target/avr/translate.c + TCGv cpu_Cf; + TCGv cpu_Zf; + TCGv cpu_Nf; + TCGv cpu_Vf; + TCGv cpu_Sf; + TCGv cpu_Hf; + TCGv cpu_Tf; + TCGv cpu_If; + TCGv cpu_rampD; + TCGv cpu_rampX; + TCGv cpu_rampY; + TCGv cpu_rampZ; + TCGv cpu_eind; + TCGv cpu_sp; + TCGv cpu_skip; }; static inline size_t temp_idx(TCGContext *tcg_ctx, TCGTemp *ts) diff --git a/qemu/rh850.h b/qemu/rh850.h new file mode 100644 index 0000000000..071393cb7c --- /dev/null +++ b/qemu/rh850.h @@ -0,0 +1,1294 @@ +/* Autogen header for Unicorn Engine - DONOT MODIFY */ +#ifndef UNICORN_AUTOGEN_rh850_H +#define UNICORN_AUTOGEN_rh850_H +#ifndef UNICORN_ARCH_POSTFIX +#define UNICORN_ARCH_POSTFIX _rh850 +#endif +#define unicorn_fill_tlb unicorn_fill_tlb_rh850 +#define reg_read reg_read_rh850 +#define reg_write reg_write_rh850 +#define uc_init uc_init_rh850 +#define uc_add_inline_hook uc_add_inline_hook_rh850 +#define uc_del_inline_hook uc_del_inline_hook_rh850 +#define tb_invalidate_phys_range tb_invalidate_phys_range_rh850 +#define use_idiv_instructions use_idiv_instructions_rh850 +#define arm_arch arm_arch_rh850 +#define tb_target_set_jmp_target tb_target_set_jmp_target_rh850 +#define have_bmi1 have_bmi1_rh850 +#define have_popcnt have_popcnt_rh850 +#define have_avx1 have_avx1_rh850 +#define have_avx2 have_avx2_rh850 +#define have_isa have_isa_rh850 +#define have_altivec have_altivec_rh850 +#define have_vsx have_vsx_rh850 +#define flush_icache_range flush_icache_range_rh850 
+#define s390_facilities s390_facilities_rh850 +#define tcg_dump_op tcg_dump_op_rh850 +#define tcg_dump_ops tcg_dump_ops_rh850 +#define tcg_gen_and_i64 tcg_gen_and_i64_rh850 +#define tcg_gen_discard_i64 tcg_gen_discard_i64_rh850 +#define tcg_gen_ld16s_i64 tcg_gen_ld16s_i64_rh850 +#define tcg_gen_ld16u_i64 tcg_gen_ld16u_i64_rh850 +#define tcg_gen_ld32s_i64 tcg_gen_ld32s_i64_rh850 +#define tcg_gen_ld32u_i64 tcg_gen_ld32u_i64_rh850 +#define tcg_gen_ld8s_i64 tcg_gen_ld8s_i64_rh850 +#define tcg_gen_ld8u_i64 tcg_gen_ld8u_i64_rh850 +#define tcg_gen_ld_i64 tcg_gen_ld_i64_rh850 +#define tcg_gen_mov_i64 tcg_gen_mov_i64_rh850 +#define tcg_gen_movi_i64 tcg_gen_movi_i64_rh850 +#define tcg_gen_mul_i64 tcg_gen_mul_i64_rh850 +#define tcg_gen_or_i64 tcg_gen_or_i64_rh850 +#define tcg_gen_sar_i64 tcg_gen_sar_i64_rh850 +#define tcg_gen_shl_i64 tcg_gen_shl_i64_rh850 +#define tcg_gen_shr_i64 tcg_gen_shr_i64_rh850 +#define tcg_gen_st_i64 tcg_gen_st_i64_rh850 +#define tcg_gen_xor_i64 tcg_gen_xor_i64_rh850 +#define cpu_icount_to_ns cpu_icount_to_ns_rh850 +#define cpu_is_stopped cpu_is_stopped_rh850 +#define cpu_get_ticks cpu_get_ticks_rh850 +#define cpu_get_clock cpu_get_clock_rh850 +#define cpu_resume cpu_resume_rh850 +#define qemu_init_vcpu qemu_init_vcpu_rh850 +#define cpu_stop_current cpu_stop_current_rh850 +#define resume_all_vcpus resume_all_vcpus_rh850 +#define vm_start vm_start_rh850 +#define address_space_dispatch_compact address_space_dispatch_compact_rh850 +#define flatview_translate flatview_translate_rh850 +#define address_space_translate_for_iotlb address_space_translate_for_iotlb_rh850 +#define qemu_get_cpu qemu_get_cpu_rh850 +#define cpu_address_space_init cpu_address_space_init_rh850 +#define cpu_get_address_space cpu_get_address_space_rh850 +#define cpu_exec_unrealizefn cpu_exec_unrealizefn_rh850 +#define cpu_exec_initfn cpu_exec_initfn_rh850 +#define cpu_exec_realizefn cpu_exec_realizefn_rh850 +#define tb_invalidate_phys_addr tb_invalidate_phys_addr_rh850 +#define cpu_watchpoint_insert cpu_watchpoint_insert_rh850 +#define cpu_watchpoint_remove_by_ref cpu_watchpoint_remove_by_ref_rh850 +#define cpu_watchpoint_remove_all cpu_watchpoint_remove_all_rh850 +#define cpu_watchpoint_address_matches cpu_watchpoint_address_matches_rh850 +#define cpu_breakpoint_insert cpu_breakpoint_insert_rh850 +#define cpu_breakpoint_remove cpu_breakpoint_remove_rh850 +#define cpu_breakpoint_remove_by_ref cpu_breakpoint_remove_by_ref_rh850 +#define cpu_breakpoint_remove_all cpu_breakpoint_remove_all_rh850 +#define cpu_abort cpu_abort_rh850 +#define cpu_physical_memory_test_and_clear_dirty cpu_physical_memory_test_and_clear_dirty_rh850 +#define memory_region_section_get_iotlb memory_region_section_get_iotlb_rh850 +#define flatview_add_to_dispatch flatview_add_to_dispatch_rh850 +#define qemu_ram_get_host_addr qemu_ram_get_host_addr_rh850 +#define qemu_ram_get_offset qemu_ram_get_offset_rh850 +#define qemu_ram_get_used_length qemu_ram_get_used_length_rh850 +#define qemu_ram_is_shared qemu_ram_is_shared_rh850 +#define qemu_ram_pagesize qemu_ram_pagesize_rh850 +#define qemu_ram_alloc_from_ptr qemu_ram_alloc_from_ptr_rh850 +#define qemu_ram_alloc qemu_ram_alloc_rh850 +#define qemu_ram_free qemu_ram_free_rh850 +#define qemu_map_ram_ptr qemu_map_ram_ptr_rh850 +#define qemu_ram_block_host_offset qemu_ram_block_host_offset_rh850 +#define qemu_ram_block_from_host qemu_ram_block_from_host_rh850 +#define qemu_ram_addr_from_host qemu_ram_addr_from_host_rh850 +#define cpu_check_watchpoint cpu_check_watchpoint_rh850 +#define 
iotlb_to_section iotlb_to_section_rh850 +#define address_space_dispatch_new address_space_dispatch_new_rh850 +#define address_space_dispatch_free address_space_dispatch_free_rh850 +#define flatview_read_continue flatview_read_continue_rh850 +#define address_space_read_full address_space_read_full_rh850 +#define address_space_write address_space_write_rh850 +#define address_space_rw address_space_rw_rh850 +#define cpu_physical_memory_rw cpu_physical_memory_rw_rh850 +#define address_space_write_rom address_space_write_rom_rh850 +#define cpu_flush_icache_range cpu_flush_icache_range_rh850 +#define cpu_exec_init_all cpu_exec_init_all_rh850 +#define address_space_access_valid address_space_access_valid_rh850 +#define address_space_map address_space_map_rh850 +#define address_space_unmap address_space_unmap_rh850 +#define cpu_physical_memory_map cpu_physical_memory_map_rh850 +#define cpu_physical_memory_unmap cpu_physical_memory_unmap_rh850 +#define cpu_memory_rw_debug cpu_memory_rw_debug_rh850 +#define qemu_target_page_size qemu_target_page_size_rh850 +#define qemu_target_page_bits qemu_target_page_bits_rh850 +#define qemu_target_page_bits_min qemu_target_page_bits_min_rh850 +#define target_words_bigendian target_words_bigendian_rh850 +#define cpu_physical_memory_is_io cpu_physical_memory_is_io_rh850 +#define ram_block_discard_range ram_block_discard_range_rh850 +#define ramblock_is_pmem ramblock_is_pmem_rh850 +#define page_size_init page_size_init_rh850 +#define set_preferred_target_page_bits set_preferred_target_page_bits_rh850 +#define finalize_target_page_bits finalize_target_page_bits_rh850 +#define cpu_outb cpu_outb_rh850 +#define cpu_outw cpu_outw_rh850 +#define cpu_outl cpu_outl_rh850 +#define cpu_inb cpu_inb_rh850 +#define cpu_inw cpu_inw_rh850 +#define cpu_inl cpu_inl_rh850 +#define memory_map memory_map_rh850 +#define memory_map_io memory_map_io_rh850 +#define memory_map_ptr memory_map_ptr_rh850 +#define memory_cow memory_cow_rh850 +#define memory_unmap memory_unmap_rh850 +#define memory_moveout memory_moveout_rh850 +#define memory_movein memory_movein_rh850 +#define memory_free memory_free_rh850 +#define flatview_unref flatview_unref_rh850 +#define address_space_get_flatview address_space_get_flatview_rh850 +#define memory_region_transaction_begin memory_region_transaction_begin_rh850 +#define memory_region_transaction_commit memory_region_transaction_commit_rh850 +#define memory_region_init memory_region_init_rh850 +#define memory_region_access_valid memory_region_access_valid_rh850 +#define memory_region_dispatch_read memory_region_dispatch_read_rh850 +#define memory_region_dispatch_write memory_region_dispatch_write_rh850 +#define memory_region_init_io memory_region_init_io_rh850 +#define memory_region_init_ram_ptr memory_region_init_ram_ptr_rh850 +#define memory_region_size memory_region_size_rh850 +#define memory_region_set_readonly memory_region_set_readonly_rh850 +#define memory_region_get_ram_ptr memory_region_get_ram_ptr_rh850 +#define memory_region_from_host memory_region_from_host_rh850 +#define memory_region_get_ram_addr memory_region_get_ram_addr_rh850 +#define memory_region_add_subregion memory_region_add_subregion_rh850 +#define memory_region_del_subregion memory_region_del_subregion_rh850 +#define memory_region_add_subregion_overlap memory_region_add_subregion_overlap_rh850 +#define memory_region_find memory_region_find_rh850 +#define memory_region_filter_subregions memory_region_filter_subregions_rh850 +#define memory_listener_register memory_listener_register_rh850 
+#define memory_listener_unregister memory_listener_unregister_rh850 +#define address_space_remove_listeners address_space_remove_listeners_rh850 +#define address_space_init address_space_init_rh850 +#define address_space_destroy address_space_destroy_rh850 +#define memory_region_init_ram memory_region_init_ram_rh850 +#define memory_mapping_list_add_merge_sorted memory_mapping_list_add_merge_sorted_rh850 +#define find_memory_mapping find_memory_mapping_rh850 +#define exec_inline_op exec_inline_op_rh850 +#define floatx80_default_nan floatx80_default_nan_rh850 +#define float_raise float_raise_rh850 +#define float16_is_quiet_nan float16_is_quiet_nan_rh850 +#define float16_is_signaling_nan float16_is_signaling_nan_rh850 +#define float32_is_quiet_nan float32_is_quiet_nan_rh850 +#define float32_is_signaling_nan float32_is_signaling_nan_rh850 +#define float64_is_quiet_nan float64_is_quiet_nan_rh850 +#define float64_is_signaling_nan float64_is_signaling_nan_rh850 +#define floatx80_is_quiet_nan floatx80_is_quiet_nan_rh850 +#define floatx80_is_signaling_nan floatx80_is_signaling_nan_rh850 +#define floatx80_silence_nan floatx80_silence_nan_rh850 +#define propagateFloatx80NaN propagateFloatx80NaN_rh850 +#define float128_is_quiet_nan float128_is_quiet_nan_rh850 +#define float128_is_signaling_nan float128_is_signaling_nan_rh850 +#define float128_silence_nan float128_silence_nan_rh850 +#define float16_add float16_add_rh850 +#define float16_sub float16_sub_rh850 +#define float32_add float32_add_rh850 +#define float32_sub float32_sub_rh850 +#define float64_add float64_add_rh850 +#define float64_sub float64_sub_rh850 +#define float16_mul float16_mul_rh850 +#define float32_mul float32_mul_rh850 +#define float64_mul float64_mul_rh850 +#define float16_muladd float16_muladd_rh850 +#define float32_muladd float32_muladd_rh850 +#define float64_muladd float64_muladd_rh850 +#define float16_div float16_div_rh850 +#define float32_div float32_div_rh850 +#define float64_div float64_div_rh850 +#define float16_to_float32 float16_to_float32_rh850 +#define float16_to_float64 float16_to_float64_rh850 +#define float32_to_float16 float32_to_float16_rh850 +#define float32_to_float64 float32_to_float64_rh850 +#define float64_to_float16 float64_to_float16_rh850 +#define float64_to_float32 float64_to_float32_rh850 +#define float16_round_to_int float16_round_to_int_rh850 +#define float32_round_to_int float32_round_to_int_rh850 +#define float64_round_to_int float64_round_to_int_rh850 +#define float16_to_int16_scalbn float16_to_int16_scalbn_rh850 +#define float16_to_int32_scalbn float16_to_int32_scalbn_rh850 +#define float16_to_int64_scalbn float16_to_int64_scalbn_rh850 +#define float32_to_int16_scalbn float32_to_int16_scalbn_rh850 +#define float32_to_int32_scalbn float32_to_int32_scalbn_rh850 +#define float32_to_int64_scalbn float32_to_int64_scalbn_rh850 +#define float64_to_int16_scalbn float64_to_int16_scalbn_rh850 +#define float64_to_int32_scalbn float64_to_int32_scalbn_rh850 +#define float64_to_int64_scalbn float64_to_int64_scalbn_rh850 +#define float16_to_int16 float16_to_int16_rh850 +#define float16_to_int32 float16_to_int32_rh850 +#define float16_to_int64 float16_to_int64_rh850 +#define float32_to_int16 float32_to_int16_rh850 +#define float32_to_int32 float32_to_int32_rh850 +#define float32_to_int64 float32_to_int64_rh850 +#define float64_to_int16 float64_to_int16_rh850 +#define float64_to_int32 float64_to_int32_rh850 +#define float64_to_int64 float64_to_int64_rh850 +#define float16_to_int16_round_to_zero 
float16_to_int16_round_to_zero_rh850 +#define float16_to_int32_round_to_zero float16_to_int32_round_to_zero_rh850 +#define float16_to_int64_round_to_zero float16_to_int64_round_to_zero_rh850 +#define float32_to_int16_round_to_zero float32_to_int16_round_to_zero_rh850 +#define float32_to_int32_round_to_zero float32_to_int32_round_to_zero_rh850 +#define float32_to_int64_round_to_zero float32_to_int64_round_to_zero_rh850 +#define float64_to_int16_round_to_zero float64_to_int16_round_to_zero_rh850 +#define float64_to_int32_round_to_zero float64_to_int32_round_to_zero_rh850 +#define float64_to_int64_round_to_zero float64_to_int64_round_to_zero_rh850 +#define float16_to_uint16_scalbn float16_to_uint16_scalbn_rh850 +#define float16_to_uint32_scalbn float16_to_uint32_scalbn_rh850 +#define float16_to_uint64_scalbn float16_to_uint64_scalbn_rh850 +#define float32_to_uint16_scalbn float32_to_uint16_scalbn_rh850 +#define float32_to_uint32_scalbn float32_to_uint32_scalbn_rh850 +#define float32_to_uint64_scalbn float32_to_uint64_scalbn_rh850 +#define float64_to_uint16_scalbn float64_to_uint16_scalbn_rh850 +#define float64_to_uint32_scalbn float64_to_uint32_scalbn_rh850 +#define float64_to_uint64_scalbn float64_to_uint64_scalbn_rh850 +#define float16_to_uint16 float16_to_uint16_rh850 +#define float16_to_uint32 float16_to_uint32_rh850 +#define float16_to_uint64 float16_to_uint64_rh850 +#define float32_to_uint16 float32_to_uint16_rh850 +#define float32_to_uint32 float32_to_uint32_rh850 +#define float32_to_uint64 float32_to_uint64_rh850 +#define float64_to_uint16 float64_to_uint16_rh850 +#define float64_to_uint32 float64_to_uint32_rh850 +#define float64_to_uint64 float64_to_uint64_rh850 +#define float16_to_uint16_round_to_zero float16_to_uint16_round_to_zero_rh850 +#define float16_to_uint32_round_to_zero float16_to_uint32_round_to_zero_rh850 +#define float16_to_uint64_round_to_zero float16_to_uint64_round_to_zero_rh850 +#define float32_to_uint16_round_to_zero float32_to_uint16_round_to_zero_rh850 +#define float32_to_uint32_round_to_zero float32_to_uint32_round_to_zero_rh850 +#define float32_to_uint64_round_to_zero float32_to_uint64_round_to_zero_rh850 +#define float64_to_uint16_round_to_zero float64_to_uint16_round_to_zero_rh850 +#define float64_to_uint32_round_to_zero float64_to_uint32_round_to_zero_rh850 +#define float64_to_uint64_round_to_zero float64_to_uint64_round_to_zero_rh850 +#define int64_to_float16_scalbn int64_to_float16_scalbn_rh850 +#define int32_to_float16_scalbn int32_to_float16_scalbn_rh850 +#define int16_to_float16_scalbn int16_to_float16_scalbn_rh850 +#define int64_to_float16 int64_to_float16_rh850 +#define int32_to_float16 int32_to_float16_rh850 +#define int16_to_float16 int16_to_float16_rh850 +#define int64_to_float32_scalbn int64_to_float32_scalbn_rh850 +#define int32_to_float32_scalbn int32_to_float32_scalbn_rh850 +#define int16_to_float32_scalbn int16_to_float32_scalbn_rh850 +#define int64_to_float32 int64_to_float32_rh850 +#define int32_to_float32 int32_to_float32_rh850 +#define int16_to_float32 int16_to_float32_rh850 +#define int64_to_float64_scalbn int64_to_float64_scalbn_rh850 +#define int32_to_float64_scalbn int32_to_float64_scalbn_rh850 +#define int16_to_float64_scalbn int16_to_float64_scalbn_rh850 +#define int64_to_float64 int64_to_float64_rh850 +#define int32_to_float64 int32_to_float64_rh850 +#define int16_to_float64 int16_to_float64_rh850 +#define uint64_to_float16_scalbn uint64_to_float16_scalbn_rh850 +#define uint32_to_float16_scalbn uint32_to_float16_scalbn_rh850 +#define 
uint16_to_float16_scalbn uint16_to_float16_scalbn_rh850 +#define uint64_to_float16 uint64_to_float16_rh850 +#define uint32_to_float16 uint32_to_float16_rh850 +#define uint16_to_float16 uint16_to_float16_rh850 +#define uint64_to_float32_scalbn uint64_to_float32_scalbn_rh850 +#define uint32_to_float32_scalbn uint32_to_float32_scalbn_rh850 +#define uint16_to_float32_scalbn uint16_to_float32_scalbn_rh850 +#define uint64_to_float32 uint64_to_float32_rh850 +#define uint32_to_float32 uint32_to_float32_rh850 +#define uint16_to_float32 uint16_to_float32_rh850 +#define uint64_to_float64_scalbn uint64_to_float64_scalbn_rh850 +#define uint32_to_float64_scalbn uint32_to_float64_scalbn_rh850 +#define uint16_to_float64_scalbn uint16_to_float64_scalbn_rh850 +#define uint64_to_float64 uint64_to_float64_rh850 +#define uint32_to_float64 uint32_to_float64_rh850 +#define uint16_to_float64 uint16_to_float64_rh850 +#define float16_min float16_min_rh850 +#define float16_minnum float16_minnum_rh850 +#define float16_minnummag float16_minnummag_rh850 +#define float16_max float16_max_rh850 +#define float16_maxnum float16_maxnum_rh850 +#define float16_maxnummag float16_maxnummag_rh850 +#define float32_min float32_min_rh850 +#define float32_minnum float32_minnum_rh850 +#define float32_minnummag float32_minnummag_rh850 +#define float32_max float32_max_rh850 +#define float32_maxnum float32_maxnum_rh850 +#define float32_maxnummag float32_maxnummag_rh850 +#define float64_min float64_min_rh850 +#define float64_minnum float64_minnum_rh850 +#define float64_minnummag float64_minnummag_rh850 +#define float64_max float64_max_rh850 +#define float64_maxnum float64_maxnum_rh850 +#define float64_maxnummag float64_maxnummag_rh850 +#define float16_compare float16_compare_rh850 +#define float16_compare_quiet float16_compare_quiet_rh850 +#define float32_compare float32_compare_rh850 +#define float32_compare_quiet float32_compare_quiet_rh850 +#define float64_compare float64_compare_rh850 +#define float64_compare_quiet float64_compare_quiet_rh850 +#define float16_scalbn float16_scalbn_rh850 +#define float32_scalbn float32_scalbn_rh850 +#define float64_scalbn float64_scalbn_rh850 +#define float16_sqrt float16_sqrt_rh850 +#define float32_sqrt float32_sqrt_rh850 +#define float64_sqrt float64_sqrt_rh850 +#define float16_default_nan float16_default_nan_rh850 +#define float32_default_nan float32_default_nan_rh850 +#define float64_default_nan float64_default_nan_rh850 +#define float128_default_nan float128_default_nan_rh850 +#define float16_silence_nan float16_silence_nan_rh850 +#define float32_silence_nan float32_silence_nan_rh850 +#define float64_silence_nan float64_silence_nan_rh850 +#define float16_squash_input_denormal float16_squash_input_denormal_rh850 +#define float32_squash_input_denormal float32_squash_input_denormal_rh850 +#define float64_squash_input_denormal float64_squash_input_denormal_rh850 +#define normalizeFloatx80Subnormal normalizeFloatx80Subnormal_rh850 +#define roundAndPackFloatx80 roundAndPackFloatx80_rh850 +#define normalizeRoundAndPackFloatx80 normalizeRoundAndPackFloatx80_rh850 +#define int32_to_floatx80 int32_to_floatx80_rh850 +#define int32_to_float128 int32_to_float128_rh850 +#define int64_to_floatx80 int64_to_floatx80_rh850 +#define int64_to_float128 int64_to_float128_rh850 +#define uint64_to_float128 uint64_to_float128_rh850 +#define float32_to_floatx80 float32_to_floatx80_rh850 +#define float32_to_float128 float32_to_float128_rh850 +#define float32_rem float32_rem_rh850 +#define float32_exp2 float32_exp2_rh850 
+#define float32_log2 float32_log2_rh850 +#define float32_eq float32_eq_rh850 +#define float32_le float32_le_rh850 +#define float32_lt float32_lt_rh850 +#define float32_unordered float32_unordered_rh850 +#define float32_eq_quiet float32_eq_quiet_rh850 +#define float32_le_quiet float32_le_quiet_rh850 +#define float32_lt_quiet float32_lt_quiet_rh850 +#define float32_unordered_quiet float32_unordered_quiet_rh850 +#define float64_to_floatx80 float64_to_floatx80_rh850 +#define float64_to_float128 float64_to_float128_rh850 +#define float64_rem float64_rem_rh850 +#define float64_log2 float64_log2_rh850 +#define float64_eq float64_eq_rh850 +#define float64_le float64_le_rh850 +#define float64_lt float64_lt_rh850 +#define float64_unordered float64_unordered_rh850 +#define float64_eq_quiet float64_eq_quiet_rh850 +#define float64_le_quiet float64_le_quiet_rh850 +#define float64_lt_quiet float64_lt_quiet_rh850 +#define float64_unordered_quiet float64_unordered_quiet_rh850 +#define floatx80_to_int32 floatx80_to_int32_rh850 +#define floatx80_to_int32_round_to_zero floatx80_to_int32_round_to_zero_rh850 +#define floatx80_to_int64 floatx80_to_int64_rh850 +#define floatx80_to_int64_round_to_zero floatx80_to_int64_round_to_zero_rh850 +#define floatx80_to_float32 floatx80_to_float32_rh850 +#define floatx80_to_float64 floatx80_to_float64_rh850 +#define floatx80_to_float128 floatx80_to_float128_rh850 +#define floatx80_round floatx80_round_rh850 +#define floatx80_round_to_int floatx80_round_to_int_rh850 +#define floatx80_add floatx80_add_rh850 +#define floatx80_sub floatx80_sub_rh850 +#define floatx80_mul floatx80_mul_rh850 +#define floatx80_div floatx80_div_rh850 +#define floatx80_rem floatx80_rem_rh850 +#define floatx80_sqrt floatx80_sqrt_rh850 +#define floatx80_eq floatx80_eq_rh850 +#define floatx80_le floatx80_le_rh850 +#define floatx80_lt floatx80_lt_rh850 +#define floatx80_unordered floatx80_unordered_rh850 +#define floatx80_eq_quiet floatx80_eq_quiet_rh850 +#define floatx80_le_quiet floatx80_le_quiet_rh850 +#define floatx80_lt_quiet floatx80_lt_quiet_rh850 +#define floatx80_unordered_quiet floatx80_unordered_quiet_rh850 +#define float128_to_int32 float128_to_int32_rh850 +#define float128_to_int32_round_to_zero float128_to_int32_round_to_zero_rh850 +#define float128_to_int64 float128_to_int64_rh850 +#define float128_to_int64_round_to_zero float128_to_int64_round_to_zero_rh850 +#define float128_to_uint64 float128_to_uint64_rh850 +#define float128_to_uint64_round_to_zero float128_to_uint64_round_to_zero_rh850 +#define float128_to_uint32_round_to_zero float128_to_uint32_round_to_zero_rh850 +#define float128_to_uint32 float128_to_uint32_rh850 +#define float128_to_float32 float128_to_float32_rh850 +#define float128_to_float64 float128_to_float64_rh850 +#define float128_to_floatx80 float128_to_floatx80_rh850 +#define float128_round_to_int float128_round_to_int_rh850 +#define float128_add float128_add_rh850 +#define float128_sub float128_sub_rh850 +#define float128_mul float128_mul_rh850 +#define float128_div float128_div_rh850 +#define float128_rem float128_rem_rh850 +#define float128_sqrt float128_sqrt_rh850 +#define float128_eq float128_eq_rh850 +#define float128_le float128_le_rh850 +#define float128_lt float128_lt_rh850 +#define float128_unordered float128_unordered_rh850 +#define float128_eq_quiet float128_eq_quiet_rh850 +#define float128_le_quiet float128_le_quiet_rh850 +#define float128_lt_quiet float128_lt_quiet_rh850 +#define float128_unordered_quiet float128_unordered_quiet_rh850 +#define 
floatx80_compare floatx80_compare_rh850 +#define floatx80_compare_quiet floatx80_compare_quiet_rh850 +#define float128_compare float128_compare_rh850 +#define float128_compare_quiet float128_compare_quiet_rh850 +#define floatx80_scalbn floatx80_scalbn_rh850 +#define float128_scalbn float128_scalbn_rh850 +#define softfloat_init softfloat_init_rh850 +#define tcg_optimize tcg_optimize_rh850 +#define gen_new_label gen_new_label_rh850 +#define tcg_can_emit_vec_op tcg_can_emit_vec_op_rh850 +#define tcg_expand_vec_op tcg_expand_vec_op_rh850 +#define tcg_register_jit tcg_register_jit_rh850 +#define tcg_tb_insert tcg_tb_insert_rh850 +#define tcg_tb_remove tcg_tb_remove_rh850 +#define tcg_tb_lookup tcg_tb_lookup_rh850 +#define tcg_tb_foreach tcg_tb_foreach_rh850 +#define tcg_nb_tbs tcg_nb_tbs_rh850 +#define tcg_region_reset_all tcg_region_reset_all_rh850 +#define tcg_region_init tcg_region_init_rh850 +#define tcg_code_size tcg_code_size_rh850 +#define tcg_code_capacity tcg_code_capacity_rh850 +#define tcg_tb_phys_invalidate_count tcg_tb_phys_invalidate_count_rh850 +#define tcg_malloc_internal tcg_malloc_internal_rh850 +#define tcg_pool_reset tcg_pool_reset_rh850 +#define tcg_context_init tcg_context_init_rh850 +#define tcg_tb_alloc tcg_tb_alloc_rh850 +#define tcg_prologue_init tcg_prologue_init_rh850 +#define tcg_func_start tcg_func_start_rh850 +#define tcg_set_frame tcg_set_frame_rh850 +#define tcg_global_mem_new_internal tcg_global_mem_new_internal_rh850 +#define tcg_temp_new_internal tcg_temp_new_internal_rh850 +#define tcg_temp_new_vec tcg_temp_new_vec_rh850 +#define tcg_temp_new_vec_matching tcg_temp_new_vec_matching_rh850 +#define tcg_temp_free_internal tcg_temp_free_internal_rh850 +#define tcg_const_i32 tcg_const_i32_rh850 +#define tcg_const_i64 tcg_const_i64_rh850 +#define tcg_const_local_i32 tcg_const_local_i32_rh850 +#define tcg_const_local_i64 tcg_const_local_i64_rh850 +#define tcg_op_supported tcg_op_supported_rh850 +#define tcg_gen_callN tcg_gen_callN_rh850 +#define tcg_op_remove tcg_op_remove_rh850 +#define tcg_emit_op tcg_emit_op_rh850 +#define tcg_op_insert_before tcg_op_insert_before_rh850 +#define tcg_op_insert_after tcg_op_insert_after_rh850 +#define tcg_cpu_exec_time tcg_cpu_exec_time_rh850 +#define tcg_gen_code tcg_gen_code_rh850 +#define tcg_gen_op1 tcg_gen_op1_rh850 +#define tcg_gen_op2 tcg_gen_op2_rh850 +#define tcg_gen_op3 tcg_gen_op3_rh850 +#define tcg_gen_op4 tcg_gen_op4_rh850 +#define tcg_gen_op5 tcg_gen_op5_rh850 +#define tcg_gen_op6 tcg_gen_op6_rh850 +#define tcg_gen_mb tcg_gen_mb_rh850 +#define tcg_gen_addi_i32 tcg_gen_addi_i32_rh850 +#define tcg_gen_subfi_i32 tcg_gen_subfi_i32_rh850 +#define tcg_gen_subi_i32 tcg_gen_subi_i32_rh850 +#define tcg_gen_andi_i32 tcg_gen_andi_i32_rh850 +#define tcg_gen_ori_i32 tcg_gen_ori_i32_rh850 +#define tcg_gen_xori_i32 tcg_gen_xori_i32_rh850 +#define tcg_gen_shli_i32 tcg_gen_shli_i32_rh850 +#define tcg_gen_shri_i32 tcg_gen_shri_i32_rh850 +#define tcg_gen_sari_i32 tcg_gen_sari_i32_rh850 +#define tcg_gen_brcond_i32 tcg_gen_brcond_i32_rh850 +#define tcg_gen_brcondi_i32 tcg_gen_brcondi_i32_rh850 +#define tcg_gen_setcond_i32 tcg_gen_setcond_i32_rh850 +#define tcg_gen_setcondi_i32 tcg_gen_setcondi_i32_rh850 +#define tcg_gen_muli_i32 tcg_gen_muli_i32_rh850 +#define tcg_gen_div_i32 tcg_gen_div_i32_rh850 +#define tcg_gen_rem_i32 tcg_gen_rem_i32_rh850 +#define tcg_gen_divu_i32 tcg_gen_divu_i32_rh850 +#define tcg_gen_remu_i32 tcg_gen_remu_i32_rh850 +#define tcg_gen_andc_i32 tcg_gen_andc_i32_rh850 +#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_rh850 
+#define tcg_gen_nand_i32 tcg_gen_nand_i32_rh850 +#define tcg_gen_nor_i32 tcg_gen_nor_i32_rh850 +#define tcg_gen_orc_i32 tcg_gen_orc_i32_rh850 +#define tcg_gen_clz_i32 tcg_gen_clz_i32_rh850 +#define tcg_gen_clzi_i32 tcg_gen_clzi_i32_rh850 +#define tcg_gen_ctz_i32 tcg_gen_ctz_i32_rh850 +#define tcg_gen_ctzi_i32 tcg_gen_ctzi_i32_rh850 +#define tcg_gen_clrsb_i32 tcg_gen_clrsb_i32_rh850 +#define tcg_gen_ctpop_i32 tcg_gen_ctpop_i32_rh850 +#define tcg_gen_rotl_i32 tcg_gen_rotl_i32_rh850 +#define tcg_gen_rotli_i32 tcg_gen_rotli_i32_rh850 +#define tcg_gen_rotr_i32 tcg_gen_rotr_i32_rh850 +#define tcg_gen_rotri_i32 tcg_gen_rotri_i32_rh850 +#define tcg_gen_deposit_i32 tcg_gen_deposit_i32_rh850 +#define tcg_gen_deposit_z_i32 tcg_gen_deposit_z_i32_rh850 +#define tcg_gen_extract_i32 tcg_gen_extract_i32_rh850 +#define tcg_gen_sextract_i32 tcg_gen_sextract_i32_rh850 +#define tcg_gen_extract2_i32 tcg_gen_extract2_i32_rh850 +#define tcg_gen_movcond_i32 tcg_gen_movcond_i32_rh850 +#define tcg_gen_add2_i32 tcg_gen_add2_i32_rh850 +#define tcg_gen_sub2_i32 tcg_gen_sub2_i32_rh850 +#define tcg_gen_mulu2_i32 tcg_gen_mulu2_i32_rh850 +#define tcg_gen_muls2_i32 tcg_gen_muls2_i32_rh850 +#define tcg_gen_mulsu2_i32 tcg_gen_mulsu2_i32_rh850 +#define tcg_gen_ext8s_i32 tcg_gen_ext8s_i32_rh850 +#define tcg_gen_ext16s_i32 tcg_gen_ext16s_i32_rh850 +#define tcg_gen_ext8u_i32 tcg_gen_ext8u_i32_rh850 +#define tcg_gen_ext16u_i32 tcg_gen_ext16u_i32_rh850 +#define tcg_gen_bswap16_i32 tcg_gen_bswap16_i32_rh850 +#define tcg_gen_bswap32_i32 tcg_gen_bswap32_i32_rh850 +#define tcg_gen_smin_i32 tcg_gen_smin_i32_rh850 +#define tcg_gen_umin_i32 tcg_gen_umin_i32_rh850 +#define tcg_gen_smax_i32 tcg_gen_smax_i32_rh850 +#define tcg_gen_umax_i32 tcg_gen_umax_i32_rh850 +#define tcg_gen_abs_i32 tcg_gen_abs_i32_rh850 +#define tcg_gen_addi_i64 tcg_gen_addi_i64_rh850 +#define tcg_gen_subfi_i64 tcg_gen_subfi_i64_rh850 +#define tcg_gen_subi_i64 tcg_gen_subi_i64_rh850 +#define tcg_gen_andi_i64 tcg_gen_andi_i64_rh850 +#define tcg_gen_ori_i64 tcg_gen_ori_i64_rh850 +#define tcg_gen_xori_i64 tcg_gen_xori_i64_rh850 +#define tcg_gen_shli_i64 tcg_gen_shli_i64_rh850 +#define tcg_gen_shri_i64 tcg_gen_shri_i64_rh850 +#define tcg_gen_sari_i64 tcg_gen_sari_i64_rh850 +#define tcg_gen_brcond_i64 tcg_gen_brcond_i64_rh850 +#define tcg_gen_brcondi_i64 tcg_gen_brcondi_i64_rh850 +#define tcg_gen_setcond_i64 tcg_gen_setcond_i64_rh850 +#define tcg_gen_setcondi_i64 tcg_gen_setcondi_i64_rh850 +#define tcg_gen_muli_i64 tcg_gen_muli_i64_rh850 +#define tcg_gen_div_i64 tcg_gen_div_i64_rh850 +#define tcg_gen_rem_i64 tcg_gen_rem_i64_rh850 +#define tcg_gen_divu_i64 tcg_gen_divu_i64_rh850 +#define tcg_gen_remu_i64 tcg_gen_remu_i64_rh850 +#define tcg_gen_ext8s_i64 tcg_gen_ext8s_i64_rh850 +#define tcg_gen_ext16s_i64 tcg_gen_ext16s_i64_rh850 +#define tcg_gen_ext32s_i64 tcg_gen_ext32s_i64_rh850 +#define tcg_gen_ext8u_i64 tcg_gen_ext8u_i64_rh850 +#define tcg_gen_ext16u_i64 tcg_gen_ext16u_i64_rh850 +#define tcg_gen_ext32u_i64 tcg_gen_ext32u_i64_rh850 +#define tcg_gen_bswap16_i64 tcg_gen_bswap16_i64_rh850 +#define tcg_gen_bswap32_i64 tcg_gen_bswap32_i64_rh850 +#define tcg_gen_bswap64_i64 tcg_gen_bswap64_i64_rh850 +#define tcg_gen_not_i64 tcg_gen_not_i64_rh850 +#define tcg_gen_andc_i64 tcg_gen_andc_i64_rh850 +#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_rh850 +#define tcg_gen_nand_i64 tcg_gen_nand_i64_rh850 +#define tcg_gen_nor_i64 tcg_gen_nor_i64_rh850 +#define tcg_gen_orc_i64 tcg_gen_orc_i64_rh850 +#define tcg_gen_clz_i64 tcg_gen_clz_i64_rh850 +#define tcg_gen_clzi_i64 tcg_gen_clzi_i64_rh850 
+#define tcg_gen_ctz_i64 tcg_gen_ctz_i64_rh850 +#define tcg_gen_ctzi_i64 tcg_gen_ctzi_i64_rh850 +#define tcg_gen_clrsb_i64 tcg_gen_clrsb_i64_rh850 +#define tcg_gen_ctpop_i64 tcg_gen_ctpop_i64_rh850 +#define tcg_gen_rotl_i64 tcg_gen_rotl_i64_rh850 +#define tcg_gen_rotli_i64 tcg_gen_rotli_i64_rh850 +#define tcg_gen_rotr_i64 tcg_gen_rotr_i64_rh850 +#define tcg_gen_rotri_i64 tcg_gen_rotri_i64_rh850 +#define tcg_gen_deposit_i64 tcg_gen_deposit_i64_rh850 +#define tcg_gen_deposit_z_i64 tcg_gen_deposit_z_i64_rh850 +#define tcg_gen_extract_i64 tcg_gen_extract_i64_rh850 +#define tcg_gen_sextract_i64 tcg_gen_sextract_i64_rh850 +#define tcg_gen_extract2_i64 tcg_gen_extract2_i64_rh850 +#define tcg_gen_movcond_i64 tcg_gen_movcond_i64_rh850 +#define tcg_gen_add2_i64 tcg_gen_add2_i64_rh850 +#define tcg_gen_sub2_i64 tcg_gen_sub2_i64_rh850 +#define tcg_gen_mulu2_i64 tcg_gen_mulu2_i64_rh850 +#define tcg_gen_muls2_i64 tcg_gen_muls2_i64_rh850 +#define tcg_gen_mulsu2_i64 tcg_gen_mulsu2_i64_rh850 +#define tcg_gen_smin_i64 tcg_gen_smin_i64_rh850 +#define tcg_gen_umin_i64 tcg_gen_umin_i64_rh850 +#define tcg_gen_smax_i64 tcg_gen_smax_i64_rh850 +#define tcg_gen_umax_i64 tcg_gen_umax_i64_rh850 +#define tcg_gen_abs_i64 tcg_gen_abs_i64_rh850 +#define tcg_gen_extrl_i64_i32 tcg_gen_extrl_i64_i32_rh850 +#define tcg_gen_extrh_i64_i32 tcg_gen_extrh_i64_i32_rh850 +#define tcg_gen_extu_i32_i64 tcg_gen_extu_i32_i64_rh850 +#define tcg_gen_ext_i32_i64 tcg_gen_ext_i32_i64_rh850 +#define tcg_gen_concat_i32_i64 tcg_gen_concat_i32_i64_rh850 +#define tcg_gen_extr_i64_i32 tcg_gen_extr_i64_i32_rh850 +#define tcg_gen_extr32_i64 tcg_gen_extr32_i64_rh850 +#define tcg_gen_exit_tb tcg_gen_exit_tb_rh850 +#define tcg_gen_goto_tb tcg_gen_goto_tb_rh850 +#define tcg_gen_lookup_and_goto_ptr tcg_gen_lookup_and_goto_ptr_rh850 +#define check_exit_request check_exit_request_rh850 +#define tcg_gen_qemu_ld_i32 tcg_gen_qemu_ld_i32_rh850 +#define tcg_gen_qemu_st_i32 tcg_gen_qemu_st_i32_rh850 +#define tcg_gen_qemu_ld_i64 tcg_gen_qemu_ld_i64_rh850 +#define tcg_gen_qemu_st_i64 tcg_gen_qemu_st_i64_rh850 +#define tcg_gen_atomic_cmpxchg_i32 tcg_gen_atomic_cmpxchg_i32_rh850 +#define tcg_gen_atomic_cmpxchg_i64 tcg_gen_atomic_cmpxchg_i64_rh850 +#define tcg_gen_atomic_fetch_add_i32 tcg_gen_atomic_fetch_add_i32_rh850 +#define tcg_gen_atomic_fetch_add_i64 tcg_gen_atomic_fetch_add_i64_rh850 +#define tcg_gen_atomic_fetch_and_i32 tcg_gen_atomic_fetch_and_i32_rh850 +#define tcg_gen_atomic_fetch_and_i64 tcg_gen_atomic_fetch_and_i64_rh850 +#define tcg_gen_atomic_fetch_or_i32 tcg_gen_atomic_fetch_or_i32_rh850 +#define tcg_gen_atomic_fetch_or_i64 tcg_gen_atomic_fetch_or_i64_rh850 +#define tcg_gen_atomic_fetch_xor_i32 tcg_gen_atomic_fetch_xor_i32_rh850 +#define tcg_gen_atomic_fetch_xor_i64 tcg_gen_atomic_fetch_xor_i64_rh850 +#define tcg_gen_atomic_fetch_smin_i32 tcg_gen_atomic_fetch_smin_i32_rh850 +#define tcg_gen_atomic_fetch_smin_i64 tcg_gen_atomic_fetch_smin_i64_rh850 +#define tcg_gen_atomic_fetch_umin_i32 tcg_gen_atomic_fetch_umin_i32_rh850 +#define tcg_gen_atomic_fetch_umin_i64 tcg_gen_atomic_fetch_umin_i64_rh850 +#define tcg_gen_atomic_fetch_smax_i32 tcg_gen_atomic_fetch_smax_i32_rh850 +#define tcg_gen_atomic_fetch_smax_i64 tcg_gen_atomic_fetch_smax_i64_rh850 +#define tcg_gen_atomic_fetch_umax_i32 tcg_gen_atomic_fetch_umax_i32_rh850 +#define tcg_gen_atomic_fetch_umax_i64 tcg_gen_atomic_fetch_umax_i64_rh850 +#define tcg_gen_atomic_add_fetch_i32 tcg_gen_atomic_add_fetch_i32_rh850 +#define tcg_gen_atomic_add_fetch_i64 tcg_gen_atomic_add_fetch_i64_rh850 +#define 
tcg_gen_atomic_and_fetch_i32 tcg_gen_atomic_and_fetch_i32_rh850 +#define tcg_gen_atomic_and_fetch_i64 tcg_gen_atomic_and_fetch_i64_rh850 +#define tcg_gen_atomic_or_fetch_i32 tcg_gen_atomic_or_fetch_i32_rh850 +#define tcg_gen_atomic_or_fetch_i64 tcg_gen_atomic_or_fetch_i64_rh850 +#define tcg_gen_atomic_xor_fetch_i32 tcg_gen_atomic_xor_fetch_i32_rh850 +#define tcg_gen_atomic_xor_fetch_i64 tcg_gen_atomic_xor_fetch_i64_rh850 +#define tcg_gen_atomic_smin_fetch_i32 tcg_gen_atomic_smin_fetch_i32_rh850 +#define tcg_gen_atomic_smin_fetch_i64 tcg_gen_atomic_smin_fetch_i64_rh850 +#define tcg_gen_atomic_umin_fetch_i32 tcg_gen_atomic_umin_fetch_i32_rh850 +#define tcg_gen_atomic_umin_fetch_i64 tcg_gen_atomic_umin_fetch_i64_rh850 +#define tcg_gen_atomic_smax_fetch_i32 tcg_gen_atomic_smax_fetch_i32_rh850 +#define tcg_gen_atomic_smax_fetch_i64 tcg_gen_atomic_smax_fetch_i64_rh850 +#define tcg_gen_atomic_umax_fetch_i32 tcg_gen_atomic_umax_fetch_i32_rh850 +#define tcg_gen_atomic_umax_fetch_i64 tcg_gen_atomic_umax_fetch_i64_rh850 +#define tcg_gen_atomic_xchg_i32 tcg_gen_atomic_xchg_i32_rh850 +#define tcg_gen_atomic_xchg_i64 tcg_gen_atomic_xchg_i64_rh850 +#define simd_desc simd_desc_rh850 +#define tcg_gen_gvec_2_ool tcg_gen_gvec_2_ool_rh850 +#define tcg_gen_gvec_2i_ool tcg_gen_gvec_2i_ool_rh850 +#define tcg_gen_gvec_3_ool tcg_gen_gvec_3_ool_rh850 +#define tcg_gen_gvec_4_ool tcg_gen_gvec_4_ool_rh850 +#define tcg_gen_gvec_5_ool tcg_gen_gvec_5_ool_rh850 +#define tcg_gen_gvec_2_ptr tcg_gen_gvec_2_ptr_rh850 +#define tcg_gen_gvec_3_ptr tcg_gen_gvec_3_ptr_rh850 +#define tcg_gen_gvec_4_ptr tcg_gen_gvec_4_ptr_rh850 +#define tcg_gen_gvec_5_ptr tcg_gen_gvec_5_ptr_rh850 +#define tcg_gen_gvec_2 tcg_gen_gvec_2_rh850 +#define tcg_gen_gvec_2i tcg_gen_gvec_2i_rh850 +#define tcg_gen_gvec_2s tcg_gen_gvec_2s_rh850 +#define tcg_gen_gvec_3 tcg_gen_gvec_3_rh850 +#define tcg_gen_gvec_3i tcg_gen_gvec_3i_rh850 +#define tcg_gen_gvec_4 tcg_gen_gvec_4_rh850 +#define tcg_gen_gvec_mov tcg_gen_gvec_mov_rh850 +#define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_rh850 +#define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_rh850 +#define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_rh850 +#define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_rh850 +#define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_rh850 +#define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_rh850 +#define tcg_gen_gvec_dup8i tcg_gen_gvec_dup8i_rh850 +#define tcg_gen_gvec_not tcg_gen_gvec_not_rh850 +#define tcg_gen_vec_add8_i64 tcg_gen_vec_add8_i64_rh850 +#define tcg_gen_vec_add16_i64 tcg_gen_vec_add16_i64_rh850 +#define tcg_gen_vec_add32_i64 tcg_gen_vec_add32_i64_rh850 +#define tcg_gen_gvec_add tcg_gen_gvec_add_rh850 +#define tcg_gen_gvec_adds tcg_gen_gvec_adds_rh850 +#define tcg_gen_gvec_addi tcg_gen_gvec_addi_rh850 +#define tcg_gen_gvec_subs tcg_gen_gvec_subs_rh850 +#define tcg_gen_vec_sub8_i64 tcg_gen_vec_sub8_i64_rh850 +#define tcg_gen_vec_sub16_i64 tcg_gen_vec_sub16_i64_rh850 +#define tcg_gen_vec_sub32_i64 tcg_gen_vec_sub32_i64_rh850 +#define tcg_gen_gvec_sub tcg_gen_gvec_sub_rh850 +#define tcg_gen_gvec_mul tcg_gen_gvec_mul_rh850 +#define tcg_gen_gvec_muls tcg_gen_gvec_muls_rh850 +#define tcg_gen_gvec_muli tcg_gen_gvec_muli_rh850 +#define tcg_gen_gvec_ssadd tcg_gen_gvec_ssadd_rh850 +#define tcg_gen_gvec_sssub tcg_gen_gvec_sssub_rh850 +#define tcg_gen_gvec_usadd tcg_gen_gvec_usadd_rh850 +#define tcg_gen_gvec_ussub tcg_gen_gvec_ussub_rh850 +#define tcg_gen_gvec_smin tcg_gen_gvec_smin_rh850 +#define tcg_gen_gvec_umin tcg_gen_gvec_umin_rh850 +#define tcg_gen_gvec_smax tcg_gen_gvec_smax_rh850 +#define 
tcg_gen_gvec_umax tcg_gen_gvec_umax_rh850 +#define tcg_gen_vec_neg8_i64 tcg_gen_vec_neg8_i64_rh850 +#define tcg_gen_vec_neg16_i64 tcg_gen_vec_neg16_i64_rh850 +#define tcg_gen_vec_neg32_i64 tcg_gen_vec_neg32_i64_rh850 +#define tcg_gen_gvec_neg tcg_gen_gvec_neg_rh850 +#define tcg_gen_gvec_abs tcg_gen_gvec_abs_rh850 +#define tcg_gen_gvec_and tcg_gen_gvec_and_rh850 +#define tcg_gen_gvec_or tcg_gen_gvec_or_rh850 +#define tcg_gen_gvec_xor tcg_gen_gvec_xor_rh850 +#define tcg_gen_gvec_andc tcg_gen_gvec_andc_rh850 +#define tcg_gen_gvec_orc tcg_gen_gvec_orc_rh850 +#define tcg_gen_gvec_nand tcg_gen_gvec_nand_rh850 +#define tcg_gen_gvec_nor tcg_gen_gvec_nor_rh850 +#define tcg_gen_gvec_eqv tcg_gen_gvec_eqv_rh850 +#define tcg_gen_gvec_ands tcg_gen_gvec_ands_rh850 +#define tcg_gen_gvec_andi tcg_gen_gvec_andi_rh850 +#define tcg_gen_gvec_xors tcg_gen_gvec_xors_rh850 +#define tcg_gen_gvec_xori tcg_gen_gvec_xori_rh850 +#define tcg_gen_gvec_ors tcg_gen_gvec_ors_rh850 +#define tcg_gen_gvec_ori tcg_gen_gvec_ori_rh850 +#define tcg_gen_vec_shl8i_i64 tcg_gen_vec_shl8i_i64_rh850 +#define tcg_gen_vec_shl16i_i64 tcg_gen_vec_shl16i_i64_rh850 +#define tcg_gen_gvec_shli tcg_gen_gvec_shli_rh850 +#define tcg_gen_vec_shr8i_i64 tcg_gen_vec_shr8i_i64_rh850 +#define tcg_gen_vec_shr16i_i64 tcg_gen_vec_shr16i_i64_rh850 +#define tcg_gen_gvec_shri tcg_gen_gvec_shri_rh850 +#define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_rh850 +#define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_rh850 +#define tcg_gen_gvec_sari tcg_gen_gvec_sari_rh850 +#define tcg_gen_gvec_shls tcg_gen_gvec_shls_rh850 +#define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_rh850 +#define tcg_gen_gvec_sars tcg_gen_gvec_sars_rh850 +#define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_rh850 +#define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_rh850 +#define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_rh850 +#define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_rh850 +#define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_rh850 +#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_rh850 +#define vec_gen_2 vec_gen_2_rh850 +#define vec_gen_3 vec_gen_3_rh850 +#define vec_gen_4 vec_gen_4_rh850 +#define tcg_gen_mov_vec tcg_gen_mov_vec_rh850 +#define tcg_const_zeros_vec tcg_const_zeros_vec_rh850 +#define tcg_const_ones_vec tcg_const_ones_vec_rh850 +#define tcg_const_zeros_vec_matching tcg_const_zeros_vec_matching_rh850 +#define tcg_const_ones_vec_matching tcg_const_ones_vec_matching_rh850 +#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_rh850 +#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_rh850 +#define tcg_gen_dup16i_vec tcg_gen_dup16i_vec_rh850 +#define tcg_gen_dup8i_vec tcg_gen_dup8i_vec_rh850 +#define tcg_gen_dupi_vec tcg_gen_dupi_vec_rh850 +#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_rh850 +#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_rh850 +#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_rh850 +#define tcg_gen_ld_vec tcg_gen_ld_vec_rh850 +#define tcg_gen_st_vec tcg_gen_st_vec_rh850 +#define tcg_gen_stl_vec tcg_gen_stl_vec_rh850 +#define tcg_gen_and_vec tcg_gen_and_vec_rh850 +#define tcg_gen_or_vec tcg_gen_or_vec_rh850 +#define tcg_gen_xor_vec tcg_gen_xor_vec_rh850 +#define tcg_gen_andc_vec tcg_gen_andc_vec_rh850 +#define tcg_gen_orc_vec tcg_gen_orc_vec_rh850 +#define tcg_gen_nand_vec tcg_gen_nand_vec_rh850 +#define tcg_gen_nor_vec tcg_gen_nor_vec_rh850 +#define tcg_gen_eqv_vec tcg_gen_eqv_vec_rh850 +#define tcg_gen_not_vec tcg_gen_not_vec_rh850 +#define tcg_gen_neg_vec tcg_gen_neg_vec_rh850 +#define tcg_gen_abs_vec tcg_gen_abs_vec_rh850 +#define tcg_gen_shli_vec tcg_gen_shli_vec_rh850 +#define 
tcg_gen_shri_vec tcg_gen_shri_vec_rh850 +#define tcg_gen_sari_vec tcg_gen_sari_vec_rh850 +#define tcg_gen_cmp_vec tcg_gen_cmp_vec_rh850 +#define tcg_gen_add_vec tcg_gen_add_vec_rh850 +#define tcg_gen_sub_vec tcg_gen_sub_vec_rh850 +#define tcg_gen_mul_vec tcg_gen_mul_vec_rh850 +#define tcg_gen_ssadd_vec tcg_gen_ssadd_vec_rh850 +#define tcg_gen_usadd_vec tcg_gen_usadd_vec_rh850 +#define tcg_gen_sssub_vec tcg_gen_sssub_vec_rh850 +#define tcg_gen_ussub_vec tcg_gen_ussub_vec_rh850 +#define tcg_gen_smin_vec tcg_gen_smin_vec_rh850 +#define tcg_gen_umin_vec tcg_gen_umin_vec_rh850 +#define tcg_gen_smax_vec tcg_gen_smax_vec_rh850 +#define tcg_gen_umax_vec tcg_gen_umax_vec_rh850 +#define tcg_gen_shlv_vec tcg_gen_shlv_vec_rh850 +#define tcg_gen_shrv_vec tcg_gen_shrv_vec_rh850 +#define tcg_gen_sarv_vec tcg_gen_sarv_vec_rh850 +#define tcg_gen_shls_vec tcg_gen_shls_vec_rh850 +#define tcg_gen_shrs_vec tcg_gen_shrs_vec_rh850 +#define tcg_gen_sars_vec tcg_gen_sars_vec_rh850 +#define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_rh850 +#define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_rh850 +#define tb_htable_lookup tb_htable_lookup_rh850 +#define tb_set_jmp_target tb_set_jmp_target_rh850 +#define cpu_exec cpu_exec_rh850 +#define cpu_loop_exit_noexc cpu_loop_exit_noexc_rh850 +#define cpu_reloading_memory_map cpu_reloading_memory_map_rh850 +#define cpu_loop_exit cpu_loop_exit_rh850 +#define cpu_loop_exit_restore cpu_loop_exit_restore_rh850 +#define cpu_loop_exit_atomic cpu_loop_exit_atomic_rh850 +#define tlb_init tlb_init_rh850 +#define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_rh850 +#define tlb_flush tlb_flush_rh850 +#define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_rh850 +#define tlb_flush_all_cpus tlb_flush_all_cpus_rh850 +#define tlb_flush_by_mmuidx_all_cpus_synced tlb_flush_by_mmuidx_all_cpus_synced_rh850 +#define tlb_flush_all_cpus_synced tlb_flush_all_cpus_synced_rh850 +#define tlb_flush_page_by_mmuidx tlb_flush_page_by_mmuidx_rh850 +#define tlb_flush_page tlb_flush_page_rh850 +#define tlb_flush_page_by_mmuidx_all_cpus tlb_flush_page_by_mmuidx_all_cpus_rh850 +#define tlb_flush_page_all_cpus tlb_flush_page_all_cpus_rh850 +#define tlb_flush_page_by_mmuidx_all_cpus_synced tlb_flush_page_by_mmuidx_all_cpus_synced_rh850 +#define tlb_flush_page_all_cpus_synced tlb_flush_page_all_cpus_synced_rh850 +#define tlb_protect_code tlb_protect_code_rh850 +#define tlb_unprotect_code tlb_unprotect_code_rh850 +#define tlb_reset_dirty tlb_reset_dirty_rh850 +#define tlb_set_dirty tlb_set_dirty_rh850 +#define tlb_set_page_with_attrs tlb_set_page_with_attrs_rh850 +#define tlb_set_page tlb_set_page_rh850 +#define get_page_addr_code_hostp get_page_addr_code_hostp_rh850 +#define get_page_addr_code get_page_addr_code_rh850 +#define probe_access probe_access_rh850 +#define tlb_vaddr_to_host tlb_vaddr_to_host_rh850 +#define helper_ret_ldub_mmu helper_ret_ldub_mmu_rh850 +#define helper_le_lduw_mmu helper_le_lduw_mmu_rh850 +#define helper_be_lduw_mmu helper_be_lduw_mmu_rh850 +#define helper_le_ldul_mmu helper_le_ldul_mmu_rh850 +#define helper_be_ldul_mmu helper_be_ldul_mmu_rh850 +#define helper_le_ldq_mmu helper_le_ldq_mmu_rh850 +#define helper_be_ldq_mmu helper_be_ldq_mmu_rh850 +#define helper_ret_ldsb_mmu helper_ret_ldsb_mmu_rh850 +#define helper_le_ldsw_mmu helper_le_ldsw_mmu_rh850 +#define helper_be_ldsw_mmu helper_be_ldsw_mmu_rh850 +#define helper_le_ldsl_mmu helper_le_ldsl_mmu_rh850 +#define helper_be_ldsl_mmu helper_be_ldsl_mmu_rh850 +#define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_rh850 +#define cpu_ldsb_mmuidx_ra 
cpu_ldsb_mmuidx_ra_rh850 +#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_rh850 +#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_rh850 +#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_rh850 +#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_rh850 +#define cpu_ldub_data_ra cpu_ldub_data_ra_rh850 +#define cpu_ldsb_data_ra cpu_ldsb_data_ra_rh850 +#define cpu_lduw_data_ra cpu_lduw_data_ra_rh850 +#define cpu_ldsw_data_ra cpu_ldsw_data_ra_rh850 +#define cpu_ldl_data_ra cpu_ldl_data_ra_rh850 +#define cpu_ldq_data_ra cpu_ldq_data_ra_rh850 +#define cpu_ldub_data cpu_ldub_data_rh850 +#define cpu_ldsb_data cpu_ldsb_data_rh850 +#define cpu_lduw_data cpu_lduw_data_rh850 +#define cpu_ldsw_data cpu_ldsw_data_rh850 +#define cpu_ldl_data cpu_ldl_data_rh850 +#define cpu_ldq_data cpu_ldq_data_rh850 +#define helper_ret_stb_mmu helper_ret_stb_mmu_rh850 +#define helper_le_stw_mmu helper_le_stw_mmu_rh850 +#define helper_be_stw_mmu helper_be_stw_mmu_rh850 +#define helper_le_stl_mmu helper_le_stl_mmu_rh850 +#define helper_be_stl_mmu helper_be_stl_mmu_rh850 +#define helper_le_stq_mmu helper_le_stq_mmu_rh850 +#define helper_be_stq_mmu helper_be_stq_mmu_rh850 +#define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_rh850 +#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_rh850 +#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_rh850 +#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_rh850 +#define cpu_stb_data_ra cpu_stb_data_ra_rh850 +#define cpu_stw_data_ra cpu_stw_data_ra_rh850 +#define cpu_stl_data_ra cpu_stl_data_ra_rh850 +#define cpu_stq_data_ra cpu_stq_data_ra_rh850 +#define cpu_stb_data cpu_stb_data_rh850 +#define cpu_stw_data cpu_stw_data_rh850 +#define cpu_stl_data cpu_stl_data_rh850 +#define cpu_stq_data cpu_stq_data_rh850 +#define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_rh850 +#define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_rh850 +#define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_rh850 +#define helper_atomic_fetch_andb_mmu helper_atomic_fetch_andb_mmu_rh850 +#define helper_atomic_fetch_orb_mmu helper_atomic_fetch_orb_mmu_rh850 +#define helper_atomic_fetch_xorb_mmu helper_atomic_fetch_xorb_mmu_rh850 +#define helper_atomic_add_fetchb_mmu helper_atomic_add_fetchb_mmu_rh850 +#define helper_atomic_and_fetchb_mmu helper_atomic_and_fetchb_mmu_rh850 +#define helper_atomic_or_fetchb_mmu helper_atomic_or_fetchb_mmu_rh850 +#define helper_atomic_xor_fetchb_mmu helper_atomic_xor_fetchb_mmu_rh850 +#define helper_atomic_fetch_sminb_mmu helper_atomic_fetch_sminb_mmu_rh850 +#define helper_atomic_fetch_uminb_mmu helper_atomic_fetch_uminb_mmu_rh850 +#define helper_atomic_fetch_smaxb_mmu helper_atomic_fetch_smaxb_mmu_rh850 +#define helper_atomic_fetch_umaxb_mmu helper_atomic_fetch_umaxb_mmu_rh850 +#define helper_atomic_smin_fetchb_mmu helper_atomic_smin_fetchb_mmu_rh850 +#define helper_atomic_umin_fetchb_mmu helper_atomic_umin_fetchb_mmu_rh850 +#define helper_atomic_smax_fetchb_mmu helper_atomic_smax_fetchb_mmu_rh850 +#define helper_atomic_umax_fetchb_mmu helper_atomic_umax_fetchb_mmu_rh850 +#define helper_atomic_cmpxchgw_le_mmu helper_atomic_cmpxchgw_le_mmu_rh850 +#define helper_atomic_xchgw_le_mmu helper_atomic_xchgw_le_mmu_rh850 +#define helper_atomic_fetch_addw_le_mmu helper_atomic_fetch_addw_le_mmu_rh850 +#define helper_atomic_fetch_andw_le_mmu helper_atomic_fetch_andw_le_mmu_rh850 +#define helper_atomic_fetch_orw_le_mmu helper_atomic_fetch_orw_le_mmu_rh850 +#define helper_atomic_fetch_xorw_le_mmu helper_atomic_fetch_xorw_le_mmu_rh850 +#define helper_atomic_add_fetchw_le_mmu helper_atomic_add_fetchw_le_mmu_rh850 +#define 
helper_atomic_and_fetchw_le_mmu helper_atomic_and_fetchw_le_mmu_rh850 +#define helper_atomic_or_fetchw_le_mmu helper_atomic_or_fetchw_le_mmu_rh850 +#define helper_atomic_xor_fetchw_le_mmu helper_atomic_xor_fetchw_le_mmu_rh850 +#define helper_atomic_fetch_sminw_le_mmu helper_atomic_fetch_sminw_le_mmu_rh850 +#define helper_atomic_fetch_uminw_le_mmu helper_atomic_fetch_uminw_le_mmu_rh850 +#define helper_atomic_fetch_smaxw_le_mmu helper_atomic_fetch_smaxw_le_mmu_rh850 +#define helper_atomic_fetch_umaxw_le_mmu helper_atomic_fetch_umaxw_le_mmu_rh850 +#define helper_atomic_smin_fetchw_le_mmu helper_atomic_smin_fetchw_le_mmu_rh850 +#define helper_atomic_umin_fetchw_le_mmu helper_atomic_umin_fetchw_le_mmu_rh850 +#define helper_atomic_smax_fetchw_le_mmu helper_atomic_smax_fetchw_le_mmu_rh850 +#define helper_atomic_umax_fetchw_le_mmu helper_atomic_umax_fetchw_le_mmu_rh850 +#define helper_atomic_cmpxchgw_be_mmu helper_atomic_cmpxchgw_be_mmu_rh850 +#define helper_atomic_xchgw_be_mmu helper_atomic_xchgw_be_mmu_rh850 +#define helper_atomic_fetch_andw_be_mmu helper_atomic_fetch_andw_be_mmu_rh850 +#define helper_atomic_fetch_orw_be_mmu helper_atomic_fetch_orw_be_mmu_rh850 +#define helper_atomic_fetch_xorw_be_mmu helper_atomic_fetch_xorw_be_mmu_rh850 +#define helper_atomic_and_fetchw_be_mmu helper_atomic_and_fetchw_be_mmu_rh850 +#define helper_atomic_or_fetchw_be_mmu helper_atomic_or_fetchw_be_mmu_rh850 +#define helper_atomic_xor_fetchw_be_mmu helper_atomic_xor_fetchw_be_mmu_rh850 +#define helper_atomic_fetch_sminw_be_mmu helper_atomic_fetch_sminw_be_mmu_rh850 +#define helper_atomic_fetch_uminw_be_mmu helper_atomic_fetch_uminw_be_mmu_rh850 +#define helper_atomic_fetch_smaxw_be_mmu helper_atomic_fetch_smaxw_be_mmu_rh850 +#define helper_atomic_fetch_umaxw_be_mmu helper_atomic_fetch_umaxw_be_mmu_rh850 +#define helper_atomic_smin_fetchw_be_mmu helper_atomic_smin_fetchw_be_mmu_rh850 +#define helper_atomic_umin_fetchw_be_mmu helper_atomic_umin_fetchw_be_mmu_rh850 +#define helper_atomic_smax_fetchw_be_mmu helper_atomic_smax_fetchw_be_mmu_rh850 +#define helper_atomic_umax_fetchw_be_mmu helper_atomic_umax_fetchw_be_mmu_rh850 +#define helper_atomic_fetch_addw_be_mmu helper_atomic_fetch_addw_be_mmu_rh850 +#define helper_atomic_add_fetchw_be_mmu helper_atomic_add_fetchw_be_mmu_rh850 +#define helper_atomic_cmpxchgl_le_mmu helper_atomic_cmpxchgl_le_mmu_rh850 +#define helper_atomic_xchgl_le_mmu helper_atomic_xchgl_le_mmu_rh850 +#define helper_atomic_fetch_addl_le_mmu helper_atomic_fetch_addl_le_mmu_rh850 +#define helper_atomic_fetch_andl_le_mmu helper_atomic_fetch_andl_le_mmu_rh850 +#define helper_atomic_fetch_orl_le_mmu helper_atomic_fetch_orl_le_mmu_rh850 +#define helper_atomic_fetch_xorl_le_mmu helper_atomic_fetch_xorl_le_mmu_rh850 +#define helper_atomic_add_fetchl_le_mmu helper_atomic_add_fetchl_le_mmu_rh850 +#define helper_atomic_and_fetchl_le_mmu helper_atomic_and_fetchl_le_mmu_rh850 +#define helper_atomic_or_fetchl_le_mmu helper_atomic_or_fetchl_le_mmu_rh850 +#define helper_atomic_xor_fetchl_le_mmu helper_atomic_xor_fetchl_le_mmu_rh850 +#define helper_atomic_fetch_sminl_le_mmu helper_atomic_fetch_sminl_le_mmu_rh850 +#define helper_atomic_fetch_uminl_le_mmu helper_atomic_fetch_uminl_le_mmu_rh850 +#define helper_atomic_fetch_smaxl_le_mmu helper_atomic_fetch_smaxl_le_mmu_rh850 +#define helper_atomic_fetch_umaxl_le_mmu helper_atomic_fetch_umaxl_le_mmu_rh850 +#define helper_atomic_smin_fetchl_le_mmu helper_atomic_smin_fetchl_le_mmu_rh850 +#define helper_atomic_umin_fetchl_le_mmu helper_atomic_umin_fetchl_le_mmu_rh850 
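The cpu_ld*/cpu_st* renames in this run follow QEMU's softmmu accessor naming: ld/st, u or s for zero- or sign-extension, b/w/l/q for 8/16/32/64-bit width, _data versus _mmuidx_ra for the implicit data MMU index versus an explicitly supplied one, and _ra for variants that carry the host return address used to unwind a faulting access. A hedged usage sketch, assuming the usual QEMU includes; demo_read_u16() and guest_addr are illustrative names, not part of the patch:

    static inline uint32_t demo_read_u16(CPUArchState *env, target_ulong guest_addr)
    {
        /* Zero-extending 16-bit load from the guest data address space,
         * using the current data MMU index. */
        return cpu_lduw_data(env, guest_addr);
    }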
+#define helper_atomic_smax_fetchl_le_mmu helper_atomic_smax_fetchl_le_mmu_rh850 +#define helper_atomic_umax_fetchl_le_mmu helper_atomic_umax_fetchl_le_mmu_rh850 +#define helper_atomic_cmpxchgl_be_mmu helper_atomic_cmpxchgl_be_mmu_rh850 +#define helper_atomic_xchgl_be_mmu helper_atomic_xchgl_be_mmu_rh850 +#define helper_atomic_fetch_andl_be_mmu helper_atomic_fetch_andl_be_mmu_rh850 +#define helper_atomic_fetch_orl_be_mmu helper_atomic_fetch_orl_be_mmu_rh850 +#define helper_atomic_fetch_xorl_be_mmu helper_atomic_fetch_xorl_be_mmu_rh850 +#define helper_atomic_and_fetchl_be_mmu helper_atomic_and_fetchl_be_mmu_rh850 +#define helper_atomic_or_fetchl_be_mmu helper_atomic_or_fetchl_be_mmu_rh850 +#define helper_atomic_xor_fetchl_be_mmu helper_atomic_xor_fetchl_be_mmu_rh850 +#define helper_atomic_fetch_sminl_be_mmu helper_atomic_fetch_sminl_be_mmu_rh850 +#define helper_atomic_fetch_uminl_be_mmu helper_atomic_fetch_uminl_be_mmu_rh850 +#define helper_atomic_fetch_smaxl_be_mmu helper_atomic_fetch_smaxl_be_mmu_rh850 +#define helper_atomic_fetch_umaxl_be_mmu helper_atomic_fetch_umaxl_be_mmu_rh850 +#define helper_atomic_smin_fetchl_be_mmu helper_atomic_smin_fetchl_be_mmu_rh850 +#define helper_atomic_umin_fetchl_be_mmu helper_atomic_umin_fetchl_be_mmu_rh850 +#define helper_atomic_smax_fetchl_be_mmu helper_atomic_smax_fetchl_be_mmu_rh850 +#define helper_atomic_umax_fetchl_be_mmu helper_atomic_umax_fetchl_be_mmu_rh850 +#define helper_atomic_fetch_addl_be_mmu helper_atomic_fetch_addl_be_mmu_rh850 +#define helper_atomic_add_fetchl_be_mmu helper_atomic_add_fetchl_be_mmu_rh850 +#define helper_atomic_cmpxchgq_le_mmu helper_atomic_cmpxchgq_le_mmu_rh850 +#define helper_atomic_xchgq_le_mmu helper_atomic_xchgq_le_mmu_rh850 +#define helper_atomic_fetch_addq_le_mmu helper_atomic_fetch_addq_le_mmu_rh850 +#define helper_atomic_fetch_andq_le_mmu helper_atomic_fetch_andq_le_mmu_rh850 +#define helper_atomic_fetch_orq_le_mmu helper_atomic_fetch_orq_le_mmu_rh850 +#define helper_atomic_fetch_xorq_le_mmu helper_atomic_fetch_xorq_le_mmu_rh850 +#define helper_atomic_add_fetchq_le_mmu helper_atomic_add_fetchq_le_mmu_rh850 +#define helper_atomic_and_fetchq_le_mmu helper_atomic_and_fetchq_le_mmu_rh850 +#define helper_atomic_or_fetchq_le_mmu helper_atomic_or_fetchq_le_mmu_rh850 +#define helper_atomic_xor_fetchq_le_mmu helper_atomic_xor_fetchq_le_mmu_rh850 +#define helper_atomic_fetch_sminq_le_mmu helper_atomic_fetch_sminq_le_mmu_rh850 +#define helper_atomic_fetch_uminq_le_mmu helper_atomic_fetch_uminq_le_mmu_rh850 +#define helper_atomic_fetch_smaxq_le_mmu helper_atomic_fetch_smaxq_le_mmu_rh850 +#define helper_atomic_fetch_umaxq_le_mmu helper_atomic_fetch_umaxq_le_mmu_rh850 +#define helper_atomic_smin_fetchq_le_mmu helper_atomic_smin_fetchq_le_mmu_rh850 +#define helper_atomic_umin_fetchq_le_mmu helper_atomic_umin_fetchq_le_mmu_rh850 +#define helper_atomic_smax_fetchq_le_mmu helper_atomic_smax_fetchq_le_mmu_rh850 +#define helper_atomic_umax_fetchq_le_mmu helper_atomic_umax_fetchq_le_mmu_rh850 +#define helper_atomic_cmpxchgq_be_mmu helper_atomic_cmpxchgq_be_mmu_rh850 +#define helper_atomic_xchgq_be_mmu helper_atomic_xchgq_be_mmu_rh850 +#define helper_atomic_fetch_andq_be_mmu helper_atomic_fetch_andq_be_mmu_rh850 +#define helper_atomic_fetch_orq_be_mmu helper_atomic_fetch_orq_be_mmu_rh850 +#define helper_atomic_fetch_xorq_be_mmu helper_atomic_fetch_xorq_be_mmu_rh850 +#define helper_atomic_and_fetchq_be_mmu helper_atomic_and_fetchq_be_mmu_rh850 +#define helper_atomic_or_fetchq_be_mmu helper_atomic_or_fetchq_be_mmu_rh850 +#define 
helper_atomic_xor_fetchq_be_mmu helper_atomic_xor_fetchq_be_mmu_rh850 +#define helper_atomic_fetch_sminq_be_mmu helper_atomic_fetch_sminq_be_mmu_rh850 +#define helper_atomic_fetch_uminq_be_mmu helper_atomic_fetch_uminq_be_mmu_rh850 +#define helper_atomic_fetch_smaxq_be_mmu helper_atomic_fetch_smaxq_be_mmu_rh850 +#define helper_atomic_fetch_umaxq_be_mmu helper_atomic_fetch_umaxq_be_mmu_rh850 +#define helper_atomic_smin_fetchq_be_mmu helper_atomic_smin_fetchq_be_mmu_rh850 +#define helper_atomic_umin_fetchq_be_mmu helper_atomic_umin_fetchq_be_mmu_rh850 +#define helper_atomic_smax_fetchq_be_mmu helper_atomic_smax_fetchq_be_mmu_rh850 +#define helper_atomic_umax_fetchq_be_mmu helper_atomic_umax_fetchq_be_mmu_rh850 +#define helper_atomic_fetch_addq_be_mmu helper_atomic_fetch_addq_be_mmu_rh850 +#define helper_atomic_add_fetchq_be_mmu helper_atomic_add_fetchq_be_mmu_rh850 +#define helper_atomic_cmpxchgb helper_atomic_cmpxchgb_rh850 +#define helper_atomic_xchgb helper_atomic_xchgb_rh850 +#define helper_atomic_fetch_addb helper_atomic_fetch_addb_rh850 +#define helper_atomic_fetch_andb helper_atomic_fetch_andb_rh850 +#define helper_atomic_fetch_orb helper_atomic_fetch_orb_rh850 +#define helper_atomic_fetch_xorb helper_atomic_fetch_xorb_rh850 +#define helper_atomic_add_fetchb helper_atomic_add_fetchb_rh850 +#define helper_atomic_and_fetchb helper_atomic_and_fetchb_rh850 +#define helper_atomic_or_fetchb helper_atomic_or_fetchb_rh850 +#define helper_atomic_xor_fetchb helper_atomic_xor_fetchb_rh850 +#define helper_atomic_fetch_sminb helper_atomic_fetch_sminb_rh850 +#define helper_atomic_fetch_uminb helper_atomic_fetch_uminb_rh850 +#define helper_atomic_fetch_smaxb helper_atomic_fetch_smaxb_rh850 +#define helper_atomic_fetch_umaxb helper_atomic_fetch_umaxb_rh850 +#define helper_atomic_smin_fetchb helper_atomic_smin_fetchb_rh850 +#define helper_atomic_umin_fetchb helper_atomic_umin_fetchb_rh850 +#define helper_atomic_smax_fetchb helper_atomic_smax_fetchb_rh850 +#define helper_atomic_umax_fetchb helper_atomic_umax_fetchb_rh850 +#define helper_atomic_cmpxchgw_le helper_atomic_cmpxchgw_le_rh850 +#define helper_atomic_xchgw_le helper_atomic_xchgw_le_rh850 +#define helper_atomic_fetch_addw_le helper_atomic_fetch_addw_le_rh850 +#define helper_atomic_fetch_andw_le helper_atomic_fetch_andw_le_rh850 +#define helper_atomic_fetch_orw_le helper_atomic_fetch_orw_le_rh850 +#define helper_atomic_fetch_xorw_le helper_atomic_fetch_xorw_le_rh850 +#define helper_atomic_add_fetchw_le helper_atomic_add_fetchw_le_rh850 +#define helper_atomic_and_fetchw_le helper_atomic_and_fetchw_le_rh850 +#define helper_atomic_or_fetchw_le helper_atomic_or_fetchw_le_rh850 +#define helper_atomic_xor_fetchw_le helper_atomic_xor_fetchw_le_rh850 +#define helper_atomic_fetch_sminw_le helper_atomic_fetch_sminw_le_rh850 +#define helper_atomic_fetch_uminw_le helper_atomic_fetch_uminw_le_rh850 +#define helper_atomic_fetch_smaxw_le helper_atomic_fetch_smaxw_le_rh850 +#define helper_atomic_fetch_umaxw_le helper_atomic_fetch_umaxw_le_rh850 +#define helper_atomic_smin_fetchw_le helper_atomic_smin_fetchw_le_rh850 +#define helper_atomic_umin_fetchw_le helper_atomic_umin_fetchw_le_rh850 +#define helper_atomic_smax_fetchw_le helper_atomic_smax_fetchw_le_rh850 +#define helper_atomic_umax_fetchw_le helper_atomic_umax_fetchw_le_rh850 +#define helper_atomic_cmpxchgw_be helper_atomic_cmpxchgw_be_rh850 +#define helper_atomic_xchgw_be helper_atomic_xchgw_be_rh850 +#define helper_atomic_fetch_andw_be helper_atomic_fetch_andw_be_rh850 +#define helper_atomic_fetch_orw_be 
helper_atomic_fetch_orw_be_rh850 +#define helper_atomic_fetch_xorw_be helper_atomic_fetch_xorw_be_rh850 +#define helper_atomic_and_fetchw_be helper_atomic_and_fetchw_be_rh850 +#define helper_atomic_or_fetchw_be helper_atomic_or_fetchw_be_rh850 +#define helper_atomic_xor_fetchw_be helper_atomic_xor_fetchw_be_rh850 +#define helper_atomic_fetch_sminw_be helper_atomic_fetch_sminw_be_rh850 +#define helper_atomic_fetch_uminw_be helper_atomic_fetch_uminw_be_rh850 +#define helper_atomic_fetch_smaxw_be helper_atomic_fetch_smaxw_be_rh850 +#define helper_atomic_fetch_umaxw_be helper_atomic_fetch_umaxw_be_rh850 +#define helper_atomic_smin_fetchw_be helper_atomic_smin_fetchw_be_rh850 +#define helper_atomic_umin_fetchw_be helper_atomic_umin_fetchw_be_rh850 +#define helper_atomic_smax_fetchw_be helper_atomic_smax_fetchw_be_rh850 +#define helper_atomic_umax_fetchw_be helper_atomic_umax_fetchw_be_rh850 +#define helper_atomic_fetch_addw_be helper_atomic_fetch_addw_be_rh850 +#define helper_atomic_add_fetchw_be helper_atomic_add_fetchw_be_rh850 +#define helper_atomic_cmpxchgl_le helper_atomic_cmpxchgl_le_rh850 +#define helper_atomic_xchgl_le helper_atomic_xchgl_le_rh850 +#define helper_atomic_fetch_addl_le helper_atomic_fetch_addl_le_rh850 +#define helper_atomic_fetch_andl_le helper_atomic_fetch_andl_le_rh850 +#define helper_atomic_fetch_orl_le helper_atomic_fetch_orl_le_rh850 +#define helper_atomic_fetch_xorl_le helper_atomic_fetch_xorl_le_rh850 +#define helper_atomic_add_fetchl_le helper_atomic_add_fetchl_le_rh850 +#define helper_atomic_and_fetchl_le helper_atomic_and_fetchl_le_rh850 +#define helper_atomic_or_fetchl_le helper_atomic_or_fetchl_le_rh850 +#define helper_atomic_xor_fetchl_le helper_atomic_xor_fetchl_le_rh850 +#define helper_atomic_fetch_sminl_le helper_atomic_fetch_sminl_le_rh850 +#define helper_atomic_fetch_uminl_le helper_atomic_fetch_uminl_le_rh850 +#define helper_atomic_fetch_smaxl_le helper_atomic_fetch_smaxl_le_rh850 +#define helper_atomic_fetch_umaxl_le helper_atomic_fetch_umaxl_le_rh850 +#define helper_atomic_smin_fetchl_le helper_atomic_smin_fetchl_le_rh850 +#define helper_atomic_umin_fetchl_le helper_atomic_umin_fetchl_le_rh850 +#define helper_atomic_smax_fetchl_le helper_atomic_smax_fetchl_le_rh850 +#define helper_atomic_umax_fetchl_le helper_atomic_umax_fetchl_le_rh850 +#define helper_atomic_cmpxchgl_be helper_atomic_cmpxchgl_be_rh850 +#define helper_atomic_xchgl_be helper_atomic_xchgl_be_rh850 +#define helper_atomic_fetch_andl_be helper_atomic_fetch_andl_be_rh850 +#define helper_atomic_fetch_orl_be helper_atomic_fetch_orl_be_rh850 +#define helper_atomic_fetch_xorl_be helper_atomic_fetch_xorl_be_rh850 +#define helper_atomic_and_fetchl_be helper_atomic_and_fetchl_be_rh850 +#define helper_atomic_or_fetchl_be helper_atomic_or_fetchl_be_rh850 +#define helper_atomic_xor_fetchl_be helper_atomic_xor_fetchl_be_rh850 +#define helper_atomic_fetch_sminl_be helper_atomic_fetch_sminl_be_rh850 +#define helper_atomic_fetch_uminl_be helper_atomic_fetch_uminl_be_rh850 +#define helper_atomic_fetch_smaxl_be helper_atomic_fetch_smaxl_be_rh850 +#define helper_atomic_fetch_umaxl_be helper_atomic_fetch_umaxl_be_rh850 +#define helper_atomic_smin_fetchl_be helper_atomic_smin_fetchl_be_rh850 +#define helper_atomic_umin_fetchl_be helper_atomic_umin_fetchl_be_rh850 +#define helper_atomic_smax_fetchl_be helper_atomic_smax_fetchl_be_rh850 +#define helper_atomic_umax_fetchl_be helper_atomic_umax_fetchl_be_rh850 +#define helper_atomic_fetch_addl_be helper_atomic_fetch_addl_be_rh850 +#define 
helper_atomic_add_fetchl_be helper_atomic_add_fetchl_be_rh850 +#define helper_atomic_cmpxchgq_le helper_atomic_cmpxchgq_le_rh850 +#define helper_atomic_xchgq_le helper_atomic_xchgq_le_rh850 +#define helper_atomic_fetch_addq_le helper_atomic_fetch_addq_le_rh850 +#define helper_atomic_fetch_andq_le helper_atomic_fetch_andq_le_rh850 +#define helper_atomic_fetch_orq_le helper_atomic_fetch_orq_le_rh850 +#define helper_atomic_fetch_xorq_le helper_atomic_fetch_xorq_le_rh850 +#define helper_atomic_add_fetchq_le helper_atomic_add_fetchq_le_rh850 +#define helper_atomic_and_fetchq_le helper_atomic_and_fetchq_le_rh850 +#define helper_atomic_or_fetchq_le helper_atomic_or_fetchq_le_rh850 +#define helper_atomic_xor_fetchq_le helper_atomic_xor_fetchq_le_rh850 +#define helper_atomic_fetch_sminq_le helper_atomic_fetch_sminq_le_rh850 +#define helper_atomic_fetch_uminq_le helper_atomic_fetch_uminq_le_rh850 +#define helper_atomic_fetch_smaxq_le helper_atomic_fetch_smaxq_le_rh850 +#define helper_atomic_fetch_umaxq_le helper_atomic_fetch_umaxq_le_rh850 +#define helper_atomic_smin_fetchq_le helper_atomic_smin_fetchq_le_rh850 +#define helper_atomic_umin_fetchq_le helper_atomic_umin_fetchq_le_rh850 +#define helper_atomic_smax_fetchq_le helper_atomic_smax_fetchq_le_rh850 +#define helper_atomic_umax_fetchq_le helper_atomic_umax_fetchq_le_rh850 +#define helper_atomic_cmpxchgq_be helper_atomic_cmpxchgq_be_rh850 +#define helper_atomic_xchgq_be helper_atomic_xchgq_be_rh850 +#define helper_atomic_fetch_andq_be helper_atomic_fetch_andq_be_rh850 +#define helper_atomic_fetch_orq_be helper_atomic_fetch_orq_be_rh850 +#define helper_atomic_fetch_xorq_be helper_atomic_fetch_xorq_be_rh850 +#define helper_atomic_and_fetchq_be helper_atomic_and_fetchq_be_rh850 +#define helper_atomic_or_fetchq_be helper_atomic_or_fetchq_be_rh850 +#define helper_atomic_xor_fetchq_be helper_atomic_xor_fetchq_be_rh850 +#define helper_atomic_fetch_sminq_be helper_atomic_fetch_sminq_be_rh850 +#define helper_atomic_fetch_uminq_be helper_atomic_fetch_uminq_be_rh850 +#define helper_atomic_fetch_smaxq_be helper_atomic_fetch_smaxq_be_rh850 +#define helper_atomic_fetch_umaxq_be helper_atomic_fetch_umaxq_be_rh850 +#define helper_atomic_smin_fetchq_be helper_atomic_smin_fetchq_be_rh850 +#define helper_atomic_umin_fetchq_be helper_atomic_umin_fetchq_be_rh850 +#define helper_atomic_smax_fetchq_be helper_atomic_smax_fetchq_be_rh850 +#define helper_atomic_umax_fetchq_be helper_atomic_umax_fetchq_be_rh850 +#define helper_atomic_fetch_addq_be helper_atomic_fetch_addq_be_rh850 +#define helper_atomic_add_fetchq_be helper_atomic_add_fetchq_be_rh850 +#define cpu_ldub_code cpu_ldub_code_rh850 +#define cpu_lduw_code cpu_lduw_code_rh850 +#define cpu_ldl_code cpu_ldl_code_rh850 +#define cpu_ldq_code cpu_ldq_code_rh850 +#define helper_div_i32 helper_div_i32_rh850 +#define helper_rem_i32 helper_rem_i32_rh850 +#define helper_divu_i32 helper_divu_i32_rh850 +#define helper_remu_i32 helper_remu_i32_rh850 +#define helper_shl_i64 helper_shl_i64_rh850 +#define helper_shr_i64 helper_shr_i64_rh850 +#define helper_sar_i64 helper_sar_i64_rh850 +#define helper_div_i64 helper_div_i64_rh850 +#define helper_rem_i64 helper_rem_i64_rh850 +#define helper_divu_i64 helper_divu_i64_rh850 +#define helper_remu_i64 helper_remu_i64_rh850 +#define helper_muluh_i64 helper_muluh_i64_rh850 +#define helper_mulsh_i64 helper_mulsh_i64_rh850 +#define helper_clz_i32 helper_clz_i32_rh850 +#define helper_ctz_i32 helper_ctz_i32_rh850 +#define helper_clz_i64 helper_clz_i64_rh850 +#define helper_ctz_i64 
helper_ctz_i64_rh850 +#define helper_clrsb_i32 helper_clrsb_i32_rh850 +#define helper_clrsb_i64 helper_clrsb_i64_rh850 +#define helper_ctpop_i32 helper_ctpop_i32_rh850 +#define helper_ctpop_i64 helper_ctpop_i64_rh850 +#define helper_lookup_tb_ptr helper_lookup_tb_ptr_rh850 +#define helper_exit_atomic helper_exit_atomic_rh850 +#define helper_gvec_add8 helper_gvec_add8_rh850 +#define helper_gvec_add16 helper_gvec_add16_rh850 +#define helper_gvec_add32 helper_gvec_add32_rh850 +#define helper_gvec_add64 helper_gvec_add64_rh850 +#define helper_gvec_adds8 helper_gvec_adds8_rh850 +#define helper_gvec_adds16 helper_gvec_adds16_rh850 +#define helper_gvec_adds32 helper_gvec_adds32_rh850 +#define helper_gvec_adds64 helper_gvec_adds64_rh850 +#define helper_gvec_sub8 helper_gvec_sub8_rh850 +#define helper_gvec_sub16 helper_gvec_sub16_rh850 +#define helper_gvec_sub32 helper_gvec_sub32_rh850 +#define helper_gvec_sub64 helper_gvec_sub64_rh850 +#define helper_gvec_subs8 helper_gvec_subs8_rh850 +#define helper_gvec_subs16 helper_gvec_subs16_rh850 +#define helper_gvec_subs32 helper_gvec_subs32_rh850 +#define helper_gvec_subs64 helper_gvec_subs64_rh850 +#define helper_gvec_mul8 helper_gvec_mul8_rh850 +#define helper_gvec_mul16 helper_gvec_mul16_rh850 +#define helper_gvec_mul32 helper_gvec_mul32_rh850 +#define helper_gvec_mul64 helper_gvec_mul64_rh850 +#define helper_gvec_muls8 helper_gvec_muls8_rh850 +#define helper_gvec_muls16 helper_gvec_muls16_rh850 +#define helper_gvec_muls32 helper_gvec_muls32_rh850 +#define helper_gvec_muls64 helper_gvec_muls64_rh850 +#define helper_gvec_neg8 helper_gvec_neg8_rh850 +#define helper_gvec_neg16 helper_gvec_neg16_rh850 +#define helper_gvec_neg32 helper_gvec_neg32_rh850 +#define helper_gvec_neg64 helper_gvec_neg64_rh850 +#define helper_gvec_abs8 helper_gvec_abs8_rh850 +#define helper_gvec_abs16 helper_gvec_abs16_rh850 +#define helper_gvec_abs32 helper_gvec_abs32_rh850 +#define helper_gvec_abs64 helper_gvec_abs64_rh850 +#define helper_gvec_mov helper_gvec_mov_rh850 +#define helper_gvec_dup64 helper_gvec_dup64_rh850 +#define helper_gvec_dup32 helper_gvec_dup32_rh850 +#define helper_gvec_dup16 helper_gvec_dup16_rh850 +#define helper_gvec_dup8 helper_gvec_dup8_rh850 +#define helper_gvec_not helper_gvec_not_rh850 +#define helper_gvec_and helper_gvec_and_rh850 +#define helper_gvec_or helper_gvec_or_rh850 +#define helper_gvec_xor helper_gvec_xor_rh850 +#define helper_gvec_andc helper_gvec_andc_rh850 +#define helper_gvec_orc helper_gvec_orc_rh850 +#define helper_gvec_nand helper_gvec_nand_rh850 +#define helper_gvec_nor helper_gvec_nor_rh850 +#define helper_gvec_eqv helper_gvec_eqv_rh850 +#define helper_gvec_ands helper_gvec_ands_rh850 +#define helper_gvec_xors helper_gvec_xors_rh850 +#define helper_gvec_ors helper_gvec_ors_rh850 +#define helper_gvec_shl8i helper_gvec_shl8i_rh850 +#define helper_gvec_shl16i helper_gvec_shl16i_rh850 +#define helper_gvec_shl32i helper_gvec_shl32i_rh850 +#define helper_gvec_shl64i helper_gvec_shl64i_rh850 +#define helper_gvec_shr8i helper_gvec_shr8i_rh850 +#define helper_gvec_shr16i helper_gvec_shr16i_rh850 +#define helper_gvec_shr32i helper_gvec_shr32i_rh850 +#define helper_gvec_shr64i helper_gvec_shr64i_rh850 +#define helper_gvec_sar8i helper_gvec_sar8i_rh850 +#define helper_gvec_sar16i helper_gvec_sar16i_rh850 +#define helper_gvec_sar32i helper_gvec_sar32i_rh850 +#define helper_gvec_sar64i helper_gvec_sar64i_rh850 +#define helper_gvec_shl8v helper_gvec_shl8v_rh850 +#define helper_gvec_shl16v helper_gvec_shl16v_rh850 +#define helper_gvec_shl32v 
helper_gvec_shl32v_rh850 +#define helper_gvec_shl64v helper_gvec_shl64v_rh850 +#define helper_gvec_shr8v helper_gvec_shr8v_rh850 +#define helper_gvec_shr16v helper_gvec_shr16v_rh850 +#define helper_gvec_shr32v helper_gvec_shr32v_rh850 +#define helper_gvec_shr64v helper_gvec_shr64v_rh850 +#define helper_gvec_sar8v helper_gvec_sar8v_rh850 +#define helper_gvec_sar16v helper_gvec_sar16v_rh850 +#define helper_gvec_sar32v helper_gvec_sar32v_rh850 +#define helper_gvec_sar64v helper_gvec_sar64v_rh850 +#define helper_gvec_eq8 helper_gvec_eq8_rh850 +#define helper_gvec_ne8 helper_gvec_ne8_rh850 +#define helper_gvec_lt8 helper_gvec_lt8_rh850 +#define helper_gvec_le8 helper_gvec_le8_rh850 +#define helper_gvec_ltu8 helper_gvec_ltu8_rh850 +#define helper_gvec_leu8 helper_gvec_leu8_rh850 +#define helper_gvec_eq16 helper_gvec_eq16_rh850 +#define helper_gvec_ne16 helper_gvec_ne16_rh850 +#define helper_gvec_lt16 helper_gvec_lt16_rh850 +#define helper_gvec_le16 helper_gvec_le16_rh850 +#define helper_gvec_ltu16 helper_gvec_ltu16_rh850 +#define helper_gvec_leu16 helper_gvec_leu16_rh850 +#define helper_gvec_eq32 helper_gvec_eq32_rh850 +#define helper_gvec_ne32 helper_gvec_ne32_rh850 +#define helper_gvec_lt32 helper_gvec_lt32_rh850 +#define helper_gvec_le32 helper_gvec_le32_rh850 +#define helper_gvec_ltu32 helper_gvec_ltu32_rh850 +#define helper_gvec_leu32 helper_gvec_leu32_rh850 +#define helper_gvec_eq64 helper_gvec_eq64_rh850 +#define helper_gvec_ne64 helper_gvec_ne64_rh850 +#define helper_gvec_lt64 helper_gvec_lt64_rh850 +#define helper_gvec_le64 helper_gvec_le64_rh850 +#define helper_gvec_ltu64 helper_gvec_ltu64_rh850 +#define helper_gvec_leu64 helper_gvec_leu64_rh850 +#define helper_gvec_ssadd8 helper_gvec_ssadd8_rh850 +#define helper_gvec_ssadd16 helper_gvec_ssadd16_rh850 +#define helper_gvec_ssadd32 helper_gvec_ssadd32_rh850 +#define helper_gvec_ssadd64 helper_gvec_ssadd64_rh850 +#define helper_gvec_sssub8 helper_gvec_sssub8_rh850 +#define helper_gvec_sssub16 helper_gvec_sssub16_rh850 +#define helper_gvec_sssub32 helper_gvec_sssub32_rh850 +#define helper_gvec_sssub64 helper_gvec_sssub64_rh850 +#define helper_gvec_usadd8 helper_gvec_usadd8_rh850 +#define helper_gvec_usadd16 helper_gvec_usadd16_rh850 +#define helper_gvec_usadd32 helper_gvec_usadd32_rh850 +#define helper_gvec_usadd64 helper_gvec_usadd64_rh850 +#define helper_gvec_ussub8 helper_gvec_ussub8_rh850 +#define helper_gvec_ussub16 helper_gvec_ussub16_rh850 +#define helper_gvec_ussub32 helper_gvec_ussub32_rh850 +#define helper_gvec_ussub64 helper_gvec_ussub64_rh850 +#define helper_gvec_smin8 helper_gvec_smin8_rh850 +#define helper_gvec_smin16 helper_gvec_smin16_rh850 +#define helper_gvec_smin32 helper_gvec_smin32_rh850 +#define helper_gvec_smin64 helper_gvec_smin64_rh850 +#define helper_gvec_smax8 helper_gvec_smax8_rh850 +#define helper_gvec_smax16 helper_gvec_smax16_rh850 +#define helper_gvec_smax32 helper_gvec_smax32_rh850 +#define helper_gvec_smax64 helper_gvec_smax64_rh850 +#define helper_gvec_umin8 helper_gvec_umin8_rh850 +#define helper_gvec_umin16 helper_gvec_umin16_rh850 +#define helper_gvec_umin32 helper_gvec_umin32_rh850 +#define helper_gvec_umin64 helper_gvec_umin64_rh850 +#define helper_gvec_umax8 helper_gvec_umax8_rh850 +#define helper_gvec_umax16 helper_gvec_umax16_rh850 +#define helper_gvec_umax32 helper_gvec_umax32_rh850 +#define helper_gvec_umax64 helper_gvec_umax64_rh850 +#define helper_gvec_bitsel helper_gvec_bitsel_rh850 +#define cpu_restore_state cpu_restore_state_rh850 +#define page_collection_lock page_collection_lock_rh850 
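The helper_gvec_* renames cover QEMU's generic out-of-line vector helpers; the numeric suffix is the element width in bits (8/16/32/64) and the "s" variants take a scalar operand. A rough sketch of the shape of such a helper, assuming the usual QEMU includes; demo_gvec_add32() is an illustrative stand-in, and the real helpers additionally clear the destination tail up to the maximum vector size encoded in desc:

    void demo_gvec_add32(void *d, void *a, void *b, uint32_t desc)
    {
        intptr_t oprsz = simd_oprsz(desc);   /* operation size in bytes */
        intptr_t i;

        /* Element-wise 32-bit addition over the operation size. */
        for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
            *(uint32_t *)((char *)d + i) =
                *(uint32_t *)((char *)a + i) + *(uint32_t *)((char *)b + i);
        }
    }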
+#define page_collection_unlock page_collection_unlock_rh850 +#define free_code_gen_buffer free_code_gen_buffer_rh850 +#define tcg_exec_init tcg_exec_init_rh850 +#define tb_cleanup tb_cleanup_rh850 +#define tb_flush tb_flush_rh850 +#define tb_phys_invalidate tb_phys_invalidate_rh850 +#define tb_gen_code tb_gen_code_rh850 +#define tb_exec_lock tb_exec_lock_rh850 +#define tb_exec_unlock tb_exec_unlock_rh850 +#define tb_invalidate_phys_page_range tb_invalidate_phys_page_range_rh850 +#define tb_invalidate_phys_range tb_invalidate_phys_range_rh850 +#define tb_invalidate_phys_page_fast tb_invalidate_phys_page_fast_rh850 +#define tb_check_watchpoint tb_check_watchpoint_rh850 +#define cpu_io_recompile cpu_io_recompile_rh850 +#define tb_flush_jmp_cache tb_flush_jmp_cache_rh850 +#define tcg_flush_softmmu_tlb tcg_flush_softmmu_tlb_rh850 +#define translator_loop_temp_check translator_loop_temp_check_rh850 +#define translator_loop translator_loop_rh850 +#define helper_atomic_cmpxchgo_le_mmu helper_atomic_cmpxchgo_le_mmu_rh850 +#define helper_atomic_cmpxchgo_be_mmu helper_atomic_cmpxchgo_be_mmu_rh850 +#define helper_atomic_ldo_le_mmu helper_atomic_ldo_le_mmu_rh850 +#define helper_atomic_ldo_be_mmu helper_atomic_ldo_be_mmu_rh850 +#define helper_atomic_sto_le_mmu helper_atomic_sto_le_mmu_rh850 +#define helper_atomic_sto_be_mmu helper_atomic_sto_be_mmu_rh850 +#define unassigned_mem_ops unassigned_mem_ops_rh850 +#define floatx80_infinity floatx80_infinity_rh850 +#define dup_const_func dup_const_func_rh850 +#define gen_helper_raise_exception gen_helper_raise_exception_rh850 +#define gen_helper_raise_interrupt gen_helper_raise_interrupt_rh850 +#define gen_helper_vfp_get_fpscr gen_helper_vfp_get_fpscr_rh850 +#define gen_helper_vfp_set_fpscr gen_helper_vfp_set_fpscr_rh850 +#define gen_helper_cpsr_read gen_helper_cpsr_read_rh850 +#define gen_helper_cpsr_write gen_helper_cpsr_write_rh850 +#define restore_state_to_opc restore_state_to_opc_rh850 +#define helper_tlb_flush helper_tlb_flush_rh850 +#define helper_uc_rh850_exit helper_uc_rh850_exit_rh850 +#define gen_intermediate_code gen_intermediate_code_rh850 +#endif diff --git a/qemu/target/avr/cpu-param.h b/qemu/target/avr/cpu-param.h new file mode 100644 index 0000000000..7ef4e7c679 --- /dev/null +++ b/qemu/target/avr/cpu-param.h @@ -0,0 +1,36 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#ifndef AVR_CPU_PARAM_H +#define AVR_CPU_PARAM_H + +#define TARGET_LONG_BITS 32 +/* + * TARGET_PAGE_BITS cannot be more than 8 bits because + * 1. all IO registers occupy [0x0000 .. 0x00ff] address range, and they + * should be implemented as a device and not memory + * 2. 
SRAM starts at the address 0x0100 + */ +#define TARGET_PAGE_BITS 8 +#define TARGET_PHYS_ADDR_SPACE_BITS 24 +#define TARGET_VIRT_ADDR_SPACE_BITS 24 +#define NB_MMU_MODES 2 + +#endif diff --git a/qemu/target/avr/cpu-qom.h b/qemu/target/avr/cpu-qom.h new file mode 100644 index 0000000000..9ba1ea1b37 --- /dev/null +++ b/qemu/target/avr/cpu-qom.h @@ -0,0 +1,56 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#ifndef QEMU_AVR_QOM_H +#define QEMU_AVR_QOM_H + +#include "hw/core/cpu.h" + +typedef void Object; +typedef void ObjectClass; + +typedef void DeviceState; +typedef void (*DeviceRealize)(DeviceState *ds); +typedef void (*DeviceReset)(DeviceState *ds); + +#define TYPE_AVR_CPU "avr-cpu" + +#define AVR_CPU(obj) ((AVRCPU *)obj) +#define AVR_CPU_CLASS(klass) ((AVRCPUClass *)klass) +#define AVR_CPU_GET_CLASS(obj) (&((AVRCPU *)obj)->cc) + +/** + * AVRCPUClass: + * @parent_realize: The parent class' realize handler. + * @parent_reset: The parent class' reset handler. + * @vr: Version Register value. + * + * A AVR CPU model. + */ +typedef struct AVRCPUClass { + /*< private >*/ + CPUClass parent_class; + /*< public >*/ + DeviceRealize parent_realize; + DeviceReset parent_reset; +} AVRCPUClass; + + +#endif /* !defined (QEMU_AVR_CPU_QOM_H) */ diff --git a/qemu/target/avr/cpu.c b/qemu/target/avr/cpu.c new file mode 100644 index 0000000000..c062723814 --- /dev/null +++ b/qemu/target/avr/cpu.c @@ -0,0 +1,459 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2019-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "exec/exec-all.h" +#include "cpu.h" +#include "unicorn_helper.h" + +static void avr_cpu_set_pc(CPUState *cs, vaddr value) +{ + AVRCPU *cpu = AVR_CPU(cs); + + cpu->env.pc_w = value / 2; /* internally PC points to words */ +} + +static bool avr_cpu_has_work(CPUState *cs) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + return (cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_RESET)) + && cpu_interrupts_enabled(env); +} + +static void avr_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + env->pc_w = tb->pc / 2; /* internally PC points to words */ +} + +static void avr_cpu_reset(CPUState *cs) +{ + AVRCPU *cpu = AVR_CPU(cs); + AVRCPUClass *mcc = AVR_CPU_GET_CLASS(cpu); + CPUAVRState *env = &cpu->env; + + if (mcc->parent_reset) + mcc->parent_reset(cs); + + env->pc_w = 0; + env->sregI = 1; + env->sregC = 0; + env->sregZ = 0; + env->sregN = 0; + env->sregV = 0; + env->sregS = 0; + env->sregH = 0; + env->sregT = 0; + + env->rampD = 0; + env->rampX = 0; + env->rampY = 0; + env->rampZ = 0; + env->eind = 0; + env->sp = 0; + + env->skip = 0; + + memset(env->r, 0, sizeof(env->r)); +} + +#if 0 +static void avr_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) +{ + info->mach = bfd_arch_avr; + info->print_insn = avr_print_insn; +} +#endif + +static void avr_cpu_realizefn(DeviceState *dev) +{ + CPUState *cs = CPU(dev); + AVRCPUClass *mcc = AVR_CPU_GET_CLASS(dev); + + cpu_exec_realizefn(cs); + qemu_init_vcpu(cs); + cpu_reset(cs); + + if (mcc->parent_realize) + mcc->parent_realize(dev); +} + +#if 0 +static void avr_cpu_set_int(void *opaque, int irq, int level) +{ + AVRCPU *cpu = opaque; + CPUAVRState *env = &cpu->env; + CPUState *cs = CPU(cpu); + uint64_t mask = (1ull << irq); + + if (level) { + env->intsrc |= mask; + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } else { + env->intsrc &= ~mask; + if (env->intsrc == 0) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); + } + } +} +#endif + +static void avr_cpu_initfn(Object *obj, struct uc_struct *uc) +{ + AVRCPU *cpu = AVR_CPU(obj); + CPUAVRState *const env = &cpu->env; + + env->uc = uc; + cpu_set_cpustate_pointers(cpu); + +#if 0 + /* Set the number of interrupts supported by the CPU. 
*/ + qdev_init_gpio_in(DEVICE(cpu), avr_cpu_set_int, + sizeof(cpu->env.intsrc) * 8); +#endif +} + +#if 0 +static ObjectClass *avr_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + oc = object_class_by_name(cpu_model); + if (object_class_dynamic_cast(oc, TYPE_AVR_CPU) == NULL || + object_class_is_abstract(oc)) { + oc = NULL; + } + return oc; +} +#endif + +#if 0 +static void avr_cpu_dump_state(CPUState *cs, FILE *f, int flags) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + int i; + + qemu_fprintf(f, "\n"); + qemu_fprintf(f, "PC: %06x\n", env->pc_w * 2); /* PC points to words */ + qemu_fprintf(f, "SP: %04x\n", env->sp); + qemu_fprintf(f, "rampD: %02x\n", env->rampD >> 16); + qemu_fprintf(f, "rampX: %02x\n", env->rampX >> 16); + qemu_fprintf(f, "rampY: %02x\n", env->rampY >> 16); + qemu_fprintf(f, "rampZ: %02x\n", env->rampZ >> 16); + qemu_fprintf(f, "EIND: %02x\n", env->eind >> 16); + qemu_fprintf(f, "X: %02x%02x\n", env->r[27], env->r[26]); + qemu_fprintf(f, "Y: %02x%02x\n", env->r[29], env->r[28]); + qemu_fprintf(f, "Z: %02x%02x\n", env->r[31], env->r[30]); + qemu_fprintf(f, "SREG: [ %c %c %c %c %c %c %c %c ]\n", + env->sregI ? 'I' : '-', + env->sregT ? 'T' : '-', + env->sregH ? 'H' : '-', + env->sregS ? 'S' : '-', + env->sregV ? 'V' : '-', + env->sregN ? '-' : 'N', /* Zf has negative logic */ + env->sregZ ? 'Z' : '-', + env->sregC ? 'I' : '-'); + qemu_fprintf(f, "SKIP: %02x\n", env->skip); + + qemu_fprintf(f, "\n"); + for (i = 0; i < ARRAY_SIZE(env->r); i++) { + qemu_fprintf(f, "R[%02d]: %02x ", i, env->r[i]); + + if ((i % 8) == 7) { + qemu_fprintf(f, "\n"); + } + } + qemu_fprintf(f, "\n"); +} +#endif + +static void avr_cpu_class_init(ObjectClass *oc, void *data) +{ + CPUClass *cc = CPU_CLASS(oc); + AVRCPUClass *mcc = AVR_CPU_CLASS(oc); + + mcc->parent_realize = NULL; + mcc->parent_reset = NULL; + +#if 0 + cc->class_by_name = avr_cpu_class_by_name; +#endif + + cc->reset = avr_cpu_reset; + cc->has_work = avr_cpu_has_work; + cc->do_interrupt = avr_cpu_do_interrupt; + cc->cpu_exec_interrupt = avr_cpu_exec_interrupt; +#if 0 + cc->dump_state = avr_cpu_dump_state; +#endif + cc->set_pc = avr_cpu_set_pc; +#if 0 + cc->memory_rw_debug = avr_cpu_memory_rw_debug; +#endif + cc->get_phys_page_debug = avr_cpu_get_phys_page_debug; + cc->tlb_fill = avr_cpu_tlb_fill; +#if 0 + cc->vmsd = &vms_avr_cpu; + cc->disas_set_info = avr_cpu_disas_set_info; +#endif + cc->tcg_initialize = avr_cpu_tcg_init; + cc->synchronize_from_tb = avr_cpu_synchronize_from_tb; +#if 0 + cc->gdb_read_register = avr_cpu_gdb_read_register; + cc->gdb_write_register = avr_cpu_gdb_write_register; + cc->gdb_num_core_regs = 35; + cc->gdb_core_xml_file = "avr-cpu.xml"; +#endif +} + +/* + * Setting features of AVR core type avr5 + * -------------------------------------- + * + * This type of AVR core is present in the following AVR MCUs: + * + * ata5702m322, ata5782, ata5790, ata5790n, ata5791, ata5795, ata5831, ata6613c, + * ata6614q, ata8210, ata8510, atmega16, atmega16a, atmega161, atmega162, + * atmega163, atmega164a, atmega164p, atmega164pa, atmega165, atmega165a, + * atmega165p, atmega165pa, atmega168, atmega168a, atmega168p, atmega168pa, + * atmega168pb, atmega169, atmega169a, atmega169p, atmega169pa, atmega16hvb, + * atmega16hvbrevb, atmega16m1, atmega16u4, atmega32a, atmega32, atmega323, + * atmega324a, atmega324p, atmega324pa, atmega325, atmega325a, atmega325p, + * atmega325pa, atmega3250, atmega3250a, atmega3250p, atmega3250pa, atmega328, + * atmega328p, atmega328pb, atmega329, atmega329a, 
atmega329p, atmega329pa, + * atmega3290, atmega3290a, atmega3290p, atmega3290pa, atmega32c1, atmega32m1, + * atmega32u4, atmega32u6, atmega406, atmega64, atmega64a, atmega640, atmega644, + * atmega644a, atmega644p, atmega644pa, atmega645, atmega645a, atmega645p, + * atmega6450, atmega6450a, atmega6450p, atmega649, atmega649a, atmega649p, + * atmega6490, atmega16hva, atmega16hva2, atmega32hvb, atmega6490a, atmega6490p, + * atmega64c1, atmega64m1, atmega64hve, atmega64hve2, atmega64rfr2, + * atmega644rfr2, atmega32hvbrevb, at90can32, at90can64, at90pwm161, at90pwm216, + * at90pwm316, at90scr100, at90usb646, at90usb647, at94k, m3000 + */ +static void avr_avr5_initfn(Object *obj) +{ + AVRCPU *cpu = AVR_CPU(obj); + CPUAVRState *env = &cpu->env; + + set_avr_feature(env, AVR_FEATURE_LPM); + set_avr_feature(env, AVR_FEATURE_IJMP_ICALL); + set_avr_feature(env, AVR_FEATURE_ADIW_SBIW); + set_avr_feature(env, AVR_FEATURE_SRAM); + set_avr_feature(env, AVR_FEATURE_BREAK); + + set_avr_feature(env, AVR_FEATURE_2_BYTE_PC); + set_avr_feature(env, AVR_FEATURE_2_BYTE_SP); + set_avr_feature(env, AVR_FEATURE_JMP_CALL); + set_avr_feature(env, AVR_FEATURE_LPMX); + set_avr_feature(env, AVR_FEATURE_MOVW); + set_avr_feature(env, AVR_FEATURE_MUL); +} + +/* + * Setting features of AVR core type avr51 + * -------------------------------------- + * + * This type of AVR core is present in the following AVR MCUs: + * + * atmega128, atmega128a, atmega1280, atmega1281, atmega1284, atmega1284p, + * atmega128rfa1, atmega128rfr2, atmega1284rfr2, at90can128, at90usb1286, + * at90usb1287 + */ +static void avr_avr51_initfn(Object *obj) +{ + AVRCPU *cpu = AVR_CPU(obj); + CPUAVRState *env = &cpu->env; + + set_avr_feature(env, AVR_FEATURE_LPM); + set_avr_feature(env, AVR_FEATURE_IJMP_ICALL); + set_avr_feature(env, AVR_FEATURE_ADIW_SBIW); + set_avr_feature(env, AVR_FEATURE_SRAM); + set_avr_feature(env, AVR_FEATURE_BREAK); + + set_avr_feature(env, AVR_FEATURE_2_BYTE_PC); + set_avr_feature(env, AVR_FEATURE_2_BYTE_SP); + set_avr_feature(env, AVR_FEATURE_RAMPZ); + set_avr_feature(env, AVR_FEATURE_ELPMX); + set_avr_feature(env, AVR_FEATURE_ELPM); + set_avr_feature(env, AVR_FEATURE_JMP_CALL); + set_avr_feature(env, AVR_FEATURE_LPMX); + set_avr_feature(env, AVR_FEATURE_MOVW); + set_avr_feature(env, AVR_FEATURE_MUL); +} + +/* + * Setting features of AVR core type avr6 + * -------------------------------------- + * + * This type of AVR core is present in the following AVR MCUs: + * + * atmega2560, atmega2561, atmega256rfr2, atmega2564rfr2 + */ +static void avr_avr6_initfn(Object *obj) +{ + AVRCPU *cpu = AVR_CPU(obj); + CPUAVRState *env = &cpu->env; + + set_avr_feature(env, AVR_FEATURE_LPM); + set_avr_feature(env, AVR_FEATURE_IJMP_ICALL); + set_avr_feature(env, AVR_FEATURE_ADIW_SBIW); + set_avr_feature(env, AVR_FEATURE_SRAM); + set_avr_feature(env, AVR_FEATURE_BREAK); + + set_avr_feature(env, AVR_FEATURE_3_BYTE_PC); + set_avr_feature(env, AVR_FEATURE_2_BYTE_SP); + set_avr_feature(env, AVR_FEATURE_RAMPZ); + set_avr_feature(env, AVR_FEATURE_EIJMP_EICALL); + set_avr_feature(env, AVR_FEATURE_ELPMX); + set_avr_feature(env, AVR_FEATURE_ELPM); + set_avr_feature(env, AVR_FEATURE_JMP_CALL); + set_avr_feature(env, AVR_FEATURE_LPMX); + set_avr_feature(env, AVR_FEATURE_MOVW); + set_avr_feature(env, AVR_FEATURE_MUL); +} + +typedef struct AVRCPUInfo { + int model; + const char *name; + void (*initfn)(Object *obj); +} AVRCPUInfo; + +static const AVRCPUInfo avr_cpu_info[] ={ + {UC_CPU_AVR_ATMEGA16, "arch:avr5", avr_avr5_initfn}, + {UC_CPU_AVR_ATMEGA16, 
"atmega16", avr_avr5_initfn}, + {UC_CPU_AVR_ATMEGA32, "atmega32", avr_avr5_initfn}, + {UC_CPU_AVR_ATMEGA64, "atmega64", avr_avr5_initfn}, + + {UC_CPU_AVR_ATMEGA128, "arch:avr51", avr_avr51_initfn}, + {UC_CPU_AVR_ATMEGA128, "atmega128", avr_avr51_initfn}, + {UC_CPU_AVR_ATMEGA128RFR2, "atmega128rfr2", avr_avr51_initfn}, + {UC_CPU_AVR_ATMEGA1280, "atmega1280", avr_avr51_initfn}, + + {UC_CPU_AVR_ATMEGA256, "arch:avr6", avr_avr6_initfn}, + {UC_CPU_AVR_ATMEGA256RFR2, "atmega256rfr2", avr_avr6_initfn}, + {UC_CPU_AVR_ATMEGA2560, "atmega2560", avr_avr6_initfn}, +}; + +static const AVRCPUInfo *avr_cpu_info_get(int cpu_model) +{ + for (int i = 0; i < ARRAY_SIZE(avr_cpu_info); i++) { + const AVRCPUInfo *const cip = &avr_cpu_info[i]; + if (cpu_model == cip->model) + return cip; + } + return NULL; +} + +DEFAULT_VISIBILITY +int avr_cpu_model_valid(int cpu_model) +{ + return avr_cpu_info_get(cpu_model) != NULL; +} + +#if 0 +static void avr_cpu_list_entry(gpointer data, gpointer user_data) +{ + const char *typename = object_class_get_name(OBJECT_CLASS(data)); + + qemu_printf("%s\n", typename); +} + +void avr_cpu_list(void) +{ + GSList *list; + list = object_class_get_list_sorted(TYPE_AVR_CPU, false); + g_slist_foreach(list, avr_cpu_list_entry, NULL); + g_slist_free(list); +} + +#define DEFINE_AVR_CPU_TYPE(model, initfn) \ + { \ + .parent = TYPE_AVR_CPU, \ + .instance_init = initfn, \ + .name = AVR_CPU_TYPE_NAME(model), \ + } + +static const TypeInfo avr_cpu_type_info[] = { + { + .name = TYPE_AVR_CPU, + .parent = TYPE_CPU, + .instance_size = sizeof(AVRCPU), + .instance_init = avr_cpu_initfn, + .class_size = sizeof(AVRCPUClass), + .class_init = avr_cpu_class_init, + .abstract = true, + }, + DEFINE_AVR_CPU_TYPE("avr5", avr_avr5_initfn), + DEFINE_AVR_CPU_TYPE("avr51", avr_avr51_initfn), + DEFINE_AVR_CPU_TYPE("avr6", avr_avr6_initfn), +}; + +DEFINE_TYPES(avr_cpu_type_info) +#endif + +AVRCPU *cpu_avr_init(struct uc_struct *uc) +{ + AVRCPU *cpu; + CPUState *cs; + CPUClass *cc; + ObjectClass *oc; + + cpu = qemu_memalign(8, sizeof(*cpu)); + if (cpu == NULL) { + return NULL; + } + memset((void *)cpu, 0, sizeof(*cpu)); + + if (uc->cpu_model == INT_MAX) + uc->cpu_model = UC_CPU_AVR_ATMEGA128; + const AVRCPUInfo *const cip = avr_cpu_info_get(uc->cpu_model); + if (!cip) { + qemu_vfree(cpu); + return NULL; + } + + cs = &cpu->parent_obj; + cc = &AVR_CPU_GET_CLASS(cpu)->parent_class; + oc = (ObjectClass *)cc; + cs->cc = cc; + cs->uc = uc; + uc->cpu = cs; + + cpu_class_init(uc, cc); + avr_cpu_class_init(oc, NULL); + + cpu_common_initfn(uc, cs); + avr_cpu_initfn(cs, uc); + cip->initfn(cs); + + avr_cpu_realizefn(cs); + + // init address space + cpu_address_space_init(cs, 0, cs->memory); + + qemu_init_vcpu(cs); + + return cpu; +} diff --git a/qemu/target/avr/cpu.h b/qemu/target/avr/cpu.h new file mode 100644 index 0000000000..f7781c7ffe --- /dev/null +++ b/qemu/target/avr/cpu.h @@ -0,0 +1,274 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ *
+ */
+
+#ifndef QEMU_AVR_CPU_H
+#define QEMU_AVR_CPU_H
+
+#include "cpu-qom.h"
+#include "exec/cpu-defs.h"
+
+#ifdef CONFIG_USER_ONLY
+#error "AVR 8-bit does not support user mode"
+#endif
+
+#define AVR_CPU_TYPE_SUFFIX "-" TYPE_AVR_CPU
+#define AVR_CPU_TYPE_NAME(name) (name AVR_CPU_TYPE_SUFFIX)
+#define CPU_RESOLVING_TYPE TYPE_AVR_CPU
+
+#define TCG_GUEST_DEFAULT_MO 0
+
+/*
+ * AVR has two memory spaces, data & code.
+ * Both start at address 0:
+ * ST/LD instructions access the data space;
+ * LPM/SPM and instruction fetching access the code memory space.
+ */
+#define MMU_CODE_IDX 0
+#define MMU_DATA_IDX 1
+
+#define EXCP_RESET 1
+#define EXCP_INT(n) (EXCP_RESET + (n) + 1)
+
+/* Number of CPU registers */
+#define NUMBER_OF_CPU_REGISTERS 32
+/* Number of IO registers accessible by ld/st/in/out */
+#define NUMBER_OF_IO_REGISTERS 64
+
+/*
+ * Offsets of AVR memory regions in host memory space.
+ *
+ * This is needed because the AVR has separate code and data address
+ * spaces that both start from zero but have to go somewhere in
+ * host memory.
+ *
+ * It's also useful to know where some things are, like the IO registers.
+ */
+#if 1
+// Unicorn:
+#define OFFSET_CODE 0x08000000 /* UC_AVR_MEM_FLASH */
+#define OFFSET_DATA 0x00000000
+#else
+/* Flash program memory */
+#define OFFSET_CODE 0x00000000
+/* CPU registers, IO registers, and SRAM */
+#define OFFSET_DATA 0x00800000
+#endif
+/* CPU registers specifically, these are mapped at the start of data */
+#define OFFSET_CPU_REGISTERS OFFSET_DATA
+/*
+ * IO registers, including status register, stack pointer, and memory
+ * mapped peripherals, mapped just after CPU registers
+ */
+#define OFFSET_IO_REGISTERS (OFFSET_DATA + NUMBER_OF_CPU_REGISTERS)
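+
+/*
+ * For example, with the Unicorn layout above a guest data/SRAM access at
+ * 0x0100 is backed by host physical address OFFSET_DATA | 0x0100, I/O port n
+ * lives at OFFSET_IO_REGISTERS + n, and (while the flash mapping is active)
+ * instruction fetches read from OFFSET_CODE + pc_w * 2; see avr_code_base()
+ * below and avr_cpu_tlb_fill() in helper.c.  The addresses in this example
+ * are illustrative only.
+ */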
+
+typedef enum AVRFeature {
+    AVR_FEATURE_SRAM,
+
+    AVR_FEATURE_1_BYTE_PC,
+    AVR_FEATURE_2_BYTE_PC,
+    AVR_FEATURE_3_BYTE_PC,
+
+    AVR_FEATURE_1_BYTE_SP,
+    AVR_FEATURE_2_BYTE_SP,
+
+    AVR_FEATURE_BREAK,
+    AVR_FEATURE_DES,
+    AVR_FEATURE_RMW, /* Read Modify Write - XCH LAC LAS LAT */
+
+    AVR_FEATURE_EIJMP_EICALL,
+    AVR_FEATURE_IJMP_ICALL,
+    AVR_FEATURE_JMP_CALL,
+
+    AVR_FEATURE_ADIW_SBIW,
+
+    AVR_FEATURE_SPM,
+    AVR_FEATURE_SPMX,
+
+    AVR_FEATURE_ELPMX,
+    AVR_FEATURE_ELPM,
+    AVR_FEATURE_LPMX,
+    AVR_FEATURE_LPM,
+
+    AVR_FEATURE_MOVW,
+    AVR_FEATURE_MUL,
+    AVR_FEATURE_RAMPD,
+    AVR_FEATURE_RAMPX,
+    AVR_FEATURE_RAMPY,
+    AVR_FEATURE_RAMPZ,
+
+    AVR_FEATURE_FLASH, /* Unicorn: was Flash program memory mapped? */
+} AVRFeature;
+
+typedef struct CPUAVRState CPUAVRState;
+
+struct CPUAVRState {
+    uint32_t pc_w; /* 0x003fffff up to 22 bits */
+
+    uint32_t sregC; /* 0x00000001 1 bit */
+    uint32_t sregZ; /* 0x00000001 1 bit */
+    uint32_t sregN; /* 0x00000001 1 bit */
+    uint32_t sregV; /* 0x00000001 1 bit */
+    uint32_t sregS; /* 0x00000001 1 bit */
+    uint32_t sregH; /* 0x00000001 1 bit */
+    uint32_t sregT; /* 0x00000001 1 bit */
+    uint32_t sregI; /* 0x00000001 1 bit */
+
+    uint32_t rampD; /* 0x00ff0000 8 bits */
+    uint32_t rampX; /* 0x00ff0000 8 bits */
+    uint32_t rampY; /* 0x00ff0000 8 bits */
+    uint32_t rampZ; /* 0x00ff0000 8 bits */
+    uint32_t eind; /* 0x00ff0000 8 bits */
+
+    uint32_t r[NUMBER_OF_CPU_REGISTERS]; /* 8 bits each */
+    uint32_t sp; /* 16 bits */
+
+    uint32_t skip; /* if set, skip instruction */
+
+    uint64_t intsrc; /* interrupt sources */
+    bool fullacc; /* if true, loads/stores go through the full-access helpers
+                     (CPU/IO registers); plain memory access otherwise */
+
+    uint64_t features;
+
+    // Unicorn engine
+    struct uc_struct *uc;
+};
+
+/**
+ * AVRCPU:
+ * @env: #CPUAVRState
+ *
+ * An AVR CPU.
+ */
+typedef struct AVRCPU {
+    /*< private >*/
+    CPUState parent_obj;
+    /*< public >*/
+
+    CPUNegativeOffsetState neg;
+    CPUAVRState env;
+
+    AVRCPUClass cc;
+} AVRCPU;
+
+extern const struct VMStateDescription vms_avr_cpu;
+
+void avr_cpu_do_interrupt(CPUState *cpu);
+bool avr_cpu_exec_interrupt(CPUState *cpu, int int_req);
+hwaddr avr_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
+int avr_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
+int avr_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+
+static inline int avr_feature(CPUAVRState *env, AVRFeature feature)
+{
+    return (env->features & (1U << feature)) != 0;
+}
+
+static inline void set_avr_feature(CPUAVRState *env, int feature)
+{
+    env->features |= (1U << feature);
+}
+
+#define cpu_list avr_cpu_list
+#define cpu_signal_handler cpu_avr_signal_handler
+#define cpu_mmu_index avr_cpu_mmu_index
+
+static inline int avr_cpu_mmu_index(CPUAVRState *env, bool ifetch)
+{
+    return ifetch ? MMU_CODE_IDX : MMU_DATA_IDX;
+}
+
+static inline uint32_t avr_code_base(CPUAVRState *env)
+{
+    return OFFSET_CODE && avr_feature(env, AVR_FEATURE_FLASH) ?
+ OFFSET_CODE : 0; +} + +void avr_cpu_tcg_init(struct uc_struct *uc); + +void avr_cpu_list(void); +int cpu_avr_exec(CPUState *cpu); +int cpu_avr_signal_handler(int host_signum, void *pinfo, void *puc); +int avr_cpu_memory_rw_debug(CPUState *cs, vaddr address, uint8_t *buf, + int len, bool is_write); + +enum { + TB_FLAGS_FULL_ACCESS = 1, + TB_FLAGS_SKIP = 2, +}; + +static inline void cpu_get_tb_cpu_state(CPUAVRState *env, target_ulong *pc, + target_ulong *cs_base, uint32_t *pflags) +{ + uint32_t flags = 0; + + *pc = env->pc_w * 2; + *cs_base = 0; + + if (env->fullacc) { + flags |= TB_FLAGS_FULL_ACCESS; + } + if (env->skip) { + flags |= TB_FLAGS_SKIP; + } + + *pflags = flags; +} + +static inline int cpu_interrupts_enabled(CPUAVRState *env) +{ + return env->sregI != 0; +} + +static inline uint8_t cpu_get_sreg(CPUAVRState *env) +{ + uint8_t sreg; + sreg = (env->sregC) << 0 + | (env->sregZ) << 1 + | (env->sregN) << 2 + | (env->sregV) << 3 + | (env->sregS) << 4 + | (env->sregH) << 5 + | (env->sregT) << 6 + | (env->sregI) << 7; + return sreg; +} + +static inline void cpu_set_sreg(CPUAVRState *env, uint8_t sreg) +{ + env->sregC = (sreg >> 0) & 0x01; + env->sregZ = (sreg >> 1) & 0x01; + env->sregN = (sreg >> 2) & 0x01; + env->sregV = (sreg >> 3) & 0x01; + env->sregS = (sreg >> 4) & 0x01; + env->sregH = (sreg >> 5) & 0x01; + env->sregT = (sreg >> 6) & 0x01; + env->sregI = (sreg >> 7) & 0x01; +} + +bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + +typedef CPUAVRState CPUArchState; +typedef AVRCPU ArchCPU; + +#include "exec/cpu-all.h" + +#endif /* !defined (QEMU_AVR_CPU_H) */ diff --git a/qemu/target/avr/decode-insn.c.inc b/qemu/target/avr/decode-insn.c.inc new file mode 100644 index 0000000000..0e96565474 --- /dev/null +++ b/qemu/target/avr/decode-insn.c.inc @@ -0,0 +1,1097 @@ +/* This file is autogenerated by scripts/decodetree.py. 
*/ + +typedef struct { + int bit; + int rd; +} arg_decode_insn10; + +typedef struct { + int rd; +} arg_decode_insn2; + +typedef struct { + int imm; +} arg_decode_insn3; + +typedef struct { + int bit; +} arg_decode_insn4; + +typedef struct { + int bit; + int imm; +} arg_decode_insn5; + +typedef struct { + int noarg_; +} arg_decode_insn6; + +typedef struct { + int bit; + int rr; +} arg_decode_insn7; + +typedef struct { + int bit; + int reg; +} arg_decode_insn8; + +typedef struct { + int rr; +} arg_decode_insn9; + +typedef struct { + int imm; + int rd; +} arg_rd_imm; + +typedef struct { + int rd; + int rr; +} arg_rd_rr; + +typedef arg_rd_rr arg_ADD; +static bool trans_ADD(DisasContext *ctx, arg_ADD *a); +typedef arg_rd_rr arg_ADC; +static bool trans_ADC(DisasContext *ctx, arg_ADC *a); +typedef arg_rd_imm arg_ADIW; +static bool trans_ADIW(DisasContext *ctx, arg_ADIW *a); +typedef arg_rd_rr arg_SUB; +static bool trans_SUB(DisasContext *ctx, arg_SUB *a); +typedef arg_rd_imm arg_SUBI; +static bool trans_SUBI(DisasContext *ctx, arg_SUBI *a); +typedef arg_rd_rr arg_SBC; +static bool trans_SBC(DisasContext *ctx, arg_SBC *a); +typedef arg_rd_imm arg_SBCI; +static bool trans_SBCI(DisasContext *ctx, arg_SBCI *a); +typedef arg_rd_imm arg_SBIW; +static bool trans_SBIW(DisasContext *ctx, arg_SBIW *a); +typedef arg_rd_rr arg_AND; +static bool trans_AND(DisasContext *ctx, arg_AND *a); +typedef arg_rd_imm arg_ANDI; +static bool trans_ANDI(DisasContext *ctx, arg_ANDI *a); +typedef arg_rd_rr arg_OR; +static bool trans_OR(DisasContext *ctx, arg_OR *a); +typedef arg_rd_imm arg_ORI; +static bool trans_ORI(DisasContext *ctx, arg_ORI *a); +typedef arg_rd_rr arg_EOR; +static bool trans_EOR(DisasContext *ctx, arg_EOR *a); +typedef arg_decode_insn2 arg_COM; +static bool trans_COM(DisasContext *ctx, arg_COM *a); +typedef arg_decode_insn2 arg_NEG; +static bool trans_NEG(DisasContext *ctx, arg_NEG *a); +typedef arg_decode_insn2 arg_INC; +static bool trans_INC(DisasContext *ctx, arg_INC *a); +typedef arg_decode_insn2 arg_DEC; +static bool trans_DEC(DisasContext *ctx, arg_DEC *a); +typedef arg_rd_rr arg_MUL; +static bool trans_MUL(DisasContext *ctx, arg_MUL *a); +typedef arg_rd_rr arg_MULS; +static bool trans_MULS(DisasContext *ctx, arg_MULS *a); +typedef arg_rd_rr arg_MULSU; +static bool trans_MULSU(DisasContext *ctx, arg_MULSU *a); +typedef arg_rd_rr arg_FMUL; +static bool trans_FMUL(DisasContext *ctx, arg_FMUL *a); +typedef arg_rd_rr arg_FMULS; +static bool trans_FMULS(DisasContext *ctx, arg_FMULS *a); +typedef arg_rd_rr arg_FMULSU; +static bool trans_FMULSU(DisasContext *ctx, arg_FMULSU *a); +typedef arg_decode_insn3 arg_DES; +static bool trans_DES(DisasContext *ctx, arg_DES *a); +typedef arg_decode_insn3 arg_RJMP; +static bool trans_RJMP(DisasContext *ctx, arg_RJMP *a); +typedef arg_decode_insn6 arg_IJMP; +static bool trans_IJMP(DisasContext *ctx, arg_IJMP *a); +typedef arg_decode_insn6 arg_EIJMP; +static bool trans_EIJMP(DisasContext *ctx, arg_EIJMP *a); +typedef arg_decode_insn3 arg_JMP; +static bool trans_JMP(DisasContext *ctx, arg_JMP *a); +typedef arg_decode_insn3 arg_RCALL; +static bool trans_RCALL(DisasContext *ctx, arg_RCALL *a); +typedef arg_decode_insn6 arg_ICALL; +static bool trans_ICALL(DisasContext *ctx, arg_ICALL *a); +typedef arg_decode_insn6 arg_EICALL; +static bool trans_EICALL(DisasContext *ctx, arg_EICALL *a); +typedef arg_decode_insn3 arg_CALL; +static bool trans_CALL(DisasContext *ctx, arg_CALL *a); +typedef arg_decode_insn6 arg_RET; +static bool trans_RET(DisasContext *ctx, arg_RET *a); +typedef 
arg_decode_insn6 arg_RETI; +static bool trans_RETI(DisasContext *ctx, arg_RETI *a); +typedef arg_rd_rr arg_CPSE; +static bool trans_CPSE(DisasContext *ctx, arg_CPSE *a); +typedef arg_rd_rr arg_CP; +static bool trans_CP(DisasContext *ctx, arg_CP *a); +typedef arg_rd_rr arg_CPC; +static bool trans_CPC(DisasContext *ctx, arg_CPC *a); +typedef arg_rd_imm arg_CPI; +static bool trans_CPI(DisasContext *ctx, arg_CPI *a); +typedef arg_decode_insn7 arg_SBRC; +static bool trans_SBRC(DisasContext *ctx, arg_SBRC *a); +typedef arg_decode_insn7 arg_SBRS; +static bool trans_SBRS(DisasContext *ctx, arg_SBRS *a); +typedef arg_decode_insn8 arg_SBIC; +static bool trans_SBIC(DisasContext *ctx, arg_SBIC *a); +typedef arg_decode_insn8 arg_SBIS; +static bool trans_SBIS(DisasContext *ctx, arg_SBIS *a); +typedef arg_decode_insn5 arg_BRBS; +static bool trans_BRBS(DisasContext *ctx, arg_BRBS *a); +typedef arg_decode_insn5 arg_BRBC; +static bool trans_BRBC(DisasContext *ctx, arg_BRBC *a); +typedef arg_rd_rr arg_MOV; +static bool trans_MOV(DisasContext *ctx, arg_MOV *a); +typedef arg_rd_rr arg_MOVW; +static bool trans_MOVW(DisasContext *ctx, arg_MOVW *a); +typedef arg_rd_imm arg_LDI; +static bool trans_LDI(DisasContext *ctx, arg_LDI *a); +typedef arg_rd_imm arg_LDS; +static bool trans_LDS(DisasContext *ctx, arg_LDS *a); +typedef arg_decode_insn2 arg_LDX1; +static bool trans_LDX1(DisasContext *ctx, arg_LDX1 *a); +typedef arg_decode_insn2 arg_LDX2; +static bool trans_LDX2(DisasContext *ctx, arg_LDX2 *a); +typedef arg_decode_insn2 arg_LDX3; +static bool trans_LDX3(DisasContext *ctx, arg_LDX3 *a); +typedef arg_decode_insn2 arg_LDY2; +static bool trans_LDY2(DisasContext *ctx, arg_LDY2 *a); +typedef arg_decode_insn2 arg_LDY3; +static bool trans_LDY3(DisasContext *ctx, arg_LDY3 *a); +typedef arg_decode_insn2 arg_LDZ2; +static bool trans_LDZ2(DisasContext *ctx, arg_LDZ2 *a); +typedef arg_decode_insn2 arg_LDZ3; +static bool trans_LDZ3(DisasContext *ctx, arg_LDZ3 *a); +typedef arg_rd_imm arg_LDDY; +static bool trans_LDDY(DisasContext *ctx, arg_LDDY *a); +typedef arg_rd_imm arg_LDDZ; +static bool trans_LDDZ(DisasContext *ctx, arg_LDDZ *a); +typedef arg_rd_imm arg_STS; +static bool trans_STS(DisasContext *ctx, arg_STS *a); +typedef arg_decode_insn9 arg_STX1; +static bool trans_STX1(DisasContext *ctx, arg_STX1 *a); +typedef arg_decode_insn9 arg_STX2; +static bool trans_STX2(DisasContext *ctx, arg_STX2 *a); +typedef arg_decode_insn9 arg_STX3; +static bool trans_STX3(DisasContext *ctx, arg_STX3 *a); +typedef arg_decode_insn2 arg_STY2; +static bool trans_STY2(DisasContext *ctx, arg_STY2 *a); +typedef arg_decode_insn2 arg_STY3; +static bool trans_STY3(DisasContext *ctx, arg_STY3 *a); +typedef arg_decode_insn2 arg_STZ2; +static bool trans_STZ2(DisasContext *ctx, arg_STZ2 *a); +typedef arg_decode_insn2 arg_STZ3; +static bool trans_STZ3(DisasContext *ctx, arg_STZ3 *a); +typedef arg_rd_imm arg_STDY; +static bool trans_STDY(DisasContext *ctx, arg_STDY *a); +typedef arg_rd_imm arg_STDZ; +static bool trans_STDZ(DisasContext *ctx, arg_STDZ *a); +typedef arg_decode_insn6 arg_LPM1; +static bool trans_LPM1(DisasContext *ctx, arg_LPM1 *a); +typedef arg_decode_insn2 arg_LPM2; +static bool trans_LPM2(DisasContext *ctx, arg_LPM2 *a); +typedef arg_decode_insn2 arg_LPMX; +static bool trans_LPMX(DisasContext *ctx, arg_LPMX *a); +typedef arg_decode_insn6 arg_ELPM1; +static bool trans_ELPM1(DisasContext *ctx, arg_ELPM1 *a); +typedef arg_decode_insn2 arg_ELPM2; +static bool trans_ELPM2(DisasContext *ctx, arg_ELPM2 *a); +typedef arg_decode_insn2 arg_ELPMX; 
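+/*
+ * Generated interface, in brief: each pattern in insn.decode yields an
+ * argument struct (arg_*), a field extractor and a trans_* hook that the
+ * hand-written translate.c implements; decode_insn() at the bottom of this
+ * file extracts the fields and dispatches to the matching hook, returning
+ * false when no pattern matches.  A caller can therefore be sketched as
+ * (hypothetical names, for orientation only):
+ *
+ *     if (!decode_insn(ctx, opcode)) {
+ *         gen_helper_unsupported(cpu_env);   <- assumed error path
+ *     }
+ */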
+static bool trans_ELPMX(DisasContext *ctx, arg_ELPMX *a); +typedef arg_decode_insn6 arg_SPM; +static bool trans_SPM(DisasContext *ctx, arg_SPM *a); +typedef arg_decode_insn6 arg_SPMX; +static bool trans_SPMX(DisasContext *ctx, arg_SPMX *a); +typedef arg_rd_imm arg_IN; +static bool trans_IN(DisasContext *ctx, arg_IN *a); +typedef arg_rd_imm arg_OUT; +static bool trans_OUT(DisasContext *ctx, arg_OUT *a); +typedef arg_decode_insn2 arg_PUSH; +static bool trans_PUSH(DisasContext *ctx, arg_PUSH *a); +typedef arg_decode_insn2 arg_POP; +static bool trans_POP(DisasContext *ctx, arg_POP *a); +typedef arg_decode_insn2 arg_XCH; +static bool trans_XCH(DisasContext *ctx, arg_XCH *a); +typedef arg_decode_insn2 arg_LAC; +static bool trans_LAC(DisasContext *ctx, arg_LAC *a); +typedef arg_decode_insn2 arg_LAS; +static bool trans_LAS(DisasContext *ctx, arg_LAS *a); +typedef arg_decode_insn2 arg_LAT; +static bool trans_LAT(DisasContext *ctx, arg_LAT *a); +typedef arg_decode_insn2 arg_LSR; +static bool trans_LSR(DisasContext *ctx, arg_LSR *a); +typedef arg_decode_insn2 arg_ROR; +static bool trans_ROR(DisasContext *ctx, arg_ROR *a); +typedef arg_decode_insn2 arg_ASR; +static bool trans_ASR(DisasContext *ctx, arg_ASR *a); +typedef arg_decode_insn2 arg_SWAP; +static bool trans_SWAP(DisasContext *ctx, arg_SWAP *a); +typedef arg_decode_insn8 arg_SBI; +static bool trans_SBI(DisasContext *ctx, arg_SBI *a); +typedef arg_decode_insn8 arg_CBI; +static bool trans_CBI(DisasContext *ctx, arg_CBI *a); +typedef arg_decode_insn10 arg_BST; +static bool trans_BST(DisasContext *ctx, arg_BST *a); +typedef arg_decode_insn10 arg_BLD; +static bool trans_BLD(DisasContext *ctx, arg_BLD *a); +typedef arg_decode_insn4 arg_BSET; +static bool trans_BSET(DisasContext *ctx, arg_BSET *a); +typedef arg_decode_insn4 arg_BCLR; +static bool trans_BCLR(DisasContext *ctx, arg_BCLR *a); +typedef arg_decode_insn6 arg_BREAK; +static bool trans_BREAK(DisasContext *ctx, arg_BREAK *a); +typedef arg_decode_insn6 arg_NOP; +static bool trans_NOP(DisasContext *ctx, arg_NOP *a); +typedef arg_decode_insn6 arg_SLEEP; +static bool trans_SLEEP(DisasContext *ctx, arg_SLEEP *a); +typedef arg_decode_insn6 arg_WDR; +static bool trans_WDR(DisasContext *ctx, arg_WDR *a); + +static void decode_insn_extract_decode_insn_Fmt_10(DisasContext *ctx, arg_decode_insn6 *a, uint16_t insn) +{ +} + +static void decode_insn_extract_decode_insn_Fmt_11(DisasContext *ctx, arg_decode_insn3 *a, uint16_t insn) +{ + a->imm = append_16(ctx, deposit32(extract32(insn, 0, 1), 1, 31, extract32(insn, 4, 5))); +} + +static void decode_insn_extract_decode_insn_Fmt_12(DisasContext *ctx, arg_decode_insn7 *a, uint16_t insn) +{ + a->rr = extract32(insn, 4, 5); + a->bit = extract32(insn, 0, 3); +} + +static void decode_insn_extract_decode_insn_Fmt_13(DisasContext *ctx, arg_decode_insn8 *a, uint16_t insn) +{ + a->reg = extract32(insn, 3, 5); + a->bit = extract32(insn, 0, 3); +} + +static void decode_insn_extract_decode_insn_Fmt_17(DisasContext *ctx, arg_rd_rr *a, uint16_t insn) +{ + a->rd = to_regs_00_30_by_two(ctx, extract32(insn, 4, 4)); + a->rr = to_regs_00_30_by_two(ctx, extract32(insn, 0, 4)); +} + +static void decode_insn_extract_decode_insn_Fmt_18(DisasContext *ctx, arg_decode_insn9 *a, uint16_t insn) +{ + a->rr = extract32(insn, 4, 5); +} + +static void decode_insn_extract_decode_insn_Fmt_19(DisasContext *ctx, arg_decode_insn10 *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->bit = extract32(insn, 0, 3); +} + +static void decode_insn_extract_decode_insn_Fmt_4(DisasContext *ctx, 
arg_decode_insn2 *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); +} + +static void decode_insn_extract_decode_insn_Fmt_5(DisasContext *ctx, arg_rd_rr *a, uint16_t insn) +{ + a->rd = to_regs_16_31_by_one(ctx, extract32(insn, 4, 4)); + a->rr = to_regs_16_31_by_one(ctx, extract32(insn, 0, 4)); +} + +static void decode_insn_extract_decode_insn_Fmt_6(DisasContext *ctx, arg_decode_insn3 *a, uint16_t insn) +{ + a->imm = extract32(insn, 4, 4); +} + +static void decode_insn_extract_decode_insn_Fmt_9(DisasContext *ctx, arg_decode_insn3 *a, uint16_t insn) +{ + a->imm = sextract32(insn, 0, 12); +} + +static void decode_insn_extract_fmul(DisasContext *ctx, arg_rd_rr *a, uint16_t insn) +{ + a->rd = to_regs_16_23_by_one(ctx, extract32(insn, 4, 3)); + a->rr = to_regs_16_23_by_one(ctx, extract32(insn, 0, 3)); +} + +static void decode_insn_extract_io_rd_imm(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->imm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 9, 2)); +} + +static void decode_insn_extract_ldst_d(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->imm = deposit32(deposit32(extract32(insn, 0, 3), 3, 29, extract32(insn, 10, 2)), 5, 27, extract32(insn, 13, 1)); +} + +static void decode_insn_extract_ldst_s(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->imm = 0; +} + +static void decode_insn_extract_op_bit(DisasContext *ctx, arg_decode_insn4 *a, uint16_t insn) +{ + a->bit = extract32(insn, 4, 3); +} + +static void decode_insn_extract_op_bit_imm(DisasContext *ctx, arg_decode_insn5 *a, uint16_t insn) +{ + a->imm = sextract32(insn, 3, 7); + a->bit = extract32(insn, 0, 3); +} + +static void decode_insn_extract_op_rd_imm6(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = to_regs_24_30_by_two(ctx, extract32(insn, 4, 2)); + a->imm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 6, 2)); +} + +static void decode_insn_extract_op_rd_imm8(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = to_regs_16_31_by_one(ctx, extract32(insn, 4, 4)); + a->imm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 8, 4)); +} + +static void decode_insn_extract_op_rd_rr(DisasContext *ctx, arg_rd_rr *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->rr = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 9, 1)); +} + +bool decode_insn(DisasContext *ctx, uint16_t insn) +{ + union { + arg_decode_insn10 f_decode_insn10; + arg_decode_insn2 f_decode_insn2; + arg_decode_insn3 f_decode_insn3; + arg_decode_insn4 f_decode_insn4; + arg_decode_insn5 f_decode_insn5; + arg_decode_insn6 f_decode_insn6; + arg_decode_insn7 f_decode_insn7; + arg_decode_insn8 f_decode_insn8; + arg_decode_insn9 f_decode_insn9; + arg_rd_imm f_rd_imm; + arg_rd_rr f_rd_rr; + } u; + + switch (insn & 0x0000d000) { + case 0x00000000: + /* 00.0.... ........ */ + switch (insn & 0x00002c00) { + case 0x00000000: + /* 000000.. ........ */ + switch ((insn >> 8) & 0x3) { + case 0x0: + /* 00000000 ........ */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch (insn & 0x000000ff) { + case 0x00000000: + /* 00000000 00000000 */ + /* insn.decode:185 */ + if (trans_NOP(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + case 0x1: + /* 00000001 ........ */ + /* insn.decode:128 */ + decode_insn_extract_decode_insn_Fmt_17(ctx, &u.f_rd_rr, insn); + if (trans_MOVW(ctx, &u.f_rd_rr)) return true; + break; + case 0x2: + /* 00000010 ........ 
*/ + /* insn.decode:71 */ + decode_insn_extract_decode_insn_Fmt_5(ctx, &u.f_rd_rr, insn); + if (trans_MULS(ctx, &u.f_rd_rr)) return true; + break; + case 0x3: + /* 00000011 ........ */ + decode_insn_extract_fmul(ctx, &u.f_rd_rr, insn); + switch (insn & 0x00000088) { + case 0x00000000: + /* 00000011 0...0... */ + /* insn.decode:72 */ + if (trans_MULSU(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000008: + /* 00000011 0...1... */ + /* insn.decode:73 */ + if (trans_FMUL(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000080: + /* 00000011 1...0... */ + /* insn.decode:74 */ + if (trans_FMULS(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000088: + /* 00000011 1...1... */ + /* insn.decode:75 */ + if (trans_FMULSU(ctx, &u.f_rd_rr)) return true; + break; + } + break; + } + break; + case 0x00000400: + /* 000001.. ........ */ + /* insn.decode:102 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_CPC(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000800: + /* 000010.. ........ */ + /* insn.decode:58 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_SBC(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000c00: + /* 000011.. ........ */ + /* insn.decode:53 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_ADD(ctx, &u.f_rd_rr)) return true; + break; + case 0x00002000: + /* 001000.. ........ */ + /* insn.decode:61 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_AND(ctx, &u.f_rd_rr)) return true; + break; + case 0x00002400: + /* 001001.. ........ */ + /* insn.decode:65 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_EOR(ctx, &u.f_rd_rr)) return true; + break; + case 0x00002800: + /* 001010.. ........ */ + /* insn.decode:63 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_OR(ctx, &u.f_rd_rr)) return true; + break; + case 0x00002c00: + /* 001011.. ........ */ + /* insn.decode:127 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_MOV(ctx, &u.f_rd_rr)) return true; + break; + } + break; + case 0x00001000: + /* 00.1.... ........ */ + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 0001.... ........ */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 000100.. ........ */ + /* insn.decode:100 */ + if (trans_CPSE(ctx, &u.f_rd_rr)) return true; + break; + case 0x1: + /* 000101.. ........ */ + /* insn.decode:101 */ + if (trans_CP(ctx, &u.f_rd_rr)) return true; + break; + case 0x2: + /* 000110.. ........ */ + /* insn.decode:56 */ + if (trans_SUB(ctx, &u.f_rd_rr)) return true; + break; + case 0x3: + /* 000111.. ........ */ + /* insn.decode:54 */ + if (trans_ADC(ctx, &u.f_rd_rr)) return true; + break; + } + break; + case 0x1: + /* 0011.... ........ */ + /* insn.decode:103 */ + decode_insn_extract_op_rd_imm8(ctx, &u.f_rd_imm, insn); + if (trans_CPI(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x00004000: + /* 01.0.... ........ */ + decode_insn_extract_op_rd_imm8(ctx, &u.f_rd_imm, insn); + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 0100.... ........ */ + /* insn.decode:59 */ + if (trans_SBCI(ctx, &u.f_rd_imm)) return true; + break; + case 0x1: + /* 0110.... ........ */ + /* insn.decode:64 */ + if (trans_ORI(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x00005000: + /* 01.1.... ........ */ + decode_insn_extract_op_rd_imm8(ctx, &u.f_rd_imm, insn); + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 0101.... ........ 
*/ + /* insn.decode:57 */ + if (trans_SUBI(ctx, &u.f_rd_imm)) return true; + break; + case 0x1: + /* 0111.... ........ */ + /* insn.decode:62 */ + if (trans_ANDI(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x00008000: + /* 10.0.... ........ */ + decode_insn_extract_ldst_d(ctx, &u.f_rd_imm, insn); + switch (insn & 0x00000208) { + case 0x00000000: + /* 10.0..0. ....0... */ + /* insn.decode:139 */ + if (trans_LDDZ(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000008: + /* 10.0..0. ....1... */ + /* insn.decode:138 */ + if (trans_LDDY(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000200: + /* 10.0..1. ....0... */ + /* insn.decode:149 */ + if (trans_STDZ(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000208: + /* 10.0..1. ....1... */ + /* insn.decode:148 */ + if (trans_STDY(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x00009000: + /* 10.1.... ........ */ + switch (insn & 0x00002800) { + case 0x00000000: + /* 10010... ........ */ + switch ((insn >> 9) & 0x3) { + case 0x0: + /* 1001000. ........ */ + switch (insn & 0x0000000f) { + case 0x00000000: + /* 1001000. ....0000 */ + /* insn.decode:130 */ + decode_insn_extract_ldst_s(ctx, &u.f_rd_imm, insn); + if (trans_LDS(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000001: + /* 1001000. ....0001 */ + /* insn.decode:136 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDZ2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000002: + /* 1001000. ....0010 */ + /* insn.decode:137 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDZ3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000004: + /* 1001000. ....0100 */ + /* insn.decode:151 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LPM2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000005: + /* 1001000. ....0101 */ + /* insn.decode:152 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LPMX(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000006: + /* 1001000. ....0110 */ + /* insn.decode:154 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_ELPM2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000007: + /* 1001000. ....0111 */ + /* insn.decode:155 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_ELPMX(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000009: + /* 1001000. ....1001 */ + /* insn.decode:134 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDY2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000a: + /* 1001000. ....1010 */ + /* insn.decode:135 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDY3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000c: + /* 1001000. ....1100 */ + /* insn.decode:131 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDX1(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000d: + /* 1001000. ....1101 */ + /* insn.decode:132 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDX2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000e: + /* 1001000. ....1110 */ + /* insn.decode:133 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDX3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000f: + /* 1001000. 
....1111 */ + /* insn.decode:161 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_POP(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x1: + /* 1001001. ........ */ + switch (insn & 0x0000000f) { + case 0x00000000: + /* 1001001. ....0000 */ + /* insn.decode:140 */ + decode_insn_extract_ldst_s(ctx, &u.f_rd_imm, insn); + if (trans_STS(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000001: + /* 1001001. ....0001 */ + /* insn.decode:146 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_STZ2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000002: + /* 1001001. ....0010 */ + /* insn.decode:147 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_STZ3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000004: + /* 1001001. ....0100 */ + /* insn.decode:162 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_XCH(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000005: + /* 1001001. ....0101 */ + /* insn.decode:164 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LAS(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000006: + /* 1001001. ....0110 */ + /* insn.decode:163 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LAC(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000007: + /* 1001001. ....0111 */ + /* insn.decode:165 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LAT(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000009: + /* 1001001. ....1001 */ + /* insn.decode:144 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_STY2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000a: + /* 1001001. ....1010 */ + /* insn.decode:145 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_STY3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000c: + /* 1001001. ....1100 */ + /* insn.decode:141 */ + decode_insn_extract_decode_insn_Fmt_18(ctx, &u.f_decode_insn9, insn); + if (trans_STX1(ctx, &u.f_decode_insn9)) return true; + break; + case 0x0000000d: + /* 1001001. ....1101 */ + /* insn.decode:142 */ + decode_insn_extract_decode_insn_Fmt_18(ctx, &u.f_decode_insn9, insn); + if (trans_STX2(ctx, &u.f_decode_insn9)) return true; + break; + case 0x0000000e: + /* 1001001. ....1110 */ + /* insn.decode:143 */ + decode_insn_extract_decode_insn_Fmt_18(ctx, &u.f_decode_insn9, insn); + if (trans_STX3(ctx, &u.f_decode_insn9)) return true; + break; + case 0x0000000f: + /* 1001001. ....1111 */ + /* insn.decode:160 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_PUSH(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x2: + /* 1001010. ........ */ + switch ((insn >> 1) & 0x7) { + case 0x0: + /* 1001010. ....000. */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + switch (insn & 0x00000001) { + case 0x00000000: + /* 1001010. ....0000 */ + /* insn.decode:66 */ + if (trans_COM(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000001: + /* 1001010. ....0001 */ + /* insn.decode:67 */ + if (trans_NEG(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x1: + /* 1001010. ....001. */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + switch (insn & 0x00000001) { + case 0x00000000: + /* 1001010. 
....0010 */ + /* insn.decode:173 */ + if (trans_SWAP(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000001: + /* 1001010. ....0011 */ + /* insn.decode:68 */ + if (trans_INC(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x2: + /* 1001010. ....010. */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + switch (insn & 0x00000001) { + case 0x00000001: + /* 1001010. ....0101 */ + /* insn.decode:172 */ + if (trans_ASR(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x3: + /* 1001010. ....011. */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + switch (insn & 0x00000001) { + case 0x00000000: + /* 1001010. ....0110 */ + /* insn.decode:170 */ + if (trans_LSR(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000001: + /* 1001010. ....0111 */ + /* insn.decode:171 */ + if (trans_ROR(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x4: + /* 1001010. ....100. */ + switch (insn & 0x00000181) { + case 0x00000000: + /* 10010100 0...1000 */ + /* insn.decode:178 */ + decode_insn_extract_op_bit(ctx, &u.f_decode_insn4, insn); + if (trans_BSET(ctx, &u.f_decode_insn4)) return true; + break; + case 0x00000001: + /* 10010100 0...1001 */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch ((insn >> 4) & 0x7) { + case 0x0: + /* 10010100 00001001 */ + /* insn.decode:91 */ + if (trans_IJMP(ctx, &u.f_decode_insn6)) return true; + break; + case 0x1: + /* 10010100 00011001 */ + /* insn.decode:92 */ + if (trans_EIJMP(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + case 0x00000080: + /* 10010100 1...1000 */ + /* insn.decode:179 */ + decode_insn_extract_op_bit(ctx, &u.f_decode_insn4, insn); + if (trans_BCLR(ctx, &u.f_decode_insn4)) return true; + break; + case 0x00000100: + /* 10010101 0...1000 */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch ((insn >> 4) & 0x7) { + case 0x0: + /* 10010101 00001000 */ + /* insn.decode:98 */ + if (trans_RET(ctx, &u.f_decode_insn6)) return true; + break; + case 0x1: + /* 10010101 00011000 */ + /* insn.decode:99 */ + if (trans_RETI(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + case 0x00000101: + /* 10010101 0...1001 */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch ((insn >> 4) & 0x7) { + case 0x0: + /* 10010101 00001001 */ + /* insn.decode:95 */ + if (trans_ICALL(ctx, &u.f_decode_insn6)) return true; + break; + case 0x1: + /* 10010101 00011001 */ + /* insn.decode:96 */ + if (trans_EICALL(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + case 0x00000180: + /* 10010101 1...1000 */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch ((insn >> 4) & 0x7) { + case 0x0: + /* 10010101 10001000 */ + /* insn.decode:186 */ + if (trans_SLEEP(ctx, &u.f_decode_insn6)) return true; + break; + case 0x1: + /* 10010101 10011000 */ + /* insn.decode:184 */ + if (trans_BREAK(ctx, &u.f_decode_insn6)) return true; + break; + case 0x2: + /* 10010101 10101000 */ + /* insn.decode:187 */ + if (trans_WDR(ctx, &u.f_decode_insn6)) return true; + break; + case 0x4: + /* 10010101 11001000 */ + /* insn.decode:150 */ + if (trans_LPM1(ctx, &u.f_decode_insn6)) return true; + break; + case 0x5: + /* 10010101 11011000 */ + /* insn.decode:153 */ + if (trans_ELPM1(ctx, &u.f_decode_insn6)) return true; + break; + case 0x6: + /* 10010101 11101000 */ + /* insn.decode:156 */ + if (trans_SPM(ctx, &u.f_decode_insn6)) return true; + break; + case 0x7: 
+ /* 10010101 11111000 */ + /* insn.decode:157 */ + if (trans_SPMX(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + } + break; + case 0x5: + /* 1001010. ....101. */ + switch (insn & 0x00000001) { + case 0x00000000: + /* 1001010. ....1010 */ + /* insn.decode:69 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_DEC(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000001: + /* 1001010. ....1011 */ + decode_insn_extract_decode_insn_Fmt_6(ctx, &u.f_decode_insn3, insn); + switch ((insn >> 8) & 0x1) { + case 0x0: + /* 10010100 ....1011 */ + /* insn.decode:76 */ + if (trans_DES(ctx, &u.f_decode_insn3)) return true; + break; + } + break; + } + break; + case 0x6: + /* 1001010. ....110. */ + /* insn.decode:93 */ + decode_insn_extract_decode_insn_Fmt_11(ctx, &u.f_decode_insn3, insn); + if (trans_JMP(ctx, &u.f_decode_insn3)) return true; + break; + case 0x7: + /* 1001010. ....111. */ + /* insn.decode:97 */ + decode_insn_extract_decode_insn_Fmt_11(ctx, &u.f_decode_insn3, insn); + if (trans_CALL(ctx, &u.f_decode_insn3)) return true; + break; + } + break; + case 0x3: + /* 1001011. ........ */ + decode_insn_extract_op_rd_imm6(ctx, &u.f_rd_imm, insn); + switch ((insn >> 8) & 0x1) { + case 0x0: + /* 10010110 ........ */ + /* insn.decode:55 */ + if (trans_ADIW(ctx, &u.f_rd_imm)) return true; + break; + case 0x1: + /* 10010111 ........ */ + /* insn.decode:60 */ + if (trans_SBIW(ctx, &u.f_rd_imm)) return true; + break; + } + break; + } + break; + case 0x00000800: + /* 10011... ........ */ + switch ((insn >> 10) & 0x1) { + case 0x0: + /* 100110.. ........ */ + decode_insn_extract_decode_insn_Fmt_13(ctx, &u.f_decode_insn8, insn); + switch ((insn >> 8) & 0x3) { + case 0x0: + /* 10011000 ........ */ + /* insn.decode:175 */ + if (trans_CBI(ctx, &u.f_decode_insn8)) return true; + break; + case 0x1: + /* 10011001 ........ */ + /* insn.decode:106 */ + if (trans_SBIC(ctx, &u.f_decode_insn8)) return true; + break; + case 0x2: + /* 10011010 ........ */ + /* insn.decode:174 */ + if (trans_SBI(ctx, &u.f_decode_insn8)) return true; + break; + case 0x3: + /* 10011011 ........ */ + /* insn.decode:107 */ + if (trans_SBIS(ctx, &u.f_decode_insn8)) return true; + break; + } + break; + case 0x1: + /* 100111.. ........ */ + /* insn.decode:70 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_MUL(ctx, &u.f_rd_rr)) return true; + break; + } + break; + case 0x00002000: + /* 10110... ........ */ + /* insn.decode:158 */ + decode_insn_extract_io_rd_imm(ctx, &u.f_rd_imm, insn); + if (trans_IN(ctx, &u.f_rd_imm)) return true; + break; + case 0x00002800: + /* 10111... ........ */ + /* insn.decode:159 */ + decode_insn_extract_io_rd_imm(ctx, &u.f_rd_imm, insn); + if (trans_OUT(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x0000c000: + /* 11.0.... ........ */ + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 1100.... ........ */ + /* insn.decode:90 */ + decode_insn_extract_decode_insn_Fmt_9(ctx, &u.f_decode_insn3, insn); + if (trans_RJMP(ctx, &u.f_decode_insn3)) return true; + break; + case 0x1: + /* 1110.... ........ */ + /* insn.decode:129 */ + decode_insn_extract_op_rd_imm8(ctx, &u.f_rd_imm, insn); + if (trans_LDI(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x0000d000: + /* 11.1.... ........ */ + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 1101.... ........ 
*/ + /* insn.decode:94 */ + decode_insn_extract_decode_insn_Fmt_9(ctx, &u.f_decode_insn3, insn); + if (trans_RCALL(ctx, &u.f_decode_insn3)) return true; + break; + case 0x1: + /* 1111.... ........ */ + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 111100.. ........ */ + /* insn.decode:108 */ + decode_insn_extract_op_bit_imm(ctx, &u.f_decode_insn5, insn); + if (trans_BRBS(ctx, &u.f_decode_insn5)) return true; + break; + case 0x1: + /* 111101.. ........ */ + /* insn.decode:109 */ + decode_insn_extract_op_bit_imm(ctx, &u.f_decode_insn5, insn); + if (trans_BRBC(ctx, &u.f_decode_insn5)) return true; + break; + case 0x2: + /* 111110.. ........ */ + decode_insn_extract_decode_insn_Fmt_19(ctx, &u.f_decode_insn10, insn); + switch (insn & 0x00000208) { + case 0x00000000: + /* 1111100. ....0... */ + /* insn.decode:177 */ + if (trans_BLD(ctx, &u.f_decode_insn10)) return true; + break; + case 0x00000200: + /* 1111101. ....0... */ + /* insn.decode:176 */ + if (trans_BST(ctx, &u.f_decode_insn10)) return true; + break; + } + break; + case 0x3: + /* 111111.. ........ */ + decode_insn_extract_decode_insn_Fmt_12(ctx, &u.f_decode_insn7, insn); + switch (insn & 0x00000208) { + case 0x00000000: + /* 1111110. ....0... */ + /* insn.decode:104 */ + if (trans_SBRC(ctx, &u.f_decode_insn7)) return true; + break; + case 0x00000200: + /* 1111111. ....0... */ + /* insn.decode:105 */ + if (trans_SBRS(ctx, &u.f_decode_insn7)) return true; + break; + } + break; + } + break; + } + break; + } + return false; +} diff --git a/qemu/target/avr/gdbstub.c b/qemu/target/avr/gdbstub.c new file mode 100644 index 0000000000..c28ed67efe --- /dev/null +++ b/qemu/target/avr/gdbstub.c @@ -0,0 +1,84 @@ +/* + * QEMU AVR gdbstub + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "exec/gdbstub.h" + +int avr_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + /* R */ + if (n < 32) { + return gdb_get_reg8(mem_buf, env->r[n]); + } + + /* SREG */ + if (n == 32) { + uint8_t sreg = cpu_get_sreg(env); + + return gdb_get_reg8(mem_buf, sreg); + } + + /* SP */ + if (n == 33) { + return gdb_get_reg16(mem_buf, env->sp & 0x0000ffff); + } + + /* PC */ + if (n == 34) { + return gdb_get_reg32(mem_buf, env->pc_w * 2); + } + + return 0; +} + +int avr_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + /* R */ + if (n < 32) { + env->r[n] = *mem_buf; + return 1; + } + + /* SREG */ + if (n == 32) { + cpu_set_sreg(env, *mem_buf); + return 1; + } + + /* SP */ + if (n == 33) { + env->sp = lduw_p(mem_buf); + return 2; + } + + /* PC */ + if (n == 34) { + env->pc_w = ldl_p(mem_buf) / 2; + return 4; + } + + return 0; +} diff --git a/qemu/target/avr/helper.c b/qemu/target/avr/helper.c new file mode 100644 index 0000000000..60d0a648eb --- /dev/null +++ b/qemu/target/avr/helper.c @@ -0,0 +1,373 @@ +/* + * QEMU AVR CPU helpers + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "unicorn_helper.h" + +bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + bool ret = false; + CPUClass *cc = CPU_GET_CLASS(cs); + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + if (interrupt_request & CPU_INTERRUPT_RESET) { + if (cpu_interrupts_enabled(env)) { + cs->exception_index = EXCP_RESET; + cc->do_interrupt(cs); + + cs->interrupt_request &= ~CPU_INTERRUPT_RESET; + + ret = true; + } + } + if (interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_interrupts_enabled(env) && env->intsrc != 0) { + int index = ctz32(env->intsrc); + cs->exception_index = EXCP_INT(index); + cc->do_interrupt(cs); + + env->intsrc &= env->intsrc - 1; /* clear the interrupt */ + cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + + ret = true; + } + } + return ret; +} + +void avr_cpu_do_interrupt(CPUState *cs) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + uint32_t ret = env->pc_w; + int vector = 0; + int size = avr_feature(env, AVR_FEATURE_JMP_CALL) ? 
2 : 1;
+    int base = 0;
+
+    if (cs->exception_index == EXCP_RESET) {
+        vector = 0;
+    } else if (env->intsrc != 0) {
+        vector = ctz32(env->intsrc) + 1;
+    }
+
+    /* Push the return address onto the stack, least significant byte first */
+    if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) {
+        cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
+        cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >> 8);
+        cpu_stb_data(env, env->sp--, (ret & 0xff0000) >> 16);
+    } else if (avr_feature(env, AVR_FEATURE_2_BYTE_PC)) {
+        cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
+        cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >> 8);
+    } else {
+        cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
+    }
+
+    env->pc_w = base + vector * size;
+    env->sregI = 0; /* clear Global Interrupt Flag */
+
+    cs->exception_index = -1;
+}
+
+int avr_cpu_memory_rw_debug(CPUState *cs, vaddr addr, uint8_t *buf,
+                            int len, bool is_write)
+{
+    return cpu_memory_rw_debug(cs, addr, buf, len, is_write);
+}
+
+hwaddr avr_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
+{
+    return addr; /* I assume 1:1 address correspondence */
+}
+
+bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+                      MMUAccessType access_type, int mmu_idx,
+                      bool probe, uintptr_t retaddr)
+{
+    int prot = 0;
+    MemTxAttrs attrs = {0};
+    uint32_t paddr;
+
+    address &= TARGET_PAGE_MASK;
+
+    if (mmu_idx == MMU_CODE_IDX) {
+        /* access to code in flash */
+        paddr = avr_code_base(&AVR_CPU(cs)->env) | address;
+        prot = PAGE_READ | PAGE_EXEC;
+#if 0
+        if (paddr + TARGET_PAGE_SIZE > OFFSET_DATA) {
+            error_report("execution left flash memory");
+            abort();
+        }
+#endif
+    } else if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
+        /*
+         * access to CPU registers, exit and rebuild this TB to use full access
+         * in case it touches specially handled registers like SREG or SP
+         */
+        AVRCPU *cpu = AVR_CPU(cs);
+        CPUAVRState *env = &cpu->env;
+        env->fullacc = 1;
+        cpu_loop_exit_restore(cs, retaddr);
+    } else {
+        /* access to memory: nothing special */
+        paddr = OFFSET_DATA | address;
+        prot = PAGE_READ | PAGE_WRITE;
+    }
+
+    tlb_set_page_with_attrs(cs, address, paddr, attrs, prot,
+                            mmu_idx, TARGET_PAGE_SIZE);
+
+    return true;
+}
+
+/*
+ * helpers
+ */
+
+void helper_sleep(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->exception_index = EXCP_HLT;
+    cpu_loop_exit(cs);
+}
+
+void helper_unsupported(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    /*
+     * I could not find what happens on the real platform, so
+     * it's EXCP_DEBUG for now
+     */
+    cs->exception_index = EXCP_DEBUG;
+#if 0
+    if (qemu_loglevel_mask(LOG_UNIMP)) {
+        qemu_log("UNSUPPORTED\n");
+        cpu_dump_state(cs, stderr, 0);
+    }
+#endif
+    cpu_loop_exit(cs);
+}
+
+void helper_debug(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->exception_index = EXCP_DEBUG;
+    cpu_loop_exit(cs);
+}
+
+void helper_break(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->exception_index = EXCP_DEBUG;
+    cpu_loop_exit(cs);
+}
+
+void helper_wdr(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    /* watchdog (WDR) is not implemented yet, placeholder */
+    cs->exception_index = EXCP_DEBUG;
+    cpu_loop_exit(cs);
+}
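+
+/*
+ * I/O port access
+ *
+ * The two helpers below intercept the CPU-internal ports (0x38..0x3f:
+ * RAMPD, RAMPX, RAMPY, RAMPZ, EIND, SPL, SPH, SREG) and forward every other
+ * port to guest memory at OFFSET_IO_REGISTERS + port.  For example, a guest
+ * "in r24, 0x3f" is expected to end up in helper_inb(env, 0x3f) and return
+ * cpu_get_sreg(env), while "in r24, 0x20" is serviced from memory; the
+ * register/port numbers in this example are illustrative only.
+ */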
+
+/*
+ * This function implements the IN instruction.
+ *
+ * It does the following:
+ * a. if an IO register belongs to the CPU, its value is read and returned
+ * b. otherwise the IO address is translated to a memory address and
+ *    physical memory is read
+ * c. it caches the value for the sake of SBI, SBIC, SBIS & CBI implementation
+ */
+target_ulong helper_inb(CPUAVRState *env, uint32_t port)
+{
+    CPUAVRState *const cpu = env;
+    struct uc_struct *const uc = env->uc;
+
+    target_ulong data = 0;
+
+    switch (port) {
+    case 0x38: /* RAMPD */
+        data = 0xff & (env->rampD >> 16);
+        break;
+    case 0x39: /* RAMPX */
+        data = 0xff & (env->rampX >> 16);
+        break;
+    case 0x3a: /* RAMPY */
+        data = 0xff & (env->rampY >> 16);
+        break;
+    case 0x3b: /* RAMPZ */
+        data = 0xff & (env->rampZ >> 16);
+        break;
+    case 0x3c: /* EIND */
+        data = 0xff & (env->eind >> 16);
+        break;
+    case 0x3d: /* SPL */
+        data = env->sp & 0x00ff;
+        break;
+    case 0x3e: /* SPH */
+        data = env->sp >> 8;
+        break;
+    case 0x3f: /* SREG */
+        data = cpu_get_sreg(env);
+        break;
+    default:
+        /* not a special register, pass to normal memory access */
+        data = address_space_ldub(&address_space_memory,
+                                  OFFSET_IO_REGISTERS + port,
+                                  MEMTXATTRS_UNSPECIFIED, NULL);
+    }
+
+    return data;
+}
+
+/*
+ * This function implements the OUT instruction.
+ *
+ * It does the following:
+ * a. if an IO register belongs to the CPU, its value is written into the
+ *    register
+ * b. otherwise the IO address is translated to a memory address and
+ *    physical memory is written
+ * c. it caches the value for the sake of SBI, SBIC, SBIS & CBI implementation
+ */
+void helper_outb(CPUAVRState *env, uint32_t port, uint32_t data)
+{
+    CPUAVRState *const cpu = env;
+    struct uc_struct *const uc = env->uc;
+
+    data &= 0x000000ff;
+
+    switch (port) {
+    case 0x38: /* RAMPD */
+        if (avr_feature(env, AVR_FEATURE_RAMPD)) {
+            env->rampD = (data & 0xff) << 16;
+        }
+        break;
+    case 0x39: /* RAMPX */
+        if (avr_feature(env, AVR_FEATURE_RAMPX)) {
+            env->rampX = (data & 0xff) << 16;
+        }
+        break;
+    case 0x3a: /* RAMPY */
+        if (avr_feature(env, AVR_FEATURE_RAMPY)) {
+            env->rampY = (data & 0xff) << 16;
+        }
+        break;
+    case 0x3b: /* RAMPZ */
+        if (avr_feature(env, AVR_FEATURE_RAMPZ)) {
+            env->rampZ = (data & 0xff) << 16;
+        }
+        break;
+    case 0x3c: /* EIND */
+        env->eind = (data & 0xff) << 16;
+        break;
+    case 0x3d: /* SPL */
+        env->sp = (env->sp & 0xff00) | (data);
+        break;
+    case 0x3e: /* SPH */
+        if (avr_feature(env, AVR_FEATURE_2_BYTE_SP)) {
+            env->sp = (env->sp & 0x00ff) | (data << 8);
+        }
+        break;
+    case 0x3f: /* SREG */
+        cpu_set_sreg(env, data);
+        break;
+    default:
+        /* not a special register, pass to normal memory access */
+        address_space_stb(&address_space_memory, OFFSET_IO_REGISTERS + port,
+                          data, MEMTXATTRS_UNSPECIFIED, NULL);
+    }
+}
+
+/*
+ * This function implements the LD instruction when there is a possibility
+ * to read from a CPU register
+ */
+target_ulong helper_fullrd(CPUAVRState *env, uint32_t addr)
+{
+    CPUAVRState *const cpu = env;
+    struct uc_struct *const uc = env->uc;
+
+    uint8_t data;
+
+    env->fullacc = false;
+
+    if (addr < NUMBER_OF_CPU_REGISTERS) {
+        /* CPU registers */
+        data = env->r[addr];
+    } else if (addr < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
+        /* IO registers */
+        data = helper_inb(env, addr - NUMBER_OF_CPU_REGISTERS);
+    } else {
+        /* memory */
+        data = address_space_ldub(&address_space_memory, OFFSET_DATA | addr,
+                                  MEMTXATTRS_UNSPECIFIED, NULL);
+    }
+    return data;
+}
+
+/*
+ * This function implements the ST instruction when there is a possibility
+ * to write into a CPU register
+ */
+void helper_fullwr(CPUAVRState *env, uint32_t data, uint32_t addr)
+{
+    CPUAVRState *const cpu = env;
+    struct uc_struct *const uc = env->uc;
+
+    env->fullacc = false;
+
+    /* Following logic assumes this: */
+    assert(OFFSET_CPU_REGISTERS ==
OFFSET_DATA); + assert(OFFSET_IO_REGISTERS == OFFSET_CPU_REGISTERS + + NUMBER_OF_CPU_REGISTERS); + + if (addr < NUMBER_OF_CPU_REGISTERS) { + /* CPU registers */ + env->r[addr] = data; + } else if (addr < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) { + /* IO registers */ + helper_outb(env, addr - NUMBER_OF_CPU_REGISTERS, data); + } else { + /* memory */ + address_space_stb(&address_space_memory, OFFSET_DATA | addr, data, + MEMTXATTRS_UNSPECIFIED, NULL); + } +} + +void helper_uc_avr_exit(CPUAVRState *env) +{ + CPUState *cs = env_cpu(env); + + cs->exception_index = EXCP_HLT; + cs->halted = 1; + cpu_loop_exit(cs); +} diff --git a/qemu/target/avr/helper.h b/qemu/target/avr/helper.h new file mode 100644 index 0000000000..06fc1d323c --- /dev/null +++ b/qemu/target/avr/helper.h @@ -0,0 +1,37 @@ +/* + * QEMU AVR CPU helpers + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +/* + Modified for Unicorn Engine by Glenn Baker , 2024 +*/ + +DEF_HELPER_4(uc_tracecode, void, i32, i32, ptr, i64) +DEF_HELPER_6(uc_traceopcode, void, ptr, i64, i64, i32, ptr, i64) +DEF_HELPER_1(uc_avr_exit,void, env) + +DEF_HELPER_1(wdr, void, env) +DEF_HELPER_1(debug, void, env) +DEF_HELPER_1(break, void, env) +DEF_HELPER_1(sleep, void, env) +DEF_HELPER_1(unsupported, void, env) +DEF_HELPER_3(outb, void, env, i32, i32) +DEF_HELPER_2(inb, tl, env, i32) +DEF_HELPER_3(fullwr, void, env, i32, i32) +DEF_HELPER_2(fullrd, tl, env, i32) diff --git a/qemu/target/avr/insn.decode b/qemu/target/avr/insn.decode new file mode 100644 index 0000000000..482c23ad0c --- /dev/null +++ b/qemu/target/avr/insn.decode @@ -0,0 +1,187 @@ +# +# AVR instruction decode definitions. +# +# Copyright (c) 2019-2020 Michael Rolnik +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see . +# + +# +# regs_16_31_by_one = [16 .. 31] +# regs_16_23_by_one = [16 .. 23] +# regs_24_30_by_two = [24, 26, 28, 30] +# regs_00_30_by_two = [0, 2, 4, 6, 8, .. 30] + +%rd 4:5 +%rr 9:1 0:4 + +%rd_a 4:4 !function=to_regs_16_31_by_one +%rd_b 4:3 !function=to_regs_16_23_by_one +%rd_c 4:2 !function=to_regs_24_30_by_two +%rr_a 0:4 !function=to_regs_16_31_by_one +%rr_b 0:3 !function=to_regs_16_23_by_one + +%imm6 6:2 0:4 +%imm8 8:4 0:4 + +%io_imm 9:2 0:4 +%ldst_d_imm 13:1 10:2 0:3 + + +&rd_rr rd rr +&rd_imm rd imm + +@op_rd_rr .... .. . ..... .... 
&rd_rr rd=%rd rr=%rr +@op_rd_imm6 .... .... .. .. .... &rd_imm rd=%rd_c imm=%imm6 +@op_rd_imm8 .... .... .... .... &rd_imm rd=%rd_a imm=%imm8 +@fmul .... .... . ... . ... &rd_rr rd=%rd_b rr=%rr_b + +# +# Arithmetic Instructions +# +ADD 0000 11 . ..... .... @op_rd_rr +ADC 0001 11 . ..... .... @op_rd_rr +ADIW 1001 0110 .. .. .... @op_rd_imm6 +SUB 0001 10 . ..... .... @op_rd_rr +SUBI 0101 .... .... .... @op_rd_imm8 +SBC 0000 10 . ..... .... @op_rd_rr +SBCI 0100 .... .... .... @op_rd_imm8 +SBIW 1001 0111 .. .. .... @op_rd_imm6 +AND 0010 00 . ..... .... @op_rd_rr +ANDI 0111 .... .... .... @op_rd_imm8 +OR 0010 10 . ..... .... @op_rd_rr +ORI 0110 .... .... .... @op_rd_imm8 +EOR 0010 01 . ..... .... @op_rd_rr +COM 1001 010 rd:5 0000 +NEG 1001 010 rd:5 0001 +INC 1001 010 rd:5 0011 +DEC 1001 010 rd:5 1010 +MUL 1001 11 . ..... .... @op_rd_rr +MULS 0000 0010 .... .... &rd_rr rd=%rd_a rr=%rr_a +MULSU 0000 0011 0 ... 0 ... @fmul +FMUL 0000 0011 0 ... 1 ... @fmul +FMULS 0000 0011 1 ... 0 ... @fmul +FMULSU 0000 0011 1 ... 1 ... @fmul +DES 1001 0100 imm:4 1011 + +# +# Branch Instructions +# + +# The 22-bit immediate is partially in the opcode word, +# and partially in the next. Use append_16 to build the +# complete 22-bit value. +%imm_call 4:5 0:1 !function=append_16 + +@op_bit .... .... . bit:3 .... +@op_bit_imm .... .. imm:s7 bit:3 + +RJMP 1100 imm:s12 +IJMP 1001 0100 0000 1001 +EIJMP 1001 0100 0001 1001 +JMP 1001 010 ..... 110 . imm=%imm_call +RCALL 1101 imm:s12 +ICALL 1001 0101 0000 1001 +EICALL 1001 0101 0001 1001 +CALL 1001 010 ..... 111 . imm=%imm_call +RET 1001 0101 0000 1000 +RETI 1001 0101 0001 1000 +CPSE 0001 00 . ..... .... @op_rd_rr +CP 0001 01 . ..... .... @op_rd_rr +CPC 0000 01 . ..... .... @op_rd_rr +CPI 0011 .... .... .... @op_rd_imm8 +SBRC 1111 110 rr:5 0 bit:3 +SBRS 1111 111 rr:5 0 bit:3 +SBIC 1001 1001 reg:5 bit:3 +SBIS 1001 1011 reg:5 bit:3 +BRBS 1111 00 ....... ... @op_bit_imm +BRBC 1111 01 ....... ... @op_bit_imm + +# +# Data Transfer Instructions +# + +%rd_d 4:4 !function=to_regs_00_30_by_two +%rr_d 0:4 !function=to_regs_00_30_by_two + +@io_rd_imm .... . .. ..... .... &rd_imm rd=%rd imm=%io_imm +@ldst_d .. . . .. . rd:5 . ... &rd_imm imm=%ldst_d_imm + +# The 16-bit immediate is completely in the next word. +# Fields cannot be defined with no bits, so we cannot play +# the same trick and append to a zero-bit value. +# Defer reading the immediate until trans_{LDS,STS}. +@ldst_s .... ... rd:5 .... imm=0 + +MOV 0010 11 . ..... .... @op_rd_rr +MOVW 0000 0001 .... .... &rd_rr rd=%rd_d rr=%rr_d +LDI 1110 .... .... .... @op_rd_imm8 +LDS 1001 000 ..... 0000 @ldst_s +LDX1 1001 000 rd:5 1100 +LDX2 1001 000 rd:5 1101 +LDX3 1001 000 rd:5 1110 +LDY2 1001 000 rd:5 1001 +LDY3 1001 000 rd:5 1010 +LDZ2 1001 000 rd:5 0001 +LDZ3 1001 000 rd:5 0010 +LDDY 10 . 0 .. 0 ..... 1 ... @ldst_d +LDDZ 10 . 0 .. 0 ..... 0 ... @ldst_d +STS 1001 001 ..... 0000 @ldst_s +STX1 1001 001 rr:5 1100 +STX2 1001 001 rr:5 1101 +STX3 1001 001 rr:5 1110 +STY2 1001 001 rd:5 1001 +STY3 1001 001 rd:5 1010 +STZ2 1001 001 rd:5 0001 +STZ3 1001 001 rd:5 0010 +STDY 10 . 0 .. 1 ..... 1 ... @ldst_d +STDZ 10 . 0 .. 1 ..... 0 ... @ldst_d +LPM1 1001 0101 1100 1000 +LPM2 1001 000 rd:5 0100 +LPMX 1001 000 rd:5 0101 +ELPM1 1001 0101 1101 1000 +ELPM2 1001 000 rd:5 0110 +ELPMX 1001 000 rd:5 0111 +SPM 1001 0101 1110 1000 +SPMX 1001 0101 1111 1000 +IN 1011 0 .. ..... .... @io_rd_imm +OUT 1011 1 .. ..... .... 
@io_rd_imm +PUSH 1001 001 rd:5 1111 +POP 1001 000 rd:5 1111 +XCH 1001 001 rd:5 0100 +LAC 1001 001 rd:5 0110 +LAS 1001 001 rd:5 0101 +LAT 1001 001 rd:5 0111 + +# +# Bit and Bit-test Instructions +# +LSR 1001 010 rd:5 0110 +ROR 1001 010 rd:5 0111 +ASR 1001 010 rd:5 0101 +SWAP 1001 010 rd:5 0010 +SBI 1001 1010 reg:5 bit:3 +CBI 1001 1000 reg:5 bit:3 +BST 1111 101 rd:5 0 bit:3 +BLD 1111 100 rd:5 0 bit:3 +BSET 1001 0100 0 bit:3 1000 +BCLR 1001 0100 1 bit:3 1000 + +# +# MCU Control Instructions +# +BREAK 1001 0101 1001 1000 +NOP 0000 0000 0000 0000 +SLEEP 1001 0101 1000 1000 +WDR 1001 0101 1010 1000 diff --git a/qemu/target/avr/machine.c b/qemu/target/avr/machine.c new file mode 100644 index 0000000000..e315442787 --- /dev/null +++ b/qemu/target/avr/machine.c @@ -0,0 +1,119 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "migration/cpu.h" + +static int get_sreg(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + CPUAVRState *env = opaque; + uint8_t sreg; + + sreg = qemu_get_byte(f); + cpu_set_sreg(env, sreg); + return 0; +} + +static int put_sreg(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, QJSON *vmdesc) +{ + CPUAVRState *env = opaque; + uint8_t sreg = cpu_get_sreg(env); + + qemu_put_byte(f, sreg); + return 0; +} + +static const VMStateInfo vms_sreg = { + .name = "sreg", + .get = get_sreg, + .put = put_sreg, +}; + +static int get_segment(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + uint32_t *ramp = opaque; + uint8_t temp; + + temp = qemu_get_byte(f); + *ramp = ((uint32_t)temp) << 16; + return 0; +} + +static int put_segment(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, QJSON *vmdesc) +{ + uint32_t *ramp = opaque; + uint8_t temp = *ramp >> 16; + + qemu_put_byte(f, temp); + return 0; +} + +static const VMStateInfo vms_rampD = { + .name = "rampD", + .get = get_segment, + .put = put_segment, +}; +static const VMStateInfo vms_rampX = { + .name = "rampX", + .get = get_segment, + .put = put_segment, +}; +static const VMStateInfo vms_rampY = { + .name = "rampY", + .get = get_segment, + .put = put_segment, +}; +static const VMStateInfo vms_rampZ = { + .name = "rampZ", + .get = get_segment, + .put = put_segment, +}; +static const VMStateInfo vms_eind = { + .name = "eind", + .get = get_segment, + .put = put_segment, +}; + +const VMStateDescription vms_avr_cpu = { + .name = "cpu", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(env.pc_w, AVRCPU), + VMSTATE_UINT32(env.sp, AVRCPU), + VMSTATE_UINT32(env.skip, AVRCPU), + + VMSTATE_UINT32_ARRAY(env.r, AVRCPU, NUMBER_OF_CPU_REGISTERS), + + VMSTATE_SINGLE(env, AVRCPU, 0, vms_sreg, CPUAVRState), + VMSTATE_SINGLE(env.rampD, AVRCPU, 0, vms_rampD, uint32_t), + VMSTATE_SINGLE(env.rampX, AVRCPU, 0, 
vms_rampX, uint32_t), + VMSTATE_SINGLE(env.rampY, AVRCPU, 0, vms_rampY, uint32_t), + VMSTATE_SINGLE(env.rampZ, AVRCPU, 0, vms_rampZ, uint32_t), + VMSTATE_SINGLE(env.eind, AVRCPU, 0, vms_eind, uint32_t), + + VMSTATE_END_OF_LIST() + } +}; diff --git a/qemu/target/avr/translate.c b/qemu/target/avr/translate.c new file mode 100644 index 0000000000..9ebc7dcf45 --- /dev/null +++ b/qemu/target/avr/translate.c @@ -0,0 +1,3270 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2019-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "tcg/tcg.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "tcg/tcg-op.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" +#include "exec/translator.h" +#include "exec/gen-icount.h" +#include "unicorn_helper.h" + +#define gen_decl(func, ...) \ + glue(gen_,func)(TCGContext *tcg_ctx, ## __VA_ARGS__) +#define gen_call(func, ...) \ + glue(gen_,func)(tcg_ctx, ## __VA_ARGS__) + +#define gen_io_end() gen_call(io_end) +#define gen_tb_start(...) gen_call(tb_start, __VA_ARGS__) +#define gen_tb_end(...) gen_call(tb_end, __VA_ARGS__) + +#define gen_helper_call(name, ...) \ + glue(gen_helper_,name)(tcg_ctx, ## __VA_ARGS__) +#define gen_helper_unsupported(...) \ + gen_helper_call(unsupported, __VA_ARGS__) + +#define gen_helper_debug(...) gen_helper_call(debug, __VA_ARGS__) +#define gen_helper_sleep(...) gen_helper_call(sleep, __VA_ARGS__) +#define gen_helper_inb(...) gen_helper_call(inb, __VA_ARGS__) +#define gen_helper_outb(...) gen_helper_call(outb, __VA_ARGS__) +#define gen_helper_fullrd(...) gen_helper_call(fullrd, __VA_ARGS__) +#define gen_helper_fullwr(...) gen_helper_call(fullwr, __VA_ARGS__) +#define gen_helper_wdr(...) 
gen_helper_call(wdr, __VA_ARGS__) + +/* + * Define if you want a BREAK instruction translated to a breakpoint + * Active debugging connection is assumed + * This is for + * https://github.com/seharris/qemu-avr-tests/tree/master/instruction-tests + * tests + */ +#undef BREAKPOINT_ON_BREAK + +#define cpu_pc (tcg_ctx->cpu_pc) +#define cpu_Cf (tcg_ctx->cpu_Cf) +#define cpu_Zf (tcg_ctx->cpu_ZF) +#define cpu_Nf (tcg_ctx->cpu_NF) +#define cpu_Vf (tcg_ctx->cpu_VF) +#define cpu_Sf (tcg_ctx->cpu_Sf) +#define cpu_Hf (tcg_ctx->cpu_Hf) +#define cpu_Tf (tcg_ctx->cpu_Tf) +#define cpu_If (tcg_ctx->cpu_If) +#define cpu_rampD (tcg_ctx->cpu_rampD) +#define cpu_rampX (tcg_ctx->cpu_rampX) +#define cpu_rampY (tcg_ctx->cpu_rampY) +#define cpu_rampZ (tcg_ctx->cpu_rampZ) +#define cpu_r (tcg_ctx->cpu_gpr) +#define cpu_eind (tcg_ctx->cpu_eind) +#define cpu_sp (tcg_ctx->cpu_sp) +#define cpu_skip (tcg_ctx->cpu_skip) + +static const char reg_names[NUMBER_OF_CPU_REGISTERS][8] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", +}; +#define REG(x) (cpu_r[x]) + +enum { + DISAS_EXIT = DISAS_TARGET_0, /* We want return to the cpu main loop. */ + DISAS_LOOKUP = DISAS_TARGET_1, /* We have a variable condition exit. */ + DISAS_CHAIN = DISAS_TARGET_2, /* We have a single condition exit. */ + DISAS_UC_EXIT = DISAS_TARGET_3, /* Unicorn: special state for exiting in the middle of tb. */ +}; + +typedef struct DisasContext DisasContext; + +/* This is the state at translation time. */ +struct DisasContext { + TranslationBlock *tb; + + CPUAVRState *env; + CPUState *cs; + + target_long npc; + uint32_t opcode; + + /* Routine used to access memory */ + int memidx; + int bstate; + int singlestep; + + /* + * some AVR instructions can make the following instruction to be skipped + * Let's name those instructions + * A - instruction that can skip the next one + * B - instruction that can be skipped. this depends on execution of A + * there are two scenarios + * 1. A and B belong to the same translation block + * 2. 
A is the last instruction in the translation block and B is the last + * + * following variables are used to simplify the skipping logic, they are + * used in the following manner (sketch) + * + * TCGLabel *skip_label = NULL; + * if (ctx.skip_cond != TCG_COND_NEVER) { + * skip_label = gen_new_label(); + * tcg_gen_brcond_tl(skip_cond, skip_var0, skip_var1, skip_label); + * } + * + * if (free_skip_var0) { + * tcg_temp_free(skip_var0); + * free_skip_var0 = false; + * } + * + * translate(&ctx); + * + * if (skip_label) { + * gen_set_label(skip_label); + * } + */ + TCGv skip_var0; + TCGv skip_var1; + TCGCond skip_cond; + bool free_skip_var0; +}; + +void avr_cpu_tcg_init(struct uc_struct *uc) +{ + int i; + + INIT_TCG_CONTEXT_FROM_UC(uc); + INIT_CPU_ENV_FROM_TCG_CONTEXT(tcg_ctx); + +#define AVR_REG_OFFS(x) offsetof(CPUAVRState, x) + cpu_pc = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(pc_w), "pc"); + cpu_Cf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregC), "Cf"); + cpu_Zf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregZ), "Zf"); + cpu_Nf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregN), "Nf"); + cpu_Vf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregV), "Vf"); + cpu_Sf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregS), "Sf"); + cpu_Hf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregH), "Hf"); + cpu_Tf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregT), "Tf"); + cpu_If = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregI), "If"); + cpu_rampD = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(rampD), "rampD"); + cpu_rampX = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(rampX), "rampX"); + cpu_rampY = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(rampY), "rampY"); + cpu_rampZ = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(rampZ), "rampZ"); + cpu_eind = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(eind), "eind"); + cpu_sp = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sp), "sp"); + cpu_skip = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(skip), "skip"); + + for (i = 0; i < NUMBER_OF_CPU_REGISTERS; i++) { + cpu_r[i] = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(r[i]), + reg_names[i]); + } +#undef AVR_REG_OFFS +} + +static int to_regs_16_31_by_one(DisasContext *ctx, int indx) +{ + return 16 + (indx % 16); +} + +static int to_regs_16_23_by_one(DisasContext *ctx, int indx) +{ + return 16 + (indx % 8); +} + +static int to_regs_24_30_by_two(DisasContext *ctx, int indx) +{ + return 24 + (indx % 4) * 2; +} + +static int to_regs_00_30_by_two(DisasContext *ctx, int indx) +{ + return (indx % 16) * 2; +} + +static uint16_t next_word(DisasContext *ctx) +{ + // Unicorn: + return cpu_lduw_code(ctx->env, avr_code_base(ctx->env) | (ctx->npc++ * 2)); +} + +static int append_16(DisasContext *ctx, int x) +{ + return x << 16 | next_word(ctx); +} + +static bool avr_have_feature(DisasContext *ctx, int feature) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + if (!avr_feature(ctx->env, feature)) { + gen_helper_unsupported(cpu_env); + ctx->bstate = DISAS_NORETURN; + return false; + } + return true; +} + +static bool decode_insn(DisasContext *ctx, uint16_t insn); +#include "decode-insn.c.inc" + +/* + * Arithmetic Instructions + */ + +/* + * Utility functions for updating status registers: + * + * - gen_add_CHf() + * - gen_add_Vf() + * - gen_sub_CHf() + * - gen_sub_Vf() + * - gen_NSf() + * - gen_ZNSf() + * + */ + +static void gen_decl(add_CHf, TCGv R, TCGv Rd, TCGv Rr) +{ + TCGv t1 = tcg_temp_new_i32(); + TCGv t2 = tcg_temp_new_i32(); + TCGv t3 = tcg_temp_new_i32(); + + tcg_gen_and_tl(t1, Rd, Rr); /* 
t1 = Rd & Rr */ + tcg_gen_andc_tl(t2, Rd, R); /* t2 = Rd & ~R */ + tcg_gen_andc_tl(t3, Rr, R); /* t3 = Rr & ~R */ + tcg_gen_or_tl(t1, t1, t2); /* t1 = t1 | t2 | t3 */ + tcg_gen_or_tl(t1, t1, t3); + + tcg_gen_shri_tl(cpu_Cf, t1, 7); /* Cf = t1(7) */ + tcg_gen_shri_tl(cpu_Hf, t1, 3); /* Hf = t1(3) */ + tcg_gen_andi_tl(cpu_Hf, cpu_Hf, 1); + + tcg_temp_free_i32(t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t1); +} + +static void gen_decl(add_Vf, TCGv R, TCGv Rd, TCGv Rr) +{ + TCGv t1 = tcg_temp_new_i32(); + TCGv t2 = tcg_temp_new_i32(); + + /* t1 = Rd & Rr & ~R | ~Rd & ~Rr & R */ + /* = (Rd ^ R) & ~(Rd ^ Rr) */ + tcg_gen_xor_tl(t1, Rd, R); + tcg_gen_xor_tl(t2, Rd, Rr); + tcg_gen_andc_tl(t1, t1, t2); + + tcg_gen_shri_tl(cpu_Vf, t1, 7); /* Vf = t1(7) */ + + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t1); +} + +static void gen_decl(sub_CHf, TCGv R, TCGv Rd, TCGv Rr) +{ + TCGv t1 = tcg_temp_new_i32(); + TCGv t2 = tcg_temp_new_i32(); + TCGv t3 = tcg_temp_new_i32(); + + tcg_gen_not_tl(t1, Rd); /* t1 = ~Rd */ + tcg_gen_and_tl(t2, t1, Rr); /* t2 = ~Rd & Rr */ + tcg_gen_or_tl(t3, t1, Rr); /* t3 = (~Rd | Rr) & R */ + tcg_gen_and_tl(t3, t3, R); + tcg_gen_or_tl(t2, t2, t3); /* t2 = ~Rd & Rr | ~Rd & R | R & Rr */ + + tcg_gen_shri_tl(cpu_Cf, t2, 7); /* Cf = t2(7) */ + tcg_gen_shri_tl(cpu_Hf, t2, 3); /* Hf = t2(3) */ + tcg_gen_andi_tl(cpu_Hf, cpu_Hf, 1); + + tcg_temp_free_i32(t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t1); +} + +static void gen_decl(sub_Vf, TCGv R, TCGv Rd, TCGv Rr) +{ + TCGv t1 = tcg_temp_new_i32(); + TCGv t2 = tcg_temp_new_i32(); + + /* t1 = Rd & ~Rr & ~R | ~Rd & Rr & R */ + /* = (Rd ^ R) & (Rd ^ R) */ + tcg_gen_xor_tl(t1, Rd, R); + tcg_gen_xor_tl(t2, Rd, Rr); + tcg_gen_and_tl(t1, t1, t2); + + tcg_gen_shri_tl(cpu_Vf, t1, 7); /* Vf = t1(7) */ + + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t1); +} + +static void gen_decl(NSf, TCGv R) +{ + tcg_gen_shri_tl(cpu_Nf, R, 7); /* Nf = R(7) */ + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf); /* Sf = Nf ^ Vf */ +} + +static void gen_decl(ZNSf, TCGv R) +{ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + /* update status register */ + tcg_gen_shri_tl(cpu_Nf, R, 7); /* Nf = R(7) */ + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf); /* Sf = Nf ^ Vf */ +} + +#define gen_add_CHf(...) gen_call(add_CHf, __VA_ARGS__) +#define gen_add_Vf(...) gen_call(add_Vf, __VA_ARGS__) +#define gen_sub_CHf(...) gen_call(sub_CHf, __VA_ARGS__) +#define gen_sub_Vf(...) gen_call(sub_Vf, __VA_ARGS__) +#define gen_NSf(...) gen_call(NSf, __VA_ARGS__) +#define gen_ZNSf(...) gen_call(ZNSf, __VA_ARGS__) + +#define gen_new_label_avr() gen_call(new_label_avr) +#define gen_set_label(...) gen_call(set_label, __VA_ARGS__) + +/* + * Adds two registers without the C Flag and places the result in the + * destination register Rd. + */ +static bool trans_ADD(DisasContext *ctx, arg_ADD *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_add_tl(R, Rd, Rr); /* Rd = Rd + Rr */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_add_CHf(R, Rd, Rr); + gen_add_Vf(R, Rd, Rr); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Adds two registers and the contents of the C Flag and places the result in + * the destination register Rd. 
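For reference, the bitwise identities used by gen_add_CHf() and gen_add_Vf() above can be checked in isolation with plain host C. This is only an illustrative sketch of the 8-bit ADD flag semantics (the avr_add8 name and Flags struct are invented for the example), not code from the patch:

#include <assert.h>
#include <stdint.h>

typedef struct { int C, Z, N, V, S, H; } Flags;

static uint8_t avr_add8(uint8_t rd, uint8_t rr, Flags *f)
{
    uint8_t r = (uint8_t)(rd + rr);
    /* carry out of each bit position: (Rd & Rr) | (Rd & ~R) | (Rr & ~R) */
    uint8_t carries = (rd & rr) | (rd & ~r) | (rr & ~r);

    f->C = (carries >> 7) & 1;                    /* carry out of bit 7 */
    f->H = (carries >> 3) & 1;                    /* carry out of bit 3 */
    f->V = (((rd ^ r) & ~(rd ^ rr)) >> 7) & 1;    /* same-sign operands, different-sign result */
    f->Z = (r == 0);
    f->N = (r >> 7) & 1;
    f->S = f->N ^ f->V;
    return r;
}

int main(void)
{
    Flags f;
    assert(avr_add8(0x78, 0x08, &f) == 0x80);     /* 120 + 8 crosses into the negative range */
    assert(f.V == 1 && f.H == 1 && f.C == 0 && f.N == 1 && f.S == 0);
    return 0;
}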
+ */ +static bool trans_ADC(DisasContext *ctx, arg_ADC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_add_tl(R, Rd, Rr); /* R = Rd + Rr + Cf */ + tcg_gen_add_tl(R, R, cpu_Cf); + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_add_CHf(R, Rd, Rr); + gen_add_Vf(R, Rd, Rr); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Adds an immediate value (0 - 63) to a register pair and places the result + * in the register pair. This instruction operates on the upper four register + * pairs, and is well suited for operations on the pointer registers. This + * instruction is not available in all devices. Refer to the device specific + * instruction set summary. + */ +static bool trans_ADIW(DisasContext *ctx, arg_ADIW *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ADIW_SBIW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv RdL = cpu_r[a->rd]; + TCGv RdH = cpu_r[a->rd + 1]; + int Imm = (a->imm); + TCGv R = tcg_temp_new_i32(); + TCGv Rd = tcg_temp_new_i32(); + + tcg_gen_deposit_tl(Rd, RdL, RdH, 8, 8); /* Rd = RdH:RdL */ + tcg_gen_addi_tl(R, Rd, Imm); /* R = Rd + Imm */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + + /* update status register */ + tcg_gen_andc_tl(cpu_Cf, Rd, R); /* Cf = Rd & ~R */ + tcg_gen_shri_tl(cpu_Cf, cpu_Cf, 15); + tcg_gen_andc_tl(cpu_Vf, R, Rd); /* Vf = R & ~Rd */ + tcg_gen_shri_tl(cpu_Vf, cpu_Vf, 15); + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + tcg_gen_shri_tl(cpu_Nf, R, 15); /* Nf = R(15) */ + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf);/* Sf = Nf ^ Vf */ + + /* update output registers */ + tcg_gen_andi_tl(RdL, R, 0xff); + tcg_gen_shri_tl(RdH, R, 8); + + tcg_temp_free_i32(Rd); + tcg_temp_free_i32(R); + + return true; +} + +/* + * Subtracts two registers and places the result in the destination + * register Rd. + */ +static bool trans_SUB(DisasContext *ctx, arg_SUB *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + tcg_gen_andc_tl(cpu_Cf, Rd, R); /* Cf = Rd & ~R */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Subtracts a register and a constant and places the result in the + * destination register Rd. This instruction is working on Register R16 to R31 + * and is very well suited for operations on the X, Y, and Z-pointers. + */ +static bool trans_SUBI(DisasContext *ctx, arg_SUBI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = tcg_const_i32(a->imm); + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Imm */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + tcg_temp_free_i32(Rr); + + return true; +} + +/* + * Subtracts two registers and subtracts with the C Flag and places the + * result in the destination register Rd. 
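The 16-bit flag updates in trans_ADIW() above are easy to misread, so here is the same arithmetic as a stand-alone plain-C sketch; the avr_adiw name and the bare r[32] array are invented for the illustration, and only the Cf/Vf formulas actually shown in the TCG code are assumed:

#include <assert.h>
#include <stdint.h>

/* ADIW Rd+1:Rd, imm -- d is one of 24/26/28/30, imm is 0..63 */
static void avr_adiw(uint8_t r[32], int d, unsigned imm,
                     int *C, int *Z, int *N, int *V, int *S)
{
    uint16_t rd  = (uint16_t)((r[d + 1] << 8) | r[d]);   /* Rd = RdH:RdL */
    uint16_t res = (uint16_t)(rd + imm);

    *C = ((rd & ~res) >> 15) & 1;    /* Cf = Rd(15) & ~R(15) */
    *V = ((res & ~rd) >> 15) & 1;    /* Vf = R(15) & ~Rd(15) */
    *Z = (res == 0);
    *N = (res >> 15) & 1;
    *S = *N ^ *V;

    r[d]     = res & 0xff;
    r[d + 1] = res >> 8;
}

int main(void)
{
    uint8_t r[32] = {0};
    int C, Z, N, V, S;

    r[24] = 0xff; r[25] = 0x7f;                  /* R25:R24 = 0x7fff */
    avr_adiw(r, 24, 1, &C, &Z, &N, &V, &S);      /* ADIW r25:r24, 1 */
    assert(r[25] == 0x80 && r[24] == 0x00);
    assert(C == 0 && V == 1 && N == 1 && S == 0 && Z == 0);
    return 0;
}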
+ */ +static bool trans_SBC(DisasContext *ctx, arg_SBC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv zero = tcg_const_i32(0); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr - Cf */ + tcg_gen_sub_tl(R, R, cpu_Cf); + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_NSf(R); + + /* + * Previous value remains unchanged when the result is zero; + * cleared otherwise. + */ + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_Zf, R, zero, cpu_Zf, zero); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(zero); + tcg_temp_free_i32(R); + + return true; +} + +/* + * SBCI -- Subtract Immediate with Carry + */ +static bool trans_SBCI(DisasContext *ctx, arg_SBCI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = tcg_const_i32(a->imm); + TCGv R = tcg_temp_new_i32(); + TCGv zero = tcg_const_i32(0); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr - Cf */ + tcg_gen_sub_tl(R, R, cpu_Cf); + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_NSf(R); + + /* + * Previous value remains unchanged when the result is zero; + * cleared otherwise. + */ + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_Zf, R, zero, cpu_Zf, zero); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(zero); + tcg_temp_free_i32(R); + tcg_temp_free_i32(Rr); + + return true; +} + +/* + * Subtracts an immediate value (0-63) from a register pair and places the + * result in the register pair. This instruction operates on the upper four + * register pairs, and is well suited for operations on the Pointer Registers. + * This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_SBIW(DisasContext *ctx, arg_SBIW *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ADIW_SBIW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv RdL = cpu_r[a->rd]; + TCGv RdH = cpu_r[a->rd + 1]; + int Imm = (a->imm); + TCGv R = tcg_temp_new_i32(); + TCGv Rd = tcg_temp_new_i32(); + + tcg_gen_deposit_tl(Rd, RdL, RdH, 8, 8); /* Rd = RdH:RdL */ + tcg_gen_subi_tl(R, Rd, Imm); /* R = Rd - Imm */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + + /* update status register */ + tcg_gen_andc_tl(cpu_Cf, R, Rd); + tcg_gen_shri_tl(cpu_Cf, cpu_Cf, 15); /* Cf = R & ~Rd */ + tcg_gen_andc_tl(cpu_Vf, Rd, R); + tcg_gen_shri_tl(cpu_Vf, cpu_Vf, 15); /* Vf = Rd & ~R */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + tcg_gen_shri_tl(cpu_Nf, R, 15); /* Nf = R(15) */ + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf); /* Sf = Nf ^ Vf */ + + /* update output registers */ + tcg_gen_andi_tl(RdL, R, 0xff); + tcg_gen_shri_tl(RdH, R, 8); + + tcg_temp_free_i32(Rd); + tcg_temp_free_i32(R); + + return true; +} + +/* + * Performs the logical AND between the contents of register Rd and register + * Rr and places the result in the destination register Rd. 
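The movcond on cpu_Zf in trans_SBC()/trans_SBCI() above implements the AVR rule that SBC can clear Z but never set it, which is what makes a low-byte SUB followed by a high-byte SBC give a usable zero test for 16-bit values. A minimal plain-C sketch of that behaviour (function names invented; the borrow formula is the one from gen_sub_CHf()):

#include <assert.h>
#include <stdint.h>

static uint8_t avr_sub8(uint8_t rd, uint8_t rr, int *C, int *Z)
{
    uint8_t r = (uint8_t)(rd - rr);
    uint8_t borrows = (~rd & rr) | (~rd & r) | (rr & r);
    *C = (borrows >> 7) & 1;
    *Z = (r == 0);                        /* SUB: Z simply follows the result */
    return r;
}

static uint8_t avr_sbc8(uint8_t rd, uint8_t rr, int *C, int *Z)
{
    uint8_t r = (uint8_t)(rd - rr - *C);
    uint8_t borrows = (~rd & rr) | (~rd & r) | (rr & r);
    *C = (borrows >> 7) & 1;
    *Z = (r == 0) ? *Z : 0;               /* SBC: Z can only stay set, never become set */
    return r;
}

int main(void)
{
    int C, Z;
    /* 16-bit 0x1200 - 0x1200, low bytes first, then high bytes with borrow */
    uint8_t lo = avr_sub8(0x00, 0x00, &C, &Z);
    uint8_t hi = avr_sbc8(0x12, 0x12, &C, &Z);
    assert(lo == 0 && hi == 0 && C == 0 && Z == 1);
    return 0;
}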
+ */ +static bool trans_AND(DisasContext *ctx, arg_AND *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_and_tl(R, Rd, Rr); /* Rd = Rd and Rr */ + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0); /* Vf = 0 */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Performs the logical AND between the contents of register Rd and a constant + * and places the result in the destination register Rd. + */ +static bool trans_ANDI(DisasContext *ctx, arg_ANDI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + int Imm = (a->imm); + + tcg_gen_andi_tl(Rd, Rd, Imm); /* Rd = Rd & Imm */ + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0x00); /* Vf = 0 */ + gen_ZNSf(Rd); + + return true; +} + +/* + * Performs the logical OR between the contents of register Rd and register + * Rr and places the result in the destination register Rd. + */ +static bool trans_OR(DisasContext *ctx, arg_OR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_or_tl(R, Rd, Rr); + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Performs the logical OR between the contents of register Rd and a + * constant and places the result in the destination register Rd. + */ +static bool trans_ORI(DisasContext *ctx, arg_ORI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + int Imm = (a->imm); + + tcg_gen_ori_tl(Rd, Rd, Imm); /* Rd = Rd | Imm */ + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0x00); /* Vf = 0 */ + gen_ZNSf(Rd); + + return true; +} + +/* + * Performs the logical EOR between the contents of register Rd and + * register Rr and places the result in the destination register Rd. + */ +static bool trans_EOR(DisasContext *ctx, arg_EOR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + + tcg_gen_xor_tl(Rd, Rd, Rr); + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0); + gen_ZNSf(Rd); + + return true; +} + +/* + * Clears the specified bits in register Rd. Performs the logical AND + * between the contents of register Rd and the complement of the constant mask + * K. The result will be placed in register Rd. + */ +static bool trans_COM(DisasContext *ctx, arg_COM *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_xori_tl(Rd, Rd, 0xff); + + /* update status register */ + tcg_gen_movi_tl(cpu_Cf, 1); /* Cf = 1 */ + tcg_gen_movi_tl(cpu_Vf, 0); /* Vf = 0 */ + gen_ZNSf(Rd); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Replaces the contents of register Rd with its two's complement; the + * value $80 is left unchanged. 
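The NEG description above notes that $80 is left unchanged; together with INC and DEC further down, these are the single-operand cases where the V flag has a fixed trigger value rather than a full overflow computation. A small stand-alone check of just those V rules, consistent with the setcondi/gen_sub_Vf logic used in the translations (names invented for the sketch):

#include <assert.h>
#include <stdint.h>

static uint8_t avr_inc(uint8_t rd, int *V)
{
    uint8_t r = (uint8_t)(rd + 1);
    *V = (r == 0x80);                 /* only 0x7f + 1 overflows */
    return r;
}

static uint8_t avr_dec(uint8_t rd, int *V)
{
    uint8_t r = (uint8_t)(rd - 1);
    *V = (r == 0x7f);                 /* only 0x80 - 1 overflows */
    return r;
}

static uint8_t avr_neg(uint8_t rd, int *V)
{
    uint8_t r = (uint8_t)(0 - rd);
    *V = (rd == 0x80);                /* -(-128) is not representable */
    return r;
}

int main(void)
{
    int V;
    assert(avr_inc(0x7f, &V) == 0x80 && V == 1);
    assert(avr_dec(0x80, &V) == 0x7f && V == 1);
    assert(avr_neg(0x80, &V) == 0x80 && V == 1);   /* $80 comes back unchanged, V set */
    assert(avr_neg(0x01, &V) == 0xff && V == 0);
    return 0;
}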
+ */ +static bool trans_NEG(DisasContext *ctx, arg_NEG *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_const_i32(0); + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, t0, Rd); /* R = 0 - Rd */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, t0, Rd); + gen_sub_Vf(R, t0, Rd); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * Adds one -1- to the contents of register Rd and places the result in the + * destination register Rd. The C Flag in SREG is not affected by the + * operation, thus allowing the INC instruction to be used on a loop counter in + * multiple-precision computations. When operating on unsigned numbers, only + * BREQ and BRNE branches can be expected to perform consistently. When + * operating on two's complement values, all signed branches are available. + */ +static bool trans_INC(DisasContext *ctx, arg_INC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_addi_tl(Rd, Rd, 1); + tcg_gen_andi_tl(Rd, Rd, 0xff); + + /* update status register */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Vf, Rd, 0x80); /* Vf = Rd == 0x80 */ + gen_ZNSf(Rd); + + return true; +} + +/* + * Subtracts one -1- from the contents of register Rd and places the result + * in the destination register Rd. The C Flag in SREG is not affected by the + * operation, thus allowing the DEC instruction to be used on a loop counter in + * multiple-precision computations. When operating on unsigned values, only + * BREQ and BRNE branches can be expected to perform consistently. When + * operating on two's complement values, all signed branches are available. + */ +static bool trans_DEC(DisasContext *ctx, arg_DEC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_subi_tl(Rd, Rd, 1); /* Rd = Rd - 1 */ + tcg_gen_andi_tl(Rd, Rd, 0xff); /* make it 8 bits */ + + /* update status register */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Vf, Rd, 0x7f); /* Vf = Rd == 0x7f */ + gen_ZNSf(Rd); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit unsigned multiplication. + */ +static bool trans_MUL(DisasContext *ctx, arg_MUL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_mul_tl(R, Rd, Rr); /* R = Rd * Rr */ + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit signed multiplication. 
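As a reference for trans_MUL() above and the signed variant described next, the following plain-C sketch shows where the 16-bit product ends up (R1:R0) and how C and Z are derived from it; the avr_mul name and register array are invented for the example:

#include <assert.h>
#include <stdint.h>

static void avr_mul(uint8_t r[32], int d, int s, int is_signed, int *C, int *Z)
{
    uint16_t prod;

    if (is_signed) {                        /* MULS: both operands signed */
        prod = (uint16_t)((int8_t)r[d] * (int8_t)r[s]);
    } else {                                /* MUL: both operands unsigned */
        prod = (uint16_t)(r[d] * r[s]);
    }
    r[0] = prod & 0xff;                     /* low byte  -> R0 */
    r[1] = prod >> 8;                       /* high byte -> R1 */
    *C = (prod >> 15) & 1;                  /* C = bit 15 of the product */
    *Z = (prod == 0);
}

int main(void)
{
    uint8_t r[32] = {0};
    int C, Z;

    r[16] = 200; r[17] = 200;               /* MUL r16, r17: 40000 = 0x9c40 */
    avr_mul(r, 16, 17, 0, &C, &Z);
    assert(r[1] == 0x9c && r[0] == 0x40 && C == 1 && Z == 0);

    r[16] = (uint8_t)-2; r[17] = 100;       /* MULS r16, r17: -200 = 0xff38 */
    avr_mul(r, 16, 17, 1, &C, &Z);
    assert(r[1] == 0xff && r[0] == 0x38 && C == 1 && Z == 0);
    return 0;
}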
+ */ +static bool trans_MULS(DisasContext *ctx, arg_MULS *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + tcg_gen_ext8s_tl(t0, Rd); /* make Rd full 32 bit signed */ + tcg_gen_ext8s_tl(t1, Rr); /* make Rr full 32 bit signed */ + tcg_gen_mul_tl(R, t0, t1); /* R = Rd * Rr */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit multiplication of a + * signed and an unsigned number. + */ +static bool trans_MULSU(DisasContext *ctx, arg_MULSU *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); + + tcg_gen_ext8s_tl(t0, Rd); /* make Rd full 32 bit signed */ + tcg_gen_mul_tl(R, t0, Rr); /* R = Rd * Rr */ + tcg_gen_andi_tl(R, R, 0xffff); /* make R 16 bits */ + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit unsigned + * multiplication and shifts the result one bit left. + */ +static bool trans_FMUL(DisasContext *ctx, arg_FMUL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_mul_tl(R, Rd, Rr); /* R = Rd * Rr */ + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + /* update output registers */ + tcg_gen_shli_tl(R, R, 1); + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + tcg_gen_andi_tl(R1, R1, 0xff); + + + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit signed multiplication + * and shifts the result one bit left. 
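trans_FMUL() above (and the signed forms that follow) read C and Z from the raw 16-bit product and only then shift it left by one before writing R1:R0; the shift is what realigns a (1.7) x (1.7) fractional multiply into a (1.15) result. A hedged plain-C illustration of the unsigned case, with names invented for the sketch:

#include <assert.h>
#include <stdint.h>

/* FMUL: unsigned (1.7) * (1.7) -> (1.15), result in R1:R0 */
static void avr_fmul(uint8_t r[32], int d, int s, int *C, int *Z)
{
    uint16_t prod = (uint16_t)(r[d] * r[s]);

    *C = (prod >> 15) & 1;        /* flags come from the unshifted product */
    *Z = (prod == 0);

    prod <<= 1;                   /* realign the radix point */
    r[0] = prod & 0xff;
    r[1] = prod >> 8;
}

int main(void)
{
    uint8_t r[32] = {0};
    int C, Z;

    /* 0.5 * 0.5 = 0.25: 0x40 * 0x40 = 0x1000, shifted left -> 0x2000 */
    r[20] = 0x40; r[21] = 0x40;
    avr_fmul(r, 20, 21, &C, &Z);
    assert(r[1] == 0x20 && r[0] == 0x00 && C == 0 && Z == 0);
    return 0;
}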
+ */ +static bool trans_FMULS(DisasContext *ctx, arg_FMULS *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + tcg_gen_ext8s_tl(t0, Rd); /* make Rd full 32 bit signed */ + tcg_gen_ext8s_tl(t1, Rr); /* make Rr full 32 bit signed */ + tcg_gen_mul_tl(R, t0, t1); /* R = Rd * Rr */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + /* update output registers */ + tcg_gen_shli_tl(R, R, 1); + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + tcg_gen_andi_tl(R1, R1, 0xff); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit signed multiplication + * and shifts the result one bit left. + */ +static bool trans_FMULSU(DisasContext *ctx, arg_FMULSU *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); + + tcg_gen_ext8s_tl(t0, Rd); /* make Rd full 32 bit signed */ + tcg_gen_mul_tl(R, t0, Rr); /* R = Rd * Rr */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + /* update output registers */ + tcg_gen_shli_tl(R, R, 1); + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + tcg_gen_andi_tl(R1, R1, 0xff); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * The module is an instruction set extension to the AVR CPU, performing + * DES iterations. The 64-bit data block (plaintext or ciphertext) is placed in + * the CPU register file, registers R0-R7, where LSB of data is placed in LSB + * of R0 and MSB of data is placed in MSB of R7. The full 64-bit key (including + * parity bits) is placed in registers R8- R15, organized in the register file + * with LSB of key in LSB of R8 and MSB of key in MSB of R15. Executing one DES + * instruction performs one round in the DES algorithm. Sixteen rounds must be + * executed in increasing order to form the correct DES ciphertext or + * plaintext. Intermediate results are stored in the register file (R0-R15) + * after each DES instruction. The instruction's operand (K) determines which + * round is executed, and the half carry flag (H) determines whether encryption + * or decryption is performed. The DES algorithm is described in + * "Specifications for the Data Encryption Standard" (Federal Information + * Processing Standards Publication 46). Intermediate results in this + * implementation differ from the standard because the initial permutation and + * the inverse initial permutation are performed each iteration. This does not + * affect the result in the final ciphertext or plaintext, but reduces + * execution time. 
+ */ +static bool trans_DES(DisasContext *ctx, arg_DES *a) +{ + /* TODO */ + if (!avr_have_feature(ctx, AVR_FEATURE_DES)) { + return true; + } + + qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__); + + return true; +} + +/* + * Branch Instructions + */ +static void gen_jmp_ez(DisasContext *ctx) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + tcg_gen_deposit_tl(cpu_pc, cpu_r[30], cpu_r[31], 8, 8); + tcg_gen_or_tl(cpu_pc, cpu_pc, cpu_eind); + ctx->bstate = DISAS_LOOKUP; +} + +static void gen_jmp_z(DisasContext *ctx) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + tcg_gen_deposit_tl(cpu_pc, cpu_r[30], cpu_r[31], 8, 8); + ctx->bstate = DISAS_LOOKUP; +} + +static void gen_push_ret(DisasContext *ctx, int ret) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + if (avr_feature(ctx->env, AVR_FEATURE_1_BYTE_PC)) { + + TCGv t0 = tcg_const_i32((ret & 0x0000ff)); + + tcg_gen_qemu_st_tl(t0, cpu_sp, MMU_DATA_IDX, MO_UB); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + + tcg_temp_free_i32(t0); + } else if (avr_feature(ctx->env, AVR_FEATURE_2_BYTE_PC)) { + + TCGv t0 = tcg_const_i32((ret & 0x00ffff)); + + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + tcg_gen_qemu_st_tl(t0, cpu_sp, MMU_DATA_IDX, MO_BEUW); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + + tcg_temp_free_i32(t0); + + } else if (avr_feature(ctx->env, AVR_FEATURE_3_BYTE_PC)) { + + TCGv lo = tcg_const_i32((ret & 0x0000ff)); + TCGv hi = tcg_const_i32((ret & 0xffff00) >> 8); + + tcg_gen_qemu_st_tl(lo, cpu_sp, MMU_DATA_IDX, MO_UB); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 2); + tcg_gen_qemu_st_tl(hi, cpu_sp, MMU_DATA_IDX, MO_BEUW); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + + tcg_temp_free_i32(lo); + tcg_temp_free_i32(hi); + } +} + +static void gen_pop_ret(DisasContext *ctx, TCGv ret) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + if (avr_feature(ctx->env, AVR_FEATURE_1_BYTE_PC)) { + tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + tcg_gen_qemu_ld_tl(ret, cpu_sp, MMU_DATA_IDX, MO_UB); + } else if (avr_feature(ctx->env, AVR_FEATURE_2_BYTE_PC)) { + tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + tcg_gen_qemu_ld_tl(ret, cpu_sp, MMU_DATA_IDX, MO_BEUW); + tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + } else if (avr_feature(ctx->env, AVR_FEATURE_3_BYTE_PC)) { + TCGv lo = tcg_temp_new_i32(); + TCGv hi = tcg_temp_new_i32(); + + tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + tcg_gen_qemu_ld_tl(hi, cpu_sp, MMU_DATA_IDX, MO_BEUW); + + tcg_gen_addi_tl(cpu_sp, cpu_sp, 2); + tcg_gen_qemu_ld_tl(lo, cpu_sp, MMU_DATA_IDX, MO_UB); + + tcg_gen_deposit_tl(ret, lo, hi, 8, 16); + + tcg_temp_free_i32(lo); + tcg_temp_free_i32(hi); + } +} + +static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TranslationBlock *tb = ctx->tb; + + if (ctx->singlestep == 0) { + tcg_gen_goto_tb(n); + tcg_gen_movi_i32(cpu_pc, dest); + tcg_gen_exit_tb(tb, n); + } else { + tcg_gen_movi_i32(cpu_pc, dest); + gen_helper_debug(cpu_env); + tcg_gen_exit_tb(NULL, 0); + } + ctx->bstate = DISAS_NORETURN; +} + +/* + * Relative jump to an address within PC - 2K +1 and PC + 2K (words). For + * AVR microcontrollers with Program memory not exceeding 4K words (8KB) this + * instruction can address the entire memory from every address location. See + * also JMP. + */ +static bool trans_RJMP(DisasContext *ctx, arg_RJMP *a) +{ + int dst = ctx->npc + a->imm; + + gen_goto_tb(ctx, 0, dst); + + return true; +} + +/* + * Indirect jump to the address pointed to by the Z (16 bits) Pointer + * Register in the Register File. The Z-pointer Register is 16 bits wide and + * allows jump within the lowest 64K words (128KB) section of Program memory. 
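Looking back at trans_RJMP() above: the decoded s12 field is a signed word offset added to ctx->npc, which already points at the word after the jump, so an offset of -1 branches to the instruction itself. A stand-alone sketch of that offset arithmetic (the sign extension is spelled out by hand here, since no decodetree is involved):

#include <assert.h>
#include <stdint.h>

/* RJMP/RCALL offset: low 12 bits of the opcode, sign-extended, counted in words */
static int rjmp_offset(uint16_t insn)
{
    int k = insn & 0xfff;
    return (k ^ 0x800) - 0x800;       /* sign-extend the 12-bit field */
}

int main(void)
{
    /* 0xcfff is the classic "jump to self": RJMP with offset -1 */
    assert(rjmp_offset(0xcfff) == -1);

    /* the target is formed against npc, the word address after the RJMP */
    uint32_t npc = 0x0100;
    assert(npc + rjmp_offset(0xcfff) == 0x00ff);
    return 0;
}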
+ * This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_IJMP(DisasContext *ctx, arg_IJMP *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_IJMP_ICALL)) { + return true; + } + + gen_jmp_z(ctx); + + return true; +} + +/* + * Indirect jump to the address pointed to by the Z (16 bits) Pointer + * Register in the Register File and the EIND Register in the I/O space. This + * instruction allows for indirect jumps to the entire 4M (words) Program + * memory space. See also IJMP. This instruction is not available in all + * devices. Refer to the device specific instruction set summary. + */ +static bool trans_EIJMP(DisasContext *ctx, arg_EIJMP *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_EIJMP_EICALL)) { + return true; + } + + gen_jmp_ez(ctx); + return true; +} + +/* + * Jump to an address within the entire 4M (words) Program memory. See also + * RJMP. This instruction is not available in all devices. Refer to the device + * specific instruction set summary.0 + */ +static bool trans_JMP(DisasContext *ctx, arg_JMP *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_JMP_CALL)) { + return true; + } + + gen_goto_tb(ctx, 0, a->imm); + + return true; +} + +/* + * Relative call to an address within PC - 2K + 1 and PC + 2K (words). The + * return address (the instruction after the RCALL) is stored onto the Stack. + * See also CALL. For AVR microcontrollers with Program memory not exceeding 4K + * words (8KB) this instruction can address the entire memory from every + * address location. The Stack Pointer uses a post-decrement scheme during + * RCALL. + */ +static bool trans_RCALL(DisasContext *ctx, arg_RCALL *a) +{ + int ret = ctx->npc; + int dst = ctx->npc + a->imm; + + gen_push_ret(ctx, ret); + gen_goto_tb(ctx, 0, dst); + + return true; +} + +/* + * Calls to a subroutine within the entire 4M (words) Program memory. The + * return address (to the instruction after the CALL) will be stored onto the + * Stack. See also RCALL. The Stack Pointer uses a post-decrement scheme during + * CALL. This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_ICALL(DisasContext *ctx, arg_ICALL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_IJMP_ICALL)) { + return true; + } + + int ret = ctx->npc; + + gen_push_ret(ctx, ret); + gen_jmp_z(ctx); + + return true; +} + +/* + * Indirect call of a subroutine pointed to by the Z (16 bits) Pointer + * Register in the Register File and the EIND Register in the I/O space. This + * instruction allows for indirect calls to the entire 4M (words) Program + * memory space. See also ICALL. The Stack Pointer uses a post-decrement scheme + * during EICALL. This instruction is not available in all devices. Refer to + * the device specific instruction set summary. + */ +static bool trans_EICALL(DisasContext *ctx, arg_EICALL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_EIJMP_EICALL)) { + return true; + } + + int ret = ctx->npc; + + gen_push_ret(ctx, ret); + gen_jmp_ez(ctx); + return true; +} + +/* + * Calls to a subroutine within the entire Program memory. The return + * address (to the instruction after the CALL) will be stored onto the Stack. + * (See also RCALL). The Stack Pointer uses a post-decrement scheme during + * CALL. This instruction is not available in all devices. Refer to the device + * specific instruction set summary. 
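The CALL/RCALL/ICALL family above all funnel through gen_push_ret(), and the matching RET/RETI go through gen_pop_ret(); the least obvious case is the 3-byte-PC layout, where the most significant byte of the return address ends up at the lowest stack address and SP finishes three bytes lower. A plain-C model of just that byte layout (the mem[] array and function names are invented; data memory is treated as a flat byte array):

#include <assert.h>
#include <stdint.h>

static uint8_t mem[0x10000];

/* 3-byte PC push, post-decrement SP: PC[23:16] lands at the lowest address */
static uint32_t push_ret3(uint32_t sp, uint32_t ret)
{
    mem[sp]     = ret & 0xff;               /* PC[7:0]   */
    mem[sp - 1] = (ret >> 8) & 0xff;        /* PC[15:8]  */
    mem[sp - 2] = (ret >> 16) & 0xff;       /* PC[23:16] */
    return sp - 3;
}

/* matching pop, pre-increment SP */
static uint32_t pop_ret3(uint32_t *sp)
{
    uint32_t ret = ((uint32_t)mem[*sp + 1] << 16)
                 | ((uint32_t)mem[*sp + 2] << 8)
                 |  (uint32_t)mem[*sp + 3];
    *sp += 3;
    return ret;
}

int main(void)
{
    uint32_t sp = 0x21ff;
    sp = push_ret3(sp, 0x012345);
    assert(sp == 0x21fc);
    assert(mem[0x21fd] == 0x01 && mem[0x21fe] == 0x23 && mem[0x21ff] == 0x45);
    assert(pop_ret3(&sp) == 0x012345 && sp == 0x21ff);
    return 0;
}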
+ */ +static bool trans_CALL(DisasContext *ctx, arg_CALL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_JMP_CALL)) { + return true; + } + + int Imm = a->imm; + int ret = ctx->npc; + + gen_push_ret(ctx, ret); + gen_goto_tb(ctx, 0, Imm); + + return true; +} + +/* + * Returns from subroutine. The return address is loaded from the STACK. + * The Stack Pointer uses a preincrement scheme during RET. + */ +static bool trans_RET(DisasContext *ctx, arg_RET *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + gen_pop_ret(ctx, cpu_pc); + + ctx->bstate = DISAS_LOOKUP; + return true; +} + +/* + * Returns from interrupt. The return address is loaded from the STACK and + * the Global Interrupt Flag is set. Note that the Status Register is not + * automatically stored when entering an interrupt routine, and it is not + * restored when returning from an interrupt routine. This must be handled by + * the application program. The Stack Pointer uses a pre-increment scheme + * during RETI. + */ +static bool trans_RETI(DisasContext *ctx, arg_RETI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + gen_pop_ret(ctx, cpu_pc); + tcg_gen_movi_tl(cpu_If, 1); + + /* Need to return to main loop to re-evaluate interrupts. */ + ctx->bstate = DISAS_EXIT; + return true; +} + +/* + * This instruction performs a compare between two registers Rd and Rr, and + * skips the next instruction if Rd = Rr. + */ +static bool trans_CPSE(DisasContext *ctx, arg_CPSE *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + ctx->skip_cond = TCG_COND_EQ; + ctx->skip_var0 = cpu_r[a->rd]; + ctx->skip_var1 = cpu_r[a->rr]; + return true; +} + +/* + * This instruction performs a compare between two registers Rd and Rr. + * None of the registers are changed. All conditional branches can be used + * after this instruction. + */ +static bool trans_CP(DisasContext *ctx, arg_CP *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_ZNSf(R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs a compare between two registers Rd and Rr and + * also takes into account the previous carry. None of the registers are + * changed. All conditional branches can be used after this instruction. + */ +static bool trans_CPC(DisasContext *ctx, arg_CPC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv zero = tcg_const_i32(0); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr - Cf */ + tcg_gen_sub_tl(R, R, cpu_Cf); + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_NSf(R); + + /* + * Previous value remains unchanged when the result is zero; + * cleared otherwise. + */ + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_Zf, R, zero, cpu_Zf, zero); + + tcg_temp_free_i32(zero); + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs a compare between register Rd and a constant. + * The register is not changed. All conditional branches can be used after this + * instruction. 
+ */ +static bool trans_CPI(DisasContext *ctx, arg_CPI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + int Imm = a->imm; + TCGv Rr = tcg_const_i32(Imm); + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_ZNSf(R); + + tcg_temp_free_i32(R); + tcg_temp_free_i32(Rr); + + return true; +} + +/* + * This instruction tests a single bit in a register and skips the next + * instruction if the bit is cleared. + */ +static bool trans_SBRC(DisasContext *ctx, arg_SBRC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rr = cpu_r[a->rr]; + + ctx->skip_cond = TCG_COND_EQ; + ctx->skip_var0 = tcg_temp_new(); + ctx->free_skip_var0 = true; + + tcg_gen_andi_tl(ctx->skip_var0, Rr, 1 << a->bit); + return true; +} + +/* + * This instruction tests a single bit in a register and skips the next + * instruction if the bit is set. + */ +static bool trans_SBRS(DisasContext *ctx, arg_SBRS *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rr = cpu_r[a->rr]; + + ctx->skip_cond = TCG_COND_NE; + ctx->skip_var0 = tcg_temp_new(); + ctx->free_skip_var0 = true; + + tcg_gen_andi_tl(ctx->skip_var0, Rr, 1 << a->bit); + return true; +} + +/* + * This instruction tests a single bit in an I/O Register and skips the + * next instruction if the bit is cleared. This instruction operates on the + * lower 32 I/O Registers -- addresses 0-31. + */ +static bool trans_SBIC(DisasContext *ctx, arg_SBIC *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv temp = tcg_const_i32(a->reg); + + gen_helper_inb(temp, cpu_env, temp); + tcg_gen_andi_tl(temp, temp, 1 << a->bit); + ctx->skip_cond = TCG_COND_EQ; + ctx->skip_var0 = temp; + ctx->free_skip_var0 = true; + + return true; +} + +/* + * This instruction tests a single bit in an I/O Register and skips the + * next instruction if the bit is set. This instruction operates on the lower + * 32 I/O Registers -- addresses 0-31. + */ +static bool trans_SBIS(DisasContext *ctx, arg_SBIS *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv temp = tcg_const_i32(a->reg); + + gen_helper_inb(temp, cpu_env, temp); + tcg_gen_andi_tl(temp, temp, 1 << a->bit); + ctx->skip_cond = TCG_COND_NE; + ctx->skip_var0 = temp; + ctx->free_skip_var0 = true; + + return true; +} + +/* + * Conditional relative branch. Tests a single bit in SREG and branches + * relatively to PC if the bit is cleared. This instruction branches relatively + * to PC in either direction (PC - 63 < = destination <= PC + 64). The + * parameter k is the offset from PC and is represented in two's complement + * form. + */ +static bool trans_BRBC(DisasContext *ctx, arg_BRBC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGLabel *not_taken = gen_new_label(); + + TCGv var; + + switch (a->bit) { + case 0x00: + var = cpu_Cf; + break; + case 0x01: + var = cpu_Zf; + break; + case 0x02: + var = cpu_Nf; + break; + case 0x03: + var = cpu_Vf; + break; + case 0x04: + var = cpu_Sf; + break; + case 0x05: + var = cpu_Hf; + break; + case 0x06: + var = cpu_Tf; + break; + case 0x07: + var = cpu_If; + break; + default: + g_assert_not_reached(); + } + + tcg_gen_brcondi_i32(TCG_COND_NE, var, 0, not_taken); + gen_goto_tb(ctx, 0, ctx->npc + a->imm); + gen_set_label(not_taken); + + ctx->bstate = DISAS_CHAIN; + return true; +} + +/* + * Conditional relative branch. Tests a single bit in SREG and branches + * relatively to PC if the bit is set. 
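The switch statements in trans_BRBC() above and trans_BRBS() below map the 3-bit opcode field onto the individual SREG flags in the architectural order C, Z, N, V, S, H, T, I; combined with the signed 7-bit word offset this gives the documented PC-63..PC+64 range. A compact plain-C sketch of the same selection (enum and function names invented for the example):

#include <assert.h>
#include <stdint.h>

enum { FLAG_C, FLAG_Z, FLAG_N, FLAG_V, FLAG_S, FLAG_H, FLAG_T, FLAG_I };

/* BRBS: branch if the SREG bit is set; BRBC: branch if it is clear */
static uint32_t brb_target(uint32_t npc, uint8_t sreg, int bit, int offset, int want_set)
{
    int taken = ((sreg >> bit) & 1) == (want_set ? 1 : 0);
    return taken ? npc + offset : npc;    /* npc is already the word after the branch */
}

int main(void)
{
    uint8_t sreg = 1u << FLAG_Z;                           /* only Z is set */
    assert(brb_target(100, sreg, FLAG_Z, -3, 1) ==  97);   /* BREQ, taken, backwards */
    assert(brb_target(100, sreg, FLAG_C, 10, 1) == 100);   /* BRCS, not taken */
    assert(brb_target(100, sreg, FLAG_C, 10, 0) == 110);   /* BRCC, taken */
    return 0;
}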
This instruction branches relatively to + * PC in either direction (PC - 63 < = destination <= PC + 64). The parameter k + * is the offset from PC and is represented in two's complement form. + */ +static bool trans_BRBS(DisasContext *ctx, arg_BRBS *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGLabel *not_taken = gen_new_label(); + + TCGv var; + + switch (a->bit) { + case 0x00: + var = cpu_Cf; + break; + case 0x01: + var = cpu_Zf; + break; + case 0x02: + var = cpu_Nf; + break; + case 0x03: + var = cpu_Vf; + break; + case 0x04: + var = cpu_Sf; + break; + case 0x05: + var = cpu_Hf; + break; + case 0x06: + var = cpu_Tf; + break; + case 0x07: + var = cpu_If; + break; + default: + g_assert_not_reached(); + } + + tcg_gen_brcondi_i32(TCG_COND_EQ, var, 0, not_taken); + gen_goto_tb(ctx, 0, ctx->npc + a->imm); + gen_set_label(not_taken); + + ctx->bstate = DISAS_CHAIN; + return true; +} + +/* + * Data Transfer Instructions + */ + +/* + * in the gen_set_addr & gen_get_addr functions + * H assumed to be in 0x00ff0000 format + * M assumed to be in 0x000000ff format + * L assumed to be in 0x000000ff format + */ +static void gen_decl(set_addr, TCGv addr, TCGv H, TCGv M, TCGv L) +{ + tcg_gen_andi_tl(L, addr, 0x000000ff); + + tcg_gen_andi_tl(M, addr, 0x0000ff00); + tcg_gen_shri_tl(M, M, 8); + + tcg_gen_andi_tl(H, addr, 0x00ff0000); +} + +static void gen_set_xaddr(TCGContext *tcg_ctx, TCGv addr) +{ + gen_set_addr(tcg_ctx, addr, cpu_rampX, cpu_r[27], cpu_r[26]); +} + +static void gen_set_yaddr(TCGContext *tcg_ctx, TCGv addr) +{ + gen_set_addr(tcg_ctx, addr, cpu_rampY, cpu_r[29], cpu_r[28]); +} + +static void gen_set_zaddr(TCGContext *tcg_ctx, TCGv addr) +{ + gen_set_addr(tcg_ctx, addr, cpu_rampZ, cpu_r[31], cpu_r[30]); +} + +static TCGv gen_decl(get_addr, TCGv H, TCGv M, TCGv L) +{ + TCGv addr = tcg_temp_new_i32(); + + tcg_gen_deposit_tl(addr, M, H, 8, 8); + tcg_gen_deposit_tl(addr, L, addr, 8, 16); + + return addr; +} + +static TCGv gen_get_xaddr(TCGContext *tcg_ctx) +{ + return gen_get_addr(tcg_ctx, cpu_rampX, cpu_r[27], cpu_r[26]); +} + +static TCGv gen_get_yaddr(TCGContext *tcg_ctx) +{ + return gen_get_addr(tcg_ctx, cpu_rampY, cpu_r[29], cpu_r[28]); +} + +static TCGv gen_get_zaddr(TCGContext *tcg_ctx) +{ + return gen_get_addr(tcg_ctx, cpu_rampZ, cpu_r[31], cpu_r[30]); +} + +#define gen_set_xaddr(...) gen_call(set_xaddr, __VA_ARGS__) +#define gen_set_yaddr(...) gen_call(set_yaddr, __VA_ARGS__) +#define gen_set_zaddr(...) gen_call(set_zaddr, __VA_ARGS__) +#define gen_get_xaddr() gen_call(get_xaddr) +#define gen_get_yaddr() gen_call(get_yaddr) +#define gen_get_zaddr() gen_call(get_zaddr) + +/* + * Load one byte indirect from data space to register and stores an clear + * the bits in data space specified by the register. The instruction can only + * be used towards internal SRAM. The data location is pointed to by the Z (16 + * bits) Pointer Register in the Register File. Memory access is limited to the + * current data segment of 64KB. To access another data segment in devices with + * more than 64KB data space, the RAMPZ in register in the I/O area has to be + * changed. The Z-pointer Register is left unchanged by the operation. This + * instruction is especially suited for clearing status bits stored in SRAM. 
+ */ +static void gen_data_store(DisasContext *ctx, TCGv data, TCGv addr) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + if (ctx->tb->flags & TB_FLAGS_FULL_ACCESS) { + gen_helper_fullwr(cpu_env, data, addr); + } else { + tcg_gen_qemu_st8(data, addr, MMU_DATA_IDX); /* mem[addr] = data */ + } +} + +static void gen_data_load(DisasContext *ctx, TCGv data, TCGv addr) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + if (ctx->tb->flags & TB_FLAGS_FULL_ACCESS) { + gen_helper_fullrd(data, cpu_env, addr); + } else { + tcg_gen_qemu_ld8u(data, addr, MMU_DATA_IDX); /* data = mem[addr] */ + } +} + +static void gen_code_load(DisasContext *ctx, TCGv Rd, TCGv addr) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + // Unicorn: + const uint32_t code_base = avr_code_base(ctx->env); + if (code_base) { + TCGv Rc = tcg_const_i32(code_base); + tcg_gen_or_tl(addr, addr, Rc); + tcg_temp_free_i32(Rc); + } + tcg_gen_qemu_ld8u(Rd, addr, MMU_CODE_IDX); /* Rd = mem[addr] */ +} + +/* + * This instruction makes a copy of one register into another. The source + * register Rr is left unchanged, while the destination register Rd is loaded + * with a copy of Rr. + */ +static bool trans_MOV(DisasContext *ctx, arg_MOV *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + + tcg_gen_mov_tl(Rd, Rr); + + return true; +} + +/* + * This instruction makes a copy of one register pair into another register + * pair. The source register pair Rr+1:Rr is left unchanged, while the + * destination register pair Rd+1:Rd is loaded with a copy of Rr + 1:Rr. This + * instruction is not available in all devices. Refer to the device specific + * instruction set summary. + */ +static bool trans_MOVW(DisasContext *ctx, arg_MOVW *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MOVW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv RdL = cpu_r[a->rd]; + TCGv RdH = cpu_r[a->rd + 1]; + TCGv RrL = cpu_r[a->rr]; + TCGv RrH = cpu_r[a->rr + 1]; + + tcg_gen_mov_tl(RdH, RrH); + tcg_gen_mov_tl(RdL, RrL); + + return true; +} + +/* + * Loads an 8 bit constant directly to register 16 to 31. + */ +static bool trans_LDI(DisasContext *ctx, arg_LDI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + int imm = a->imm; + + tcg_gen_movi_tl(Rd, imm); + + return true; +} + +/* + * Loads one byte from the data space to a register. For parts with SRAM, + * the data space consists of the Register File, I/O memory and internal SRAM + * (and external SRAM if applicable). For parts without SRAM, the data space + * consists of the register file only. The EEPROM has a separate address space. + * A 16-bit address must be supplied. Memory access is limited to the current + * data segment of 64KB. The LDS instruction uses the RAMPD Register to access + * memory above 64KB. To access another data segment in devices with more than + * 64KB data space, the RAMPD in register in the I/O area has to be changed. + * This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_LDS(DisasContext *ctx, arg_LDS *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_rampD; + a->imm = next_word(ctx); + + tcg_gen_mov_tl(addr, H); /* addr = H:M:L */ + tcg_gen_shli_tl(addr, addr, 16); + tcg_gen_ori_tl(addr, addr, a->imm); + + gen_data_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte indirect from the data space to a register. 
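The data space that LD/ST address is layered: register file first, then the I/O registers, then SRAM. That split is what helper_fullwr()/helper_fullrd() implement and what gen_data_store()/gen_data_load() above fall back to when the TB runs in full-access mode. A plain-C sketch of the address routing, assuming the usual 32 CPU registers and 64 I/O registers (those constants, the array names and the SPL example address are illustrative, not taken from the patch):

#include <assert.h>
#include <stdint.h>

enum { NUM_CPU_REGS = 32, NUM_IO_REGS = 64 };

static uint8_t regs[NUM_CPU_REGS];
static uint8_t io[NUM_IO_REGS];
static uint8_t sram[0x10000];

/* data-space write: register file first, then I/O registers, then SRAM */
static void data_write(uint32_t addr, uint8_t data)
{
    if (addr < NUM_CPU_REGS) {
        regs[addr] = data;
    } else if (addr < NUM_CPU_REGS + NUM_IO_REGS) {
        io[addr - NUM_CPU_REGS] = data;
    } else {
        sram[addr & 0xffff] = data;
    }
}

int main(void)
{
    data_write(0x001f, 0xaa);     /* r31 */
    data_write(0x005d, 0x55);     /* I/O 0x3d (SPL on typical parts) sits at data address 0x5d */
    data_write(0x0100, 0x42);     /* internal SRAM */
    assert(regs[31] == 0xaa);
    assert(io[0x3d] == 0x55);
    assert(sram[0x0100] == 0x42);
    return 0;
}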
For parts + * with SRAM, the data space consists of the Register File, I/O memory and + * internal SRAM (and external SRAM if applicable). For parts without SRAM, the + * data space consists of the Register File only. In some parts the Flash + * Memory has been mapped to the data space and can be read using this command. + * The EEPROM has a separate address space. The data location is pointed to by + * the X (16 bits) Pointer Register in the Register File. Memory access is + * limited to the current data segment of 64KB. To access another data segment + * in devices with more than 64KB data space, the RAMPX in register in the I/O + * area has to be changed. The X-pointer Register can either be left unchanged + * by the operation, or it can be post-incremented or predecremented. These + * features are especially suited for accessing arrays, tables, and Stack + * Pointer usage of the X-pointer Register. Note that only the low byte of the + * X-pointer is updated in devices with no more than 256 bytes data space. For + * such devices, the high byte of the pointer is not used by this instruction + * and can be used for other purposes. The RAMPX Register in the I/O area is + * updated in parts with more than 64KB data space or more than 64KB Program + * memory, and the increment/decrement is added to the entire 24-bit address on + * such devices. Not all variants of this instruction is available in all + * devices. Refer to the device specific instruction set summary. In the + * Reduced Core tinyAVR the LD instruction can be used to achieve the same + * operation as LPM since the program memory is mapped to the data memory + * space. + */ +static bool trans_LDX1(DisasContext *ctx, arg_LDX1 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_xaddr(); + + gen_data_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDX2(DisasContext *ctx, arg_LDX2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_xaddr(); + + gen_data_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + + gen_set_xaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDX3(DisasContext *ctx, arg_LDX3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_xaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_load(ctx, Rd, addr); + gen_set_xaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte indirect with or without displacement from the data space + * to a register. For parts with SRAM, the data space consists of the Register + * File, I/O memory and internal SRAM (and external SRAM if applicable). For + * parts without SRAM, the data space consists of the Register File only. In + * some parts the Flash Memory has been mapped to the data space and can be + * read using this command. The EEPROM has a separate address space. The data + * location is pointed to by the Y (16 bits) Pointer Register in the Register + * File. Memory access is limited to the current data segment of 64KB. To + * access another data segment in devices with more than 64KB data space, the + * RAMPY in register in the I/O area has to be changed. The Y-pointer Register + * can either be left unchanged by the operation, or it can be post-incremented + * or predecremented. These features are especially suited for accessing + * arrays, tables, and Stack Pointer usage of the Y-pointer Register. 
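For the post-increment and pre-decrement forms just shown (trans_LDX2()/trans_LDX3()), the 16-bit X pointer lives in r27:r26 and the updated value is written back to the pair. A simplified plain-C sketch of the two access orders, ignoring RAMPX and the full-access path (array and function names invented):

#include <assert.h>
#include <stdint.h>

static uint8_t sram[0x10000];

/* LD Rd, X+ : load from X, then increment the r27:r26 pair */
static uint8_t ld_x_postinc(uint8_t r[32])
{
    uint16_t x = (uint16_t)((r[27] << 8) | r[26]);
    uint8_t v = sram[x];
    x++;
    r[26] = x & 0xff;
    r[27] = x >> 8;
    return v;
}

/* LD Rd, -X : decrement the pair first, then load */
static uint8_t ld_x_predec(uint8_t r[32])
{
    uint16_t x = (uint16_t)(((r[27] << 8) | r[26]) - 1);
    r[26] = x & 0xff;
    r[27] = x >> 8;
    return sram[x];
}

int main(void)
{
    uint8_t r[32] = {0};
    sram[0x01ff] = 0x11;

    r[26] = 0xff; r[27] = 0x01;              /* X = 0x01ff */
    assert(ld_x_postinc(r) == 0x11);         /* X becomes 0x0200 */
    assert(r[26] == 0x00 && r[27] == 0x02);
    assert(ld_x_predec(r) == 0x11);          /* X back to 0x01ff */
    assert(r[26] == 0xff && r[27] == 0x01);
    return 0;
}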
Note that + * only the low byte of the Y-pointer is updated in devices with no more than + * 256 bytes data space. For such devices, the high byte of the pointer is not + * used by this instruction and can be used for other purposes. The RAMPY + * Register in the I/O area is updated in parts with more than 64KB data space + * or more than 64KB Program memory, and the increment/decrement/displacement + * is added to the entire 24-bit address on such devices. Not all variants of + * this instruction is available in all devices. Refer to the device specific + * instruction set summary. In the Reduced Core tinyAVR the LD instruction can + * be used to achieve the same operation as LPM since the program memory is + * mapped to the data memory space. + */ +static bool trans_LDY2(DisasContext *ctx, arg_LDY2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + gen_data_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + + gen_set_yaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDY3(DisasContext *ctx, arg_LDY3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_load(ctx, Rd, addr); + gen_set_yaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDDY(DisasContext *ctx, arg_LDDY *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + tcg_gen_addi_tl(addr, addr, a->imm); /* addr = addr + q */ + gen_data_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte indirect with or without displacement from the data space + * to a register. For parts with SRAM, the data space consists of the Register + * File, I/O memory and internal SRAM (and external SRAM if applicable). For + * parts without SRAM, the data space consists of the Register File only. In + * some parts the Flash Memory has been mapped to the data space and can be + * read using this command. The EEPROM has a separate address space. The data + * location is pointed to by the Z (16 bits) Pointer Register in the Register + * File. Memory access is limited to the current data segment of 64KB. To + * access another data segment in devices with more than 64KB data space, the + * RAMPZ in register in the I/O area has to be changed. The Z-pointer Register + * can either be left unchanged by the operation, or it can be post-incremented + * or predecremented. These features are especially suited for Stack Pointer + * usage of the Z-pointer Register, however because the Z-pointer Register can + * be used for indirect subroutine calls, indirect jumps and table lookup, it + * is often more convenient to use the X or Y-pointer as a dedicated Stack + * Pointer. Note that only the low byte of the Z-pointer is updated in devices + * with no more than 256 bytes data space. For such devices, the high byte of + * the pointer is not used by this instruction and can be used for other + * purposes. The RAMPZ Register in the I/O area is updated in parts with more + * than 64KB data space or more than 64KB Program memory, and the + * increment/decrement/displacement is added to the entire 24-bit address on + * such devices. Not all variants of this instruction is available in all + * devices. Refer to the device specific instruction set summary. 
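For parts with more than 64KB of data space the comment above describes the effective address as the 24-bit concatenation RAMPY:Y, with the increment/decrement carried through RAMPY as well. A sketch of that device-level behaviour (illustrative names; this is the architectural description from the comment, not necessarily how gen_get_yaddr/gen_set_yaddr, defined earlier in this file, model every configuration):

#include <stdint.h>

/* 24-bit post-increment through RAMPY:Y */
static uint32_t y_addr_post_inc(uint8_t *rampy, uint16_t *y)
{
    uint32_t addr = ((uint32_t)*rampy << 16) | *y;  /* RAMPY:Y           */
    uint32_t next = (addr + 1) & 0xffffffu;         /* wraps at 24 bits  */
    *rampy = (uint8_t)(next >> 16);
    *y     = (uint16_t)next;
    return addr;                                    /* address of the load */
}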
In the + * Reduced Core tinyAVR the LD instruction can be used to achieve the same + * operation as LPM since the program memory is mapped to the data memory + * space. For using the Z-pointer for table lookup in Program memory see the + * LPM and ELPM instructions. + */ +static bool trans_LDZ2(DisasContext *ctx, arg_LDZ2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + gen_data_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDZ3(DisasContext *ctx, arg_LDZ3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_load(ctx, Rd, addr); + + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDDZ(DisasContext *ctx, arg_LDDZ *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + tcg_gen_addi_tl(addr, addr, a->imm); /* addr = addr + q */ + gen_data_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Stores one byte from a Register to the data space. For parts with SRAM, + * the data space consists of the Register File, I/O memory and internal SRAM + * (and external SRAM if applicable). For parts without SRAM, the data space + * consists of the Register File only. The EEPROM has a separate address space. + * A 16-bit address must be supplied. Memory access is limited to the current + * data segment of 64KB. The STS instruction uses the RAMPD Register to access + * memory above 64KB. To access another data segment in devices with more than + * 64KB data space, the RAMPD in register in the I/O area has to be changed. + * This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_STS(DisasContext *ctx, arg_STS *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_rampD; + a->imm = next_word(ctx); + + tcg_gen_mov_tl(addr, H); /* addr = H:M:L */ + tcg_gen_shli_tl(addr, addr, 16); + tcg_gen_ori_tl(addr, addr, a->imm); + gen_data_store(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Stores one byte indirect from a register to data space. For parts with SRAM, + * the data space consists of the Register File, I/O memory, and internal SRAM + * (and external SRAM if applicable). For parts without SRAM, the data space + * consists of the Register File only. The EEPROM has a separate address space. + * + * The data location is pointed to by the X (16 bits) Pointer Register in the + * Register File. Memory access is limited to the current data segment of 64KB. + * To access another data segment in devices with more than 64KB data space, the + * RAMPX in register in the I/O area has to be changed. + * + * The X-pointer Register can either be left unchanged by the operation, or it + * can be post-incremented or pre-decremented. These features are especially + * suited for accessing arrays, tables, and Stack Pointer usage of the + * X-pointer Register. Note that only the low byte of the X-pointer is updated + * in devices with no more than 256 bytes data space. For such devices, the high + * byte of the pointer is not used by this instruction and can be used for other + * purposes. 
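The displacement forms translated above and below (trans_LDDY/trans_LDDZ, trans_STDY/trans_STDZ) access the byte at pointer+q and leave the pointer itself unchanged. A short standalone sketch, kept within the 64KB segment for simplicity (illustrative names, not code from this patch):

#include <stdint.h>

static uint8_t sram[0x10000];                        /* 64KB data segment (illustrative) */

uint8_t ldd_zq(uint16_t z, uint8_t q)             { return sram[(uint16_t)(z + q)]; }  /* LDD Rd, Z+q */
void    std_zq(uint16_t z, uint8_t q, uint8_t rr) { sram[(uint16_t)(z + q)] = rr;   }  /* STD Z+q, Rr */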
The RAMPX Register in the I/O area is updated in parts with more + * than 64KB data space or more than 64KB Program memory, and the increment / + * decrement is added to the entire 24-bit address on such devices. + */ +static bool trans_STX1(DisasContext *ctx, arg_STX1 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rr]; + TCGv addr = gen_get_xaddr(); + + gen_data_store(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STX2(DisasContext *ctx, arg_STX2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rr]; + TCGv addr = gen_get_xaddr(); + + gen_data_store(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + gen_set_xaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STX3(DisasContext *ctx, arg_STX3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rr]; + TCGv addr = gen_get_xaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_store(ctx, Rd, addr); + gen_set_xaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Stores one byte indirect with or without displacement from a register to data + * space. For parts with SRAM, the data space consists of the Register File, I/O + * memory, and internal SRAM (and external SRAM if applicable). For parts + * without SRAM, the data space consists of the Register File only. The EEPROM + * has a separate address space. + * + * The data location is pointed to by the Y (16 bits) Pointer Register in the + * Register File. Memory access is limited to the current data segment of 64KB. + * To access another data segment in devices with more than 64KB data space, the + * RAMPY in register in the I/O area has to be changed. + * + * The Y-pointer Register can either be left unchanged by the operation, or it + * can be post-incremented or pre-decremented. These features are especially + * suited for accessing arrays, tables, and Stack Pointer usage of the Y-pointer + * Register. Note that only the low byte of the Y-pointer is updated in devices + * with no more than 256 bytes data space. For such devices, the high byte of + * the pointer is not used by this instruction and can be used for other + * purposes. The RAMPY Register in the I/O area is updated in parts with more + * than 64KB data space or more than 64KB Program memory, and the increment / + * decrement / displacement is added to the entire 24-bit address on such + * devices. + */ +static bool trans_STY2(DisasContext *ctx, arg_STY2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + gen_data_store(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + gen_set_yaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STY3(DisasContext *ctx, arg_STY3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_store(ctx, Rd, addr); + gen_set_yaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STDY(DisasContext *ctx, arg_STDY *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + tcg_gen_addi_tl(addr, addr, a->imm); /* addr = addr + q */ + gen_data_store(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Stores one byte indirect with or without displacement from a register to data + * space. 
For parts with SRAM, the data space consists of the Register File, I/O + * memory, and internal SRAM (and external SRAM if applicable). For parts + * without SRAM, the data space consists of the Register File only. The EEPROM + * has a separate address space. + * + * The data location is pointed to by the Y (16 bits) Pointer Register in the + * Register File. Memory access is limited to the current data segment of 64KB. + * To access another data segment in devices with more than 64KB data space, the + * RAMPY in register in the I/O area has to be changed. + * + * The Y-pointer Register can either be left unchanged by the operation, or it + * can be post-incremented or pre-decremented. These features are especially + * suited for accessing arrays, tables, and Stack Pointer usage of the Y-pointer + * Register. Note that only the low byte of the Y-pointer is updated in devices + * with no more than 256 bytes data space. For such devices, the high byte of + * the pointer is not used by this instruction and can be used for other + * purposes. The RAMPY Register in the I/O area is updated in parts with more + * than 64KB data space or more than 64KB Program memory, and the increment / + * decrement / displacement is added to the entire 24-bit address on such + * devices. + */ +static bool trans_STZ2(DisasContext *ctx, arg_STZ2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + gen_data_store(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STZ3(DisasContext *ctx, arg_STZ3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_store(ctx, Rd, addr); + + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STDZ(DisasContext *ctx, arg_STDZ *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + tcg_gen_addi_tl(addr, addr, a->imm); /* addr = addr + q */ + gen_data_store(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte pointed to by the Z-register into the destination + * register Rd. This instruction features a 100% space effective constant + * initialization or constant data fetch. The Program memory is organized in + * 16-bit words while the Z-pointer is a byte address. Thus, the least + * significant bit of the Z-pointer selects either low byte (ZLSB = 0) or high + * byte (ZLSB = 1). This instruction can address the first 64KB (32K words) of + * Program memory. The Zpointer Register can either be left unchanged by the + * operation, or it can be incremented. The incrementation does not apply to + * the RAMPZ Register. + * + * Devices with Self-Programming capability can use the LPM instruction to read + * the Fuse and Lock bit values. 
+ */ +static bool trans_LPM1(DisasContext *ctx, arg_LPM1 *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_LPM)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[0]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_r[31]; + TCGv L = cpu_r[30]; + + tcg_gen_shli_tl(addr, H, 8); /* addr = H:L */ + tcg_gen_or_tl(addr, addr, L); + gen_code_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LPM2(DisasContext *ctx, arg_LPM2 *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_LPM)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_r[31]; + TCGv L = cpu_r[30]; + + tcg_gen_shli_tl(addr, H, 8); /* addr = H:L */ + tcg_gen_or_tl(addr, addr, L); + gen_code_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LPMX(DisasContext *ctx, arg_LPMX *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_LPMX)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_r[31]; + TCGv L = cpu_r[30]; + + tcg_gen_shli_tl(addr, H, 8); /* addr = H:L */ + tcg_gen_or_tl(addr, addr, L); + gen_code_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + tcg_gen_andi_tl(L, addr, 0xff); + tcg_gen_shri_tl(addr, addr, 8); + tcg_gen_andi_tl(H, addr, 0xff); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte pointed to by the Z-register and the RAMPZ Register in + * the I/O space, and places this byte in the destination register Rd. This + * instruction features a 100% space effective constant initialization or + * constant data fetch. The Program memory is organized in 16-bit words while + * the Z-pointer is a byte address. Thus, the least significant bit of the + * Z-pointer selects either low byte (ZLSB = 0) or high byte (ZLSB = 1). This + * instruction can address the entire Program memory space. The Z-pointer + * Register can either be left unchanged by the operation, or it can be + * incremented. The incrementation applies to the entire 24-bit concatenation + * of the RAMPZ and Z-pointer Registers. + * + * Devices with Self-Programming capability can use the ELPM instruction to + * read the Fuse and Lock bit value. + */ +static bool trans_ELPM1(DisasContext *ctx, arg_ELPM1 *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ELPM)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[0]; + TCGv addr = gen_get_zaddr(); + + gen_code_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_ELPM2(DisasContext *ctx, arg_ELPM2 *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ELPM)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + gen_code_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_ELPMX(DisasContext *ctx, arg_ELPMX *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ELPMX)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + gen_code_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * SPM can be used to erase a page in the Program memory, to write a page + * in the Program memory (that is already erased), and to set Boot Loader Lock + * bits. 
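As the LPM description above states, program memory is organized in 16-bit words while Z is a byte address, so bit 0 of Z selects the low or high byte of the flash word; the LPM Z+ form (trans_LPMX) then writes the incremented address back to R31:R30 without touching RAMPZ. A behavioural sketch of those two details (illustrative names; the translation itself goes through gen_code_load and byte-addressed code accesses):

#include <stdint.h>

static const uint16_t flash_word[0x8000];            /* 64KB program memory (illustrative) */

static uint8_t lpm_z_post_inc(uint8_t *r30, uint8_t *r31)
{
    uint16_t z = (uint16_t)((*r31 << 8) | *r30);      /* Z = R31:R30                   */
    uint16_t w = flash_word[z >> 1];
    uint8_t  b = (z & 1) ? (uint8_t)(w >> 8)          /* ZLSB=1 -> high byte           */
                         : (uint8_t)(w & 0xff);       /* ZLSB=0 -> low byte            */
    z++;                                              /* post-increment, RAMPZ untouched */
    *r30 = (uint8_t)z;
    *r31 = (uint8_t)(z >> 8);
    return b;
}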
In some devices, the Program memory can be written one word at a time, + * in other devices an entire page can be programmed simultaneously after first + * filling a temporary page buffer. In all cases, the Program memory must be + * erased one page at a time. When erasing the Program memory, the RAMPZ and + * Z-register are used as page address. When writing the Program memory, the + * RAMPZ and Z-register are used as page or word address, and the R1:R0 + * register pair is used as data(1). When setting the Boot Loader Lock bits, + * the R1:R0 register pair is used as data. Refer to the device documentation + * for detailed description of SPM usage. This instruction can address the + * entire Program memory. + * + * The SPM instruction is not available in all devices. Refer to the device + * specific instruction set summary. + * + * Note: 1. R1 determines the instruction high byte, and R0 determines the + * instruction low byte. + */ +static bool trans_SPM(DisasContext *ctx, arg_SPM *a) +{ + /* TODO */ + if (!avr_have_feature(ctx, AVR_FEATURE_SPM)) { + return true; + } + + return true; +} + +static bool trans_SPMX(DisasContext *ctx, arg_SPMX *a) +{ + /* TODO */ + if (!avr_have_feature(ctx, AVR_FEATURE_SPMX)) { + return true; + } + + return true; +} + +/* + * Loads data from the I/O Space (Ports, Timers, Configuration Registers, + * etc.) into register Rd in the Register File. + */ +static bool trans_IN(DisasContext *ctx, arg_IN *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv port = tcg_const_i32(a->imm); + + gen_helper_inb(Rd, cpu_env, port); + + tcg_temp_free_i32(port); + + return true; +} + +/* + * Stores data from register Rr in the Register File to I/O Space (Ports, + * Timers, Configuration Registers, etc.). + */ +static bool trans_OUT(DisasContext *ctx, arg_OUT *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv port = tcg_const_i32(a->imm); + + gen_helper_outb(cpu_env, port, Rd); + + tcg_temp_free_i32(port); + + return true; +} + +/* + * This instruction stores the contents of register Rr on the STACK. The + * Stack Pointer is post-decremented by 1 after the PUSH. This instruction is + * not available in all devices. Refer to the device specific instruction set + * summary. + */ +static bool trans_PUSH(DisasContext *ctx, arg_PUSH *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + gen_data_store(ctx, Rd, cpu_sp); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + + return true; +} + +/* + * This instruction loads register Rd with a byte from the STACK. The Stack + * Pointer is pre-incremented by 1 before the POP. This instruction is not + * available in all devices. Refer to the device specific instruction set + * summary. + */ +static bool trans_POP(DisasContext *ctx, arg_POP *a) +{ + /* + * Using a temp to work around some strange behaviour: + * tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + * gen_data_load(ctx, Rd, cpu_sp); + * seems to cause the add to happen twice. + * This doesn't happen if either the add or the load is removed. + */ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv t1 = tcg_temp_new_i32(); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_addi_tl(t1, cpu_sp, 1); + gen_data_load(ctx, Rd, t1); + tcg_gen_mov_tl(cpu_sp, t1); + + return true; +} + +/* + * Exchanges one byte indirect between register and data space. The data + * location is pointed to by the Z (16 bits) Pointer Register in the Register + * File. Memory access is limited to the current data segment of 64KB. 
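trans_PUSH and trans_POP above encode AVR's stack discipline: PUSH stores the register to mem[SP] and then decrements SP, POP increments SP first and then loads. A compact sketch (illustrative names, not code from this patch):

#include <stdint.h>

static uint8_t sram[0x10000];                       /* 64KB data segment (illustrative) */

void    push(uint16_t *sp, uint8_t rr) { sram[*sp] = rr; (*sp)--;   }  /* store, then SP-- */
uint8_t pop (uint16_t *sp)             { (*sp)++; return sram[*sp]; }  /* ++SP, then load  */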
To + * access another data segment in devices with more than 64KB data space, the + * RAMPZ in register in the I/O area has to be changed. + * + * The Z-pointer Register is left unchanged by the operation. This instruction + * is especially suited for writing/reading status bits stored in SRAM. + */ +static bool trans_XCH(DisasContext *ctx, arg_XCH *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_RMW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_temp_new_i32(); + TCGv addr = gen_get_zaddr(); + + gen_data_load(ctx, t0, addr); + gen_data_store(ctx, Rd, addr); + tcg_gen_mov_tl(Rd, t0); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Load one byte indirect from data space to register and set bits in data + * space specified by the register. The instruction can only be used towards + * internal SRAM. The data location is pointed to by the Z (16 bits) Pointer + * Register in the Register File. Memory access is limited to the current data + * segment of 64KB. To access another data segment in devices with more than + * 64KB data space, the RAMPZ in register in the I/O area has to be changed. + * + * The Z-pointer Register is left unchanged by the operation. This instruction + * is especially suited for setting status bits stored in SRAM. + */ +static bool trans_LAS(DisasContext *ctx, arg_LAS *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_RMW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rr = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + gen_data_load(ctx, t0, addr); /* t0 = mem[addr] */ + tcg_gen_or_tl(t1, t0, Rr); + tcg_gen_mov_tl(Rr, t0); /* Rr = t0 */ + gen_data_store(ctx, t1, addr); /* mem[addr] = t1 */ + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Load one byte indirect from data space to register and stores and clear + * the bits in data space specified by the register. The instruction can + * only be used towards internal SRAM. The data location is pointed to by + * the Z (16 bits) Pointer Register in the Register File. Memory access is + * limited to the current data segment of 64KB. To access another data + * segment in devices with more than 64KB data space, the RAMPZ in register + * in the I/O area has to be changed. + * + * The Z-pointer Register is left unchanged by the operation. This instruction + * is especially suited for clearing status bits stored in SRAM. + */ +static bool trans_LAC(DisasContext *ctx, arg_LAC *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_RMW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rr = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + gen_data_load(ctx, t0, addr); /* t0 = mem[addr] */ + tcg_gen_andc_tl(t1, t0, Rr); /* t1 = t0 & (0xff - Rr) = t0 & ~Rr */ + tcg_gen_mov_tl(Rr, t0); /* Rr = t0 */ + gen_data_store(ctx, t1, addr); /* mem[addr] = t1 */ + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(addr); + + return true; +} + + +/* + * Load one byte indirect from data space to register and toggles bits in + * the data space specified by the register. The instruction can only be used + * towards SRAM. The data location is pointed to by the Z (16 bits) Pointer + * Register in the Register File. Memory access is limited to the current data + * segment of 64KB. 
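XCH, LAS and LAC above (and LAT just below) share one read-modify-write shape: the old byte at mem[Z] always ends up in the register, while memory receives the swapped, OR-ed, cleared or toggled value. A side-by-side sketch (illustrative names, not code from this patch):

#include <stdint.h>

static uint8_t sram[0x10000];                      /* 64KB data segment (illustrative) */

void xch(uint16_t z, uint8_t *rd) { uint8_t m = sram[z]; sram[z] = *rd;               *rd = m; }
void las(uint16_t z, uint8_t *rr) { uint8_t m = sram[z]; sram[z] = (uint8_t)(m | *rr);  *rr = m; }
void lac(uint16_t z, uint8_t *rr) { uint8_t m = sram[z]; sram[z] = (uint8_t)(m & ~*rr); *rr = m; }
void lat(uint16_t z, uint8_t *rd) { uint8_t m = sram[z]; sram[z] = (uint8_t)(m ^ *rd);  *rd = m; }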
To access another data segment in devices with more than + * 64KB data space, the RAMPZ in register in the I/O area has to be changed. + * + * The Z-pointer Register is left unchanged by the operation. This instruction + * is especially suited for changing status bits stored in SRAM. + */ +static bool trans_LAT(DisasContext *ctx, arg_LAT *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_RMW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + gen_data_load(ctx, t0, addr); /* t0 = mem[addr] */ + tcg_gen_xor_tl(t1, t0, Rd); + tcg_gen_mov_tl(Rd, t0); /* Rd = t0 */ + gen_data_store(ctx, t1, addr); /* mem[addr] = t1 */ + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Bit and Bit-test Instructions + */ +static void gen_decl(rshift_ZNVSf, TCGv R) +{ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + tcg_gen_shri_tl(cpu_Nf, R, 7); /* Nf = R(7) */ + tcg_gen_xor_tl(cpu_Vf, cpu_Nf, cpu_Cf); + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf); /* Sf = Nf ^ Vf */ +} + +#define gen_rshift_ZNVSf(...) gen_call(rshift_ZNVSf, __VA_ARGS__) + +/* + * Shifts all bits in Rd one place to the right. Bit 7 is cleared. Bit 0 is + * loaded into the C Flag of the SREG. This operation effectively divides an + * unsigned value by two. The C Flag can be used to round the result. + */ +static bool trans_LSR(DisasContext *ctx, arg_LSR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_andi_tl(cpu_Cf, Rd, 1); + tcg_gen_shri_tl(Rd, Rd, 1); + + /* update status register */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, Rd, 0); /* Zf = Rd == 0 */ + tcg_gen_movi_tl(cpu_Nf, 0); + tcg_gen_mov_tl(cpu_Vf, cpu_Cf); + tcg_gen_mov_tl(cpu_Sf, cpu_Vf); + + return true; +} + +/* + * Shifts all bits in Rd one place to the right. The C Flag is shifted into + * bit 7 of Rd. Bit 0 is shifted into the C Flag. This operation, combined + * with ASR, effectively divides multi-byte signed values by two. Combined with + * LSR it effectively divides multi-byte unsigned values by two. The Carry Flag + * can be used to round the result. + */ +static bool trans_ROR(DisasContext *ctx, arg_ROR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_temp_new_i32(); + + tcg_gen_shli_tl(t0, cpu_Cf, 7); + + /* update status register */ + tcg_gen_andi_tl(cpu_Cf, Rd, 1); + + /* update output register */ + tcg_gen_shri_tl(Rd, Rd, 1); + tcg_gen_or_tl(Rd, Rd, t0); + + /* update status register */ + gen_rshift_ZNVSf(Rd); + + tcg_temp_free_i32(t0); + + return true; +} + +/* + * Shifts all bits in Rd one place to the right. Bit 7 is held constant. Bit 0 + * is loaded into the C Flag of the SREG. This operation effectively divides a + * signed value by two without changing its sign. The Carry Flag can be used to + * round the result. + */ +static bool trans_ASR(DisasContext *ctx, arg_ASR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_temp_new_i32(); + + /* update status register */ + tcg_gen_andi_tl(cpu_Cf, Rd, 1); /* Cf = Rd(0) */ + + /* update output register */ + tcg_gen_andi_tl(t0, Rd, 0x80); /* Rd = (Rd & 0x80) | (Rd >> 1) */ + tcg_gen_shri_tl(Rd, Rd, 1); + tcg_gen_or_tl(Rd, Rd, t0); + + /* update status register */ + gen_rshift_ZNVSf(Rd); + + tcg_temp_free_i32(t0); + + return true; +} + +/* + * Swaps high and low nibbles in a register. 
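The right-shift instructions above (LSR inline, ROR/ASR via rshift_ZNVSf) update SREG as C = old bit 0, Z = (result == 0), N = result bit 7 (always 0 for LSR), V = N ^ C and S = N ^ V; for example, LSR of 0x01 yields 0x00 with C=1, Z=1, N=0, V=1, S=1. A standalone sketch of the same update (illustrative names, not code from this patch):

#include <stdint.h>

typedef struct { uint8_t C, Z, N, V, S; } sreg_t;

static uint8_t lsr(uint8_t rd, sreg_t *f)
{
    f->C = rd & 1;              /* Cf = Rd(0)            */
    rd >>= 1;                   /* bit 7 is cleared      */
    f->Z = (rd == 0);
    f->N = 0;                   /* LSR always clears N   */
    f->V = f->N ^ f->C;
    f->S = f->N ^ f->V;
    return rd;
}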
+ */ +static bool trans_SWAP(DisasContext *ctx, arg_SWAP *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + tcg_gen_andi_tl(t0, Rd, 0x0f); + tcg_gen_shli_tl(t0, t0, 4); + tcg_gen_andi_tl(t1, Rd, 0xf0); + tcg_gen_shri_tl(t1, t1, 4); + tcg_gen_or_tl(Rd, t0, t1); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + + return true; +} + +/* + * Sets a specified bit in an I/O Register. This instruction operates on + * the lower 32 I/O Registers -- addresses 0-31. + */ +static bool trans_SBI(DisasContext *ctx, arg_SBI *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv data = tcg_temp_new_i32(); + TCGv port = tcg_const_i32(a->reg); + + gen_helper_inb(data, cpu_env, port); + tcg_gen_ori_tl(data, data, 1 << a->bit); + gen_helper_outb(cpu_env, port, data); + + tcg_temp_free_i32(port); + tcg_temp_free_i32(data); + + return true; +} + +/* + * Clears a specified bit in an I/O Register. This instruction operates on + * the lower 32 I/O Registers -- addresses 0-31. + */ +static bool trans_CBI(DisasContext *ctx, arg_CBI *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv data = tcg_temp_new_i32(); + TCGv port = tcg_const_i32(a->reg); + + gen_helper_inb(data, cpu_env, port); + tcg_gen_andi_tl(data, data, ~(1 << a->bit)); + gen_helper_outb(cpu_env, port, data); + + tcg_temp_free_i32(data); + tcg_temp_free_i32(port); + + return true; +} + +/* + * Stores bit b from Rd to the T Flag in SREG (Status Register). + */ +static bool trans_BST(DisasContext *ctx, arg_BST *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_andi_tl(cpu_Tf, Rd, 1 << a->bit); + tcg_gen_shri_tl(cpu_Tf, cpu_Tf, a->bit); + + return true; +} + +/* + * Copies the T Flag in the SREG (Status Register) to bit b in register Rd. + */ +static bool trans_BLD(DisasContext *ctx, arg_BLD *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t1 = tcg_temp_new_i32(); + + tcg_gen_andi_tl(Rd, Rd, ~(1u << a->bit)); /* clear bit */ + tcg_gen_shli_tl(t1, cpu_Tf, a->bit); /* create mask */ + tcg_gen_or_tl(Rd, Rd, t1); + + tcg_temp_free_i32(t1); + + return true; +} + +/* + * Sets a single Flag or bit in SREG. + */ +static bool trans_BSET(DisasContext *ctx, arg_BSET *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + switch (a->bit) { + case 0x00: + tcg_gen_movi_tl(cpu_Cf, 0x01); + break; + case 0x01: + tcg_gen_movi_tl(cpu_Zf, 0x01); + break; + case 0x02: + tcg_gen_movi_tl(cpu_Nf, 0x01); + break; + case 0x03: + tcg_gen_movi_tl(cpu_Vf, 0x01); + break; + case 0x04: + tcg_gen_movi_tl(cpu_Sf, 0x01); + break; + case 0x05: + tcg_gen_movi_tl(cpu_Hf, 0x01); + break; + case 0x06: + tcg_gen_movi_tl(cpu_Tf, 0x01); + break; + case 0x07: + tcg_gen_movi_tl(cpu_If, 0x01); + break; + } + + return true; +} + +/* + * Clears a single Flag in SREG. 
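trans_SBI/trans_CBI above perform a read-modify-write of a single bit through the inb/outb helpers rather than through plain memory ops. A sketch of the same operation with illustrative io_read/io_write stand-ins for those helpers (not code from this patch):

#include <stdint.h>

static uint8_t io[32];                                        /* lower I/O registers (illustrative) */
static uint8_t io_read (uint8_t port)            { return io[port]; }
static void    io_write(uint8_t port, uint8_t v) { io[port] = v;    }

void sbi(uint8_t port, unsigned bit) { io_write(port, io_read(port) |  (uint8_t)(1u << bit)); }
void cbi(uint8_t port, unsigned bit) { io_write(port, io_read(port) & (uint8_t)~(1u << bit)); }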
+ */ +static bool trans_BCLR(DisasContext *ctx, arg_BCLR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + switch (a->bit) { + case 0x00: + tcg_gen_movi_tl(cpu_Cf, 0x00); + break; + case 0x01: + tcg_gen_movi_tl(cpu_Zf, 0x00); + break; + case 0x02: + tcg_gen_movi_tl(cpu_Nf, 0x00); + break; + case 0x03: + tcg_gen_movi_tl(cpu_Vf, 0x00); + break; + case 0x04: + tcg_gen_movi_tl(cpu_Sf, 0x00); + break; + case 0x05: + tcg_gen_movi_tl(cpu_Hf, 0x00); + break; + case 0x06: + tcg_gen_movi_tl(cpu_Tf, 0x00); + break; + case 0x07: + tcg_gen_movi_tl(cpu_If, 0x00); + break; + } + + return true; +} + +/* + * MCU Control Instructions + */ + +/* + * The BREAK instruction is used by the On-chip Debug system, and is + * normally not used in the application software. When the BREAK instruction is + * executed, the AVR CPU is set in the Stopped Mode. This gives the On-chip + * Debugger access to internal resources. If any Lock bits are set, or either + * the JTAGEN or OCDEN Fuses are unprogrammed, the CPU will treat the BREAK + * instruction as a NOP and will not enter the Stopped mode. This instruction + * is not available in all devices. Refer to the device specific instruction + * set summary. + */ +static bool trans_BREAK(DisasContext *ctx, arg_BREAK *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_BREAK)) { + return true; + } + +#ifdef BREAKPOINT_ON_BREAK + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + tcg_gen_movi_tl(cpu_pc, ctx->npc - 1); + gen_helper_debug(cpu_env); + ctx->bstate = DISAS_EXIT; +#else + /* NOP */ +#endif + + return true; +} + +/* + * This instruction performs a single cycle No Operation. + */ +static bool trans_NOP(DisasContext *ctx, arg_NOP *a) +{ + + /* NOP */ + + return true; +} + +/* + * This instruction sets the circuit in sleep mode defined by the MCU + * Control Register. + */ +static bool trans_SLEEP(DisasContext *ctx, arg_SLEEP *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + gen_helper_sleep(cpu_env); + ctx->bstate = DISAS_NORETURN; + return true; +} + +/* + * This instruction resets the Watchdog Timer. This instruction must be + * executed within a limited time given by the WD prescaler. See the Watchdog + * Timer hardware specification. + */ +static bool trans_WDR(DisasContext *ctx, arg_WDR *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + gen_helper_wdr(cpu_env); + + return true; +} + +/* + * Core translation mechanism functions: + * + * - translate() + * - canonicalize_skip() + * - gen_intermediate_code() + * - restore_state_to_opc() + * + */ +static void translate(DisasContext *ctx) +{ + INIT_UC_CONTEXT_FROM_DISAS(ctx); + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + + // Unicorn: end address tells us to stop emulation + const target_ulong insn_pc = ctx->npc; + if (uc_addr_is_exit(uc, insn_pc*2)) { + ctx->bstate = DISAS_UC_EXIT; + return; + } + + // Unicorn: trace this instruction on request + bool insn_hook = false; + TCGOp *insn_prev_op = NULL; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_CODE, insn_pc*2)) { + + // sync PC in advance + tcg_gen_movi_tl(cpu_pc, insn_pc); + + // save the last operand + insn_prev_op = tcg_last_op(tcg_ctx); + insn_hook = true; + gen_uc_tracecode(tcg_ctx, 0xf1, UC_HOOK_CODE_IDX, uc, insn_pc*2); + + // the callback might want to stop emulation immediately + check_exit_request(tcg_ctx); + } + + uint32_t opcode = next_word(ctx); + if (!decode_insn(ctx, opcode)) { + gen_helper_unsupported(cpu_env); + ctx->bstate = DISAS_NORETURN; + } + + if (insn_hook) { + // Unicorn: patch the callback to have the proper instruction size. 
+ TCGOp *const tcg_op = insn_prev_op ? + QTAILQ_NEXT(insn_prev_op, link) : QTAILQ_FIRST(&tcg_ctx->ops); + tcg_op->args[1] = (ctx->npc - insn_pc)*2; + } +} + +/* Standardize the cpu_skip condition to NE. */ +static bool canonicalize_skip(DisasContext *ctx) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + switch (ctx->skip_cond) { + case TCG_COND_NEVER: + /* Normal case: cpu_skip is known to be false. */ + return false; + + case TCG_COND_ALWAYS: + /* + * Breakpoint case: cpu_skip is known to be true, via TB_FLAGS_SKIP. + * The breakpoint is on the instruction being skipped, at the start + * of the TranslationBlock. No need to update. + */ + return false; + + case TCG_COND_NE: + if (ctx->skip_var1 == NULL) { + tcg_gen_mov_tl(cpu_skip, ctx->skip_var0); + } else { + tcg_gen_xor_tl(cpu_skip, ctx->skip_var0, ctx->skip_var1); + ctx->skip_var1 = NULL; + } + break; + + default: + /* Convert to a NE condition vs 0. */ + if (ctx->skip_var1 == NULL) { + tcg_gen_setcondi_tl(ctx->skip_cond, cpu_skip, ctx->skip_var0, 0); + } else { + tcg_gen_setcond_tl(ctx->skip_cond, cpu_skip, + ctx->skip_var0, ctx->skip_var1); + ctx->skip_var1 = NULL; + } + ctx->skip_cond = TCG_COND_NE; + break; + } + if (ctx->free_skip_var0) { + tcg_temp_free(ctx->skip_var0); + ctx->free_skip_var0 = false; + } + ctx->skip_var0 = cpu_skip; + return true; +} + +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +{ + CPUAVRState *env = cs->env_ptr; + DisasContext ctx = { + .tb = tb, + .cs = cs, + .env = env, + .memidx = 0, + .bstate = DISAS_NEXT, + .skip_cond = TCG_COND_NEVER, + .singlestep = cs->singlestep_enabled, + }; + target_ulong pc_start = tb->pc / 2; + int num_insns = 0; + + INIT_UC_CONTEXT_FROM_DISAS(&ctx); + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(&ctx); + + if (tb->flags & TB_FLAGS_FULL_ACCESS) { + /* + * This flag is set by ST/LD instruction we will regenerate it ONLY + * with mem/cpu memory access instead of mem access + */ + max_insns = 1; + } + if (ctx.singlestep) { + max_insns = 1; + } + + // Unicorn: trace this block on request + bool block_hook = false; + TCGOp *block_prev_op = NULL; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_BLOCK, tb->pc)) { + + // save the last operand + block_prev_op = tcg_last_op(tcg_ctx); + block_hook = true; + gen_uc_tracecode(tcg_ctx, 0xf8, UC_HOOK_BLOCK_IDX, uc, tb->pc); + } + + gen_tb_start(tb); + + ctx.npc = pc_start; + if (tb->flags & TB_FLAGS_SKIP) { + ctx.skip_cond = TCG_COND_ALWAYS; + ctx.skip_var0 = cpu_skip; + } + + do { + TCGLabel *skip_label = NULL; + + /* translate current instruction */ + tcg_gen_insn_start(ctx.npc); + num_insns++; + + /* + * this is due to some strange GDB behavior + * let's assume main has address 0x100 + * b main - sets breakpoint at address 0x00000100 (code) + * b *0x100 - sets breakpoint at address 0x00800100 (data) + */ + if (unlikely(!ctx.singlestep && + (cpu_breakpoint_test(cs, avr_code_base(env) | ctx.npc * 2, BP_ANY) || + cpu_breakpoint_test(cs, OFFSET_DATA | ctx.npc * 2, BP_ANY)))) { + canonicalize_skip(&ctx); + tcg_gen_movi_tl(cpu_pc, ctx.npc); + gen_helper_debug(cpu_env); + goto done_generating; + } + + /* Conditionally skip the next instruction, if indicated. */ + if (ctx.skip_cond != TCG_COND_NEVER) { + skip_label = gen_new_label(); + if (ctx.skip_var0 == cpu_skip) { + /* + * Copy cpu_skip so that we may zero it before the branch. + * This ensures that cpu_skip is non-zero after the label + * if and only if the skipped insn itself sets a skip. 
+ */ + ctx.free_skip_var0 = true; + ctx.skip_var0 = tcg_temp_new(); + tcg_gen_mov_tl(ctx.skip_var0, cpu_skip); + tcg_gen_movi_tl(cpu_skip, 0); + } + if (ctx.skip_var1 == NULL) { + tcg_gen_brcondi_tl(ctx.skip_cond, ctx.skip_var0, 0, skip_label); + } else { + tcg_gen_brcond_tl(ctx.skip_cond, ctx.skip_var0, + ctx.skip_var1, skip_label); + ctx.skip_var1 = NULL; + } + if (ctx.free_skip_var0) { + tcg_temp_free(ctx.skip_var0); + ctx.free_skip_var0 = false; + } + ctx.skip_cond = TCG_COND_NEVER; + ctx.skip_var0 = NULL; + } + + translate(&ctx); + + if (skip_label) { + canonicalize_skip(&ctx); + gen_set_label(skip_label); + if (ctx.bstate == DISAS_NORETURN) { + ctx.bstate = DISAS_CHAIN; + } + } + } while (ctx.bstate == DISAS_NEXT + && num_insns < max_insns + && (ctx.npc - pc_start) * 2 < TARGET_PAGE_SIZE - 4 + && !tcg_op_buf_full()); + + if (tb->cflags & CF_LAST_IO) { + gen_io_end(); + } + + bool nonconst_skip = canonicalize_skip(&ctx); + + switch (ctx.bstate) { + case DISAS_NORETURN: + assert(!nonconst_skip); + break; + case DISAS_NEXT: + case DISAS_TOO_MANY: + case DISAS_CHAIN: + if (!nonconst_skip) { + /* Note gen_goto_tb checks singlestep. */ + gen_goto_tb(&ctx, 1, ctx.npc); + break; + } + tcg_gen_movi_tl(cpu_pc, ctx.npc); + /* fall through */ + case DISAS_LOOKUP: + if (!ctx.singlestep) { + tcg_gen_lookup_and_goto_ptr(); + break; + } + /* fall through */ + case DISAS_EXIT: + if (ctx.singlestep) { + gen_helper_debug(cpu_env); + } else { + tcg_gen_exit_tb(NULL, 0); + } + break; + case DISAS_UC_EXIT: + tcg_gen_movi_tl(cpu_pc, ctx.npc); + gen_helper_uc_avr_exit(tcg_ctx, cpu_env); + break; + default: + g_assert_not_reached(); + } + +done_generating: + gen_tb_end(tb, num_insns); + + tb->size = (ctx.npc - pc_start) * 2; + tb->icount = num_insns; + + hooked_regions_check(uc, tb->pc, tb->size); + + if (block_hook) { + // Unicorn: patch the callback to have the proper block size. + TCGOp *const tcg_op = block_prev_op ? 
+ QTAILQ_NEXT(block_prev_op, link) : QTAILQ_FIRST(&tcg_ctx->ops); + tcg_op->args[1] = (ctx.npc - pc_start)*2; + } + +#ifdef DEBUG_DISAS +#if 0 + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) + && qemu_log_in_addr_range(tb->pc)) { + FILE *fd; + fd = qemu_log_lock(); + qemu_log("IN: %s\n", lookup_symbol(tb->pc)); + log_target_disas(cs, tb->pc, tb->size); + qemu_log("\n"); + qemu_log_unlock(fd); + } +#endif +#endif +} + +void restore_state_to_opc(CPUAVRState *env, TranslationBlock *tb, + target_ulong *data) +{ + env->pc_w = data[0]; +} diff --git a/qemu/target/avr/unicorn.c b/qemu/target/avr/unicorn.c new file mode 100644 index 0000000000..09b5f628dd --- /dev/null +++ b/qemu/target/avr/unicorn.c @@ -0,0 +1,280 @@ +/* Unicorn Emulator Engine */ +/* By Nguyen Anh Quynh , 2015 */ + +/* + Created for Unicorn Engine by Glenn Baker , 2024 +*/ + +#include "qemu/typedefs.h" +#include "unicorn/unicorn.h" +#include "sysemu/cpus.h" +#include "sysemu/tcg.h" +#include "cpu.h" +#include "uc_priv.h" +#include "unicorn_common.h" +#include "unicorn.h" + +AVRCPU *cpu_avr_init(struct uc_struct *uc); + +static inline uint32_t get_pc(CPUAVRState *env) +{ + return env->pc_w*2; +} + +static uint64_t avr_get_pc(struct uc_struct *uc) +{ + return get_pc((CPUAVRState *)uc->cpu->env_ptr); +} + +static inline void set_pc(CPUAVRState *env, uint32_t value) +{ + env->pc_w = value/2; +} + +static void avr_set_pc(struct uc_struct *uc, uint64_t address) +{ + set_pc((CPUAVRState *)uc->cpu->env_ptr, address); +} + +static void reg_reset(struct uc_struct *uc) +{ +} + +#define GET_BYTE(x, n) (((x) >> (n)*8) & 0xff) +#define SET_BYTE(x, n, b) (x = ((x) & ~(0xff << ((n)*8))) | ((b) << ((n)*8))) +#define GET_RAMP(reg) GET_BYTE(env->glue(ramp,reg), 2) +#define SET_RAMP(reg, val) SET_BYTE(env->glue(ramp,reg), 2, val) + +DEFAULT_VISIBILITY +uc_err reg_read(void *_env, int mode, unsigned int regid, void *value, + size_t *size) +{ + CPUAVRState *const env = _env; + uc_err ret = UC_ERR_ARG; + + switch (regid) { + case UC_AVR_REG_PC: + CHECK_REG_TYPE(uint32_t); + *(uint32_t *)value = get_pc(env); + break; + case UC_AVR_REG_SP: + CHECK_REG_TYPE(uint32_t); + *(uint32_t *)value = env->sp; + break; + + case UC_AVR_REG_RAMPD: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_RAMP(D); + break; + case UC_AVR_REG_RAMPX: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_RAMP(X); + break; + case UC_AVR_REG_RAMPY: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_RAMP(Y); + break; + case UC_AVR_REG_RAMPZ: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_RAMP(Z); + break; + case UC_AVR_REG_EIND: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_BYTE(env->eind, 2); + break; + case UC_AVR_REG_SPL: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_BYTE(env->sp, 0); + break; + case UC_AVR_REG_SPH: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_BYTE(env->sp, 1); + break; + case UC_AVR_REG_SREG: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = cpu_get_sreg(env); + break; + + default: { + uint64_t v = 0; + if (regid >= UC_AVR_REG_R0 && regid <= UC_AVR_REG_R31) { + CHECK_REG_TYPE(uint8_t); + *(int8_t *)value = (int8_t)env->r[regid - UC_AVR_REG_R0]; + } + else if (regid >= UC_AVR_REG_R0W && regid <= UC_AVR_REG_R30W) { + const uint32_t *const r = &env->r[regid - UC_AVR_REG_R0W]; + for (int k = 0; k < 2; k++) + SET_BYTE(v, k, (r[k] & 0xff)); + CHECK_REG_TYPE(uint16_t); + *(int16_t *)value = (int16_t)v; + } + else if (regid >= UC_AVR_REG_R0D && regid <= UC_AVR_REG_R28D) { + const uint32_t *const r = &env->r[regid - UC_AVR_REG_R0D]; + for (int 
k = 0; k < 4; k++) + SET_BYTE(v, k, (r[k] & 0xff)); + CHECK_REG_TYPE(uint32_t); + *(int32_t *)value = (int32_t)v; + } + break; + } + } + + CHECK_RET_DEPRECATE(ret, regid); + return ret; +} + +DEFAULT_VISIBILITY +uc_err reg_write(void *_env, int mode, unsigned int regid, const void *value, + size_t *size, int *setpc) +{ + CPUAVRState *const env = _env; + uc_err ret = UC_ERR_ARG; + + switch (regid) { + case UC_AVR_REG_PC: + CHECK_REG_TYPE(uint32_t); + set_pc(env, *(uint32_t *)value); + *setpc = 1; + break; + case UC_AVR_REG_SP: + CHECK_REG_TYPE(uint32_t); + env->sp = *(uint32_t *)value; + break; + + case UC_AVR_REG_RAMPD: + CHECK_REG_TYPE(uint8_t); + SET_RAMP(D, *(uint8_t *)value); + break; + case UC_AVR_REG_RAMPX: + CHECK_REG_TYPE(uint8_t); + SET_RAMP(X, *(uint8_t *)value); + break; + case UC_AVR_REG_RAMPY: + CHECK_REG_TYPE(uint8_t); + SET_RAMP(Y, *(uint8_t *)value); + break; + case UC_AVR_REG_RAMPZ: + CHECK_REG_TYPE(uint8_t); + SET_RAMP(Z, *(uint8_t *)value); + break; + case UC_AVR_REG_EIND: + CHECK_REG_TYPE(uint8_t); + SET_BYTE(env->eind, 2, *(uint8_t *)value); + break; + case UC_AVR_REG_SPL: + CHECK_REG_TYPE(uint8_t); + SET_BYTE(env->sp, 0, *(uint8_t *)value); + break; + case UC_AVR_REG_SPH: + CHECK_REG_TYPE(uint8_t); + SET_BYTE(env->sp, 1, *(uint8_t *)value); + break; + case UC_AVR_REG_SREG: + CHECK_REG_TYPE(uint8_t); + cpu_set_sreg(env, *(uint8_t *)value); + break; + + default: { + uint64_t v; + uint32_t *r = NULL; + int rlen = 0; + if (regid >= UC_AVR_REG_R0 && regid <= UC_AVR_REG_R31) { + v = *(uint8_t *)value; + r = &env->r[regid - UC_AVR_REG_R0]; + rlen = 1; + CHECK_REG_TYPE(uint8_t); + } + else if (regid >= UC_AVR_REG_R0W && regid <= UC_AVR_REG_R30W) { + v = *(uint16_t *)value; + r = &env->r[regid - UC_AVR_REG_R0W]; + rlen = 2; + CHECK_REG_TYPE(uint16_t); + } + else if (regid >= UC_AVR_REG_R0D && regid <= UC_AVR_REG_R28D) { + v = *(uint32_t *)value; + r = &env->r[regid - UC_AVR_REG_R0D]; + rlen = 4; + CHECK_REG_TYPE(uint32_t); + } + if (r && rlen > 0) { + for (int k = 0; k < rlen; k++) + r[k] = GET_BYTE(v, k); + } + } + } + + CHECK_RET_DEPRECATE(ret, regid); + return ret; +} + +static int avr_cpus_init(struct uc_struct *uc, const char *cpu_model) +{ + AVRCPU *cpu; + + cpu = cpu_avr_init(uc); + if (cpu == NULL) { + return -1; + } + + return 0; +} + +static void avr_release(void *ctx) +{ + int i; + TCGContext *tcg_ctx = (TCGContext *)ctx; + AVRCPU *cpu = (AVRCPU *)tcg_ctx->uc->cpu; + CPUTLBDesc *d = cpu->neg.tlb.d; + CPUTLBDescFast *f = cpu->neg.tlb.f; + CPUTLBDesc *desc; + CPUTLBDescFast *fast; + + release_common(ctx); + for (i = 0; i < NB_MMU_MODES; i++) { + desc = &(d[i]); + fast = &(f[i]); + g_free(desc->iotlb); + g_free(fast->table); + } +} + +static inline bool is_flash_memory(hwaddr addr, size_t size, uint32_t perms) +{ + if ((addr ^ UC_AVR_MEM_FLASH) >> 24) + return false; + if ((perms & UC_PROT_ALL) != (UC_PROT_READ|UC_PROT_EXEC)) + return false; + return true; +} + +static MemoryRegion *avr_memory_map(struct uc_struct *uc, hwaddr begin, size_t size, uint32_t perms) +{ + MemoryRegion *const mr = memory_map(uc, begin, size, perms); + if (mr && is_flash_memory(begin, size, perms)) + set_avr_feature(&AVR_CPU(uc->cpu)->env, AVR_FEATURE_FLASH); + return mr; +} + +static MemoryRegion *avr_memory_map_ptr(struct uc_struct *uc, hwaddr begin, size_t size, uint32_t perms, void *ptr) +{ + MemoryRegion *const mr = memory_map_ptr(uc, begin, size, perms, ptr); + if (mr && is_flash_memory(begin, size, perms)) + set_avr_feature(&AVR_CPU(uc->cpu)->env, AVR_FEATURE_FLASH); + return mr; +} + 
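avr_memory_map/avr_memory_map_ptr above wrap the generic memory_map and turn on AVR_FEATURE_FLASH whenever the mapped region sits in the UC_AVR_MEM_FLASH range with exactly READ|EXEC permissions. A caller-side sketch of triggering that through the public API; it assumes the engine was opened for the AVR architecture and that UC_AVR_MEM_FLASH is exposed by the AVR header added in this series, and the 16 KiB size is only an example:

#include <unicorn/unicorn.h>

/* Mapping READ|EXEC memory at the flash base routes through
 * avr_memory_map() and enables AVR_FEATURE_FLASH as a side effect. */
static uc_err map_flash(uc_engine *uc)
{
    return uc_mem_map(uc, UC_AVR_MEM_FLASH, 16 * 1024,
                      UC_PROT_READ | UC_PROT_EXEC);
}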
+DEFAULT_VISIBILITY +void uc_init(struct uc_struct *uc) +{ + uc->reg_read = reg_read; + uc->reg_write = reg_write; + uc->reg_reset = reg_reset; + uc->set_pc = avr_set_pc; + uc->get_pc = avr_get_pc; + uc->cpus_init = avr_cpus_init; + uc->release = avr_release; + uc->cpu_context_size = offsetof(CPUAVRState, features); + uc_common_init(uc); + uc->memory_map = avr_memory_map; + uc->memory_map_ptr = avr_memory_map_ptr; +} diff --git a/qemu/target/avr/unicorn.h b/qemu/target/avr/unicorn.h new file mode 100644 index 0000000000..a90b109016 --- /dev/null +++ b/qemu/target/avr/unicorn.h @@ -0,0 +1,21 @@ +/* Unicorn Emulator Engine */ +/* By Nguyen Anh Quynh , 2015 */ + +/* + Modified for Unicorn Engine by Glenn Baker , 2024 +*/ + +#ifndef UC_QEMU_TARGET_AVR_H +#define UC_QEMU_TARGET_AVR_H + +// functions to read & write registers +uc_err reg_read_avr(void *env, int mode, unsigned int regid, void *value, + size_t *size); +uc_err reg_write_avr(void *env, int mode, unsigned int regid, + const void *value, size_t *size, int *setpc); + +void uc_init_avr(struct uc_struct *uc); + +int avr_cpu_model_valid(int cpu_model); + +#endif /* UC_QEMU_TARGET_AVR_H */ diff --git a/qemu/target/avr/unicorn_helper.h b/qemu/target/avr/unicorn_helper.h new file mode 100644 index 0000000000..117b375f19 --- /dev/null +++ b/qemu/target/avr/unicorn_helper.h @@ -0,0 +1,165 @@ +#ifndef QEMU_UNICORN_HELPER_H +#define QEMU_UNICORN_HELPER_H + +#include + +#define UC_GET_TCG_CONTEXT(uc) ((uc)->tcg_ctx) +#define DISAS_GET_UC_CONTEXT(ctx) ((ctx)->env->uc) +#define DISAS_GET_TCG_CONTEXT(ctx) UC_GET_TCG_CONTEXT(DISAS_GET_UC_CONTEXT(ctx)) + +#define INIT_UC_CONTEXT_FROM_DISAS(ctx) \ + struct uc_struct *const uc = DISAS_GET_UC_CONTEXT(ctx) +#define INIT_TCG_CONTEXT_FROM_UC(uc) \ + TCGContext *const tcg_ctx = UC_GET_TCG_CONTEXT(uc) +#define INIT_CPU_ENV_FROM_TCG_CONTEXT(ctx) \ + TCGv_ptr const cpu_env = (ctx)->cpu_env +#define INIT_TCG_CONTEXT_FROM_DISAS(ctx) \ + INIT_TCG_CONTEXT_FROM_UC((ctx)->env->uc) +#define INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx) \ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); \ + INIT_CPU_ENV_FROM_TCG_CONTEXT(tcg_ctx) + +/* "qapi/error.h */ +#if 0 +#include +#define error_report(...) \ + (error)(EXIT_FAILURE, 0, __VA_ARGS__) +#endif + +/* "exec/address-spaces.h" */ +#define address_space_memory \ + (cpu->uc->address_space_memory) +#define address_space_ldub(...) \ + glue(address_space_ldub, UNICORN_ARCH_POSTFIX)(uc, __VA_ARGS__) +#define address_space_stb(...) \ + glue(address_space_stb, UNICORN_ARCH_POSTFIX)(uc, __VA_ARGS__) + +/* "tcg/tch.h" */ +#define tcg_wrapper_I(func, ...) \ + (glue(tcg_,func))(tcg_ctx, ## __VA_ARGS__) +#define tcg_wrapper_X(func, ...) \ + tcg_wrapper_I(glue(func,_avr), ## __VA_ARGS__) +#define tcg_wrapper_tl(func, ...) \ + tcg_wrapper_I(glue(func,_i32), ## __VA_ARGS__) + +#undef tcg_const_i32 +#define tcg_const_i32(...) tcg_wrapper_X(const_i32, __VA_ARGS__) +#undef tcg_gen_addi_i32 +#define tcg_gen_addi_i32(...) tcg_wrapper_X(gen_addi_i32, __VA_ARGS__) +//#undef tcg_gen_addi_tl +//#define tcg_gen_addi_tl(...) tcg_wrapper_tl(gen_addi, __VA_ARGS__) +#undef tcg_gen_add_i32 +#define tcg_gen_add_i32(...) tcg_wrapper_I(gen_add_i32, __VA_ARGS__) +#undef tcg_gen_add_tl +#define tcg_gen_add_tl(...) tcg_wrapper_tl(gen_add, __VA_ARGS__) +#undef tcg_gen_andc_i32 +#define tcg_gen_andc_i32(...) tcg_wrapper_X(gen_andc_i32, __VA_ARGS__) +//#undef tcg_gen_andc_tl +//#define tcg_gen_andc_tl(...) tcg_wrapper_tl(gen_andc, __VA_ARGS__) +#undef tcg_gen_andi_i32 +#define tcg_gen_andi_i32(...) 
tcg_wrapper_X(gen_andi_i32, __VA_ARGS__) +//#undef tcg_gen_andi_tl +//#define tcg_gen_andi_tl(...) tcg_wrapper_tl(gen_andi, __VA_ARGS__) +#undef tcg_gen_and_i32 +#define tcg_gen_and_i32(...) tcg_wrapper_I(gen_and_i32, __VA_ARGS__) +#undef tcg_gen_and_tl +#define tcg_gen_and_tl(...) tcg_wrapper_tl(gen_and, __VA_ARGS__) +#undef tcg_gen_brcondi_i32 +#define tcg_gen_brcondi_i32(...) tcg_wrapper_X(gen_brcondi_i32, __VA_ARGS__) +//#undef tcg_gen_brcondi_tl +//#define tcg_gen_brcondi_tl(...) tcg_wrapper_tl(gen_brcondi, __VA_ARGS__) +#undef tcg_gen_brcond_i32 +#define tcg_gen_brcond_i32(...) tcg_wrapper_X(gen_brcond_i32, __VA_ARGS__) +//#undef tcg_gen_brcond_tl +//#define tcg_gen_brcond_tl(...) tcg_wrapper_tl(gen_brcond, __VA_ARGS__) +#undef tcg_gen_deposit_i32 +#define tcg_gen_deposit_i32(...) tcg_wrapper_X(gen_deposit_i32, __VA_ARGS__) +//#undef tcg_gen_deposit_tl +//#define tcg_gen_deposit_tl(...) tcg_wrapper_tl(gen_deposit, __VA_ARGS__) +#undef tcg_gen_exit_tb +#define tcg_gen_exit_tb(...) tcg_wrapper_X(gen_exit_tb, __VA_ARGS__) +#undef tcg_gen_ext8s_tl +#define tcg_gen_ext8s_tl(...) tcg_wrapper_tl(gen_ext8s, __VA_ARGS__) +#undef tcg_gen_goto_tb +#define tcg_gen_goto_tb(...) tcg_wrapper_X(gen_goto_tb, __VA_ARGS__) +#undef tcg_gen_insn_start +#define tcg_gen_insn_start(...) tcg_wrapper_I(gen_insn_start, __VA_ARGS__) +#undef tcg_gen_movcond_tl +#define tcg_gen_movcond_tl(...) tcg_wrapper_tl(gen_movcond, __VA_ARGS__) +#undef tcg_gen_movi_i32 +#define tcg_gen_movi_i32(...) tcg_wrapper_I(gen_movi_i32, __VA_ARGS__) +//#undef tcg_gen_movi_i32 +//#define tcg_gen_movi_i32(...) tcg_wrapper(gen_movi_i32, __VA_ARGS__) +#undef tcg_gen_movi_tl +#define tcg_gen_movi_tl(...) tcg_wrapper_tl(gen_movi, __VA_ARGS__) +#undef tcg_gen_mov_i32 +#define tcg_gen_mov_i32(...) tcg_wrapper(gen_mov_i32, __VA_ARGS__) +#undef tcg_gen_mov_tl +#define tcg_gen_mov_tl(...) tcg_wrapper_tl(gen_mov, __VA_ARGS__) +#undef tcg_gen_mul_i32 +#define tcg_gen_mul_i32(...) tcg_wrapper(gen_mul_i32, __VA_ARGS__) +#undef tcg_gen_mul_tl +#define tcg_gen_mul_tl(...) tcg_wrapper_tl(gen_mul, __VA_ARGS__) +#undef tcg_gen_not_i32 +#define tcg_gen_not_i32(...) tcg_wrapper(gen_not_i32, __VA_ARGS__) +#undef tcg_gen_not_tl +#define tcg_gen_not_tl(...) tcg_wrapper_tl(gen_not, __VA_ARGS__) +#undef tcg_gen_ori_i32 +#define tcg_gen_ori_i32(...) tcg_wrapper_X(gen_ori_i32, __VA_ARGS__) +//#undef tcg_gen_ori_tl +//#define tcg_gen_ori_tl(...) tcg_wrapper_tl(gen_ori, __VA_ARGS__) +#undef tcg_gen_or_i32 +#define tcg_gen_or_i32(...) tcg_wrapper_I(gen_or_i32, __VA_ARGS__) +#undef tcg_gen_or_tl +#define tcg_gen_or_tl(...) tcg_wrapper_tl(gen_or, __VA_ARGS__) +#undef tcg_gen_qemu_ld8u +#define tcg_gen_qemu_ld8u(...) tcg_wrapper_I(gen_qemu_ld8u, __VA_ARGS__) +#undef tcg_gen_qemu_ld_tl +#define tcg_gen_qemu_ld_tl(...) tcg_wrapper_tl(gen_qemu_ld, __VA_ARGS__) +#undef tcg_gen_qemu_st8 +#define tcg_gen_qemu_st8(...) tcg_wrapper_I(gen_qemu_st8, __VA_ARGS__) +#undef tcg_gen_qemu_st_tl +#define tcg_gen_qemu_st_tl(...) tcg_wrapper_tl(gen_qemu_st, __VA_ARGS__) +#undef tcg_gen_setcondi_tl +#define tcg_gen_setcondi_tl(...) tcg_wrapper_tl(gen_setcondi, __VA_ARGS__) +#undef tcg_gen_setcond_tl +#define tcg_gen_setcond_tl(...) tcg_wrapper_tl(gen_setcond, __VA_ARGS__) +#undef tcg_gen_shli_i32 +#define tcg_gen_shli_i32(...) tcg_wrapper_X(gen_shli_i32, __VA_ARGS__) +//#undef tcg_gen_shli_tl +//#define tcg_gen_shli_tl(...) tcg_wrapper_tl(gen_shli, __VA_ARGS__) +#undef tcg_gen_shri_i32 +#define tcg_gen_shri_i32(...) 
tcg_wrapper_X(gen_shri_i32, __VA_ARGS__) +//#undef tcg_gen_shri_tl +//#define tcg_gen_shri_tl(...) tcg_wrapper_tl(gen_shri, __VA_ARGS__) +#undef tcg_gen_subi_i32 +#define tcg_gen_subi_i32(...) tcg_wrapper_X(gen_subi_i32, __VA_ARGS__) +//#undef tcg_gen_subi_tl +//#define tcg_gen_subi_tl(...) tcg_wrapper_tl(gen_subi, __VA_ARGS__) +#undef tcg_gen_sub_i32 +#define tcg_gen_sub_i32(...) tcg_wrapper(gen_sub_i32, __VA_ARGS__) +#undef tcg_gen_sub_tl +#define tcg_gen_sub_tl(...) tcg_wrapper_tl(gen_sub, __VA_ARGS__) +#undef tcg_gen_xori_i32 +#define tcg_gen_xori_i32(...) tcg_wrapper_X(gen_xori_i32, __VA_ARGS__) +//#undef tcg_gen_xori_tl +//#define tcg_gen_xori_tl(...) tcg_wrapper_tl(gen_xori, __VA_ARGS__) +#undef tcg_gen_xor_i32 +#define tcg_gen_xor_i32(...) tcg_wrapper(gen_xor_i32, __VA_ARGS__) +#undef tcg_gen_xor_tl +#define tcg_gen_xor_tl(...) tcg_wrapper_tl(gen_xor, __VA_ARGS__) +#undef tcg_global_mem_new_i32 +#define tcg_global_mem_new_i32(...) tcg_wrapper_I(global_mem_new_i32, __VA_ARGS__) +#undef tcg_temp_new_i32 +#define tcg_temp_new_i32() tcg_wrapper_I(temp_new_i32) +#undef tcg_temp_free +#define tcg_temp_free(...) tcg_wrapper_tl(temp_free, __VA_ARGS__) +#undef tcg_temp_free_i32 +#define tcg_temp_free_i32(...) tcg_wrapper_I(temp_free_i32, __VA_ARGS__) +#undef tcg_op_buf_full +#define tcg_op_buf_full() tcg_wrapper_I(op_buf_full) +#undef tcg_gen_lookup_and_goto_ptr +#define tcg_gen_lookup_and_goto_ptr() \ + tcg_wrapper_X(gen_lookup_and_goto_ptr) + +#endif /* QEMU_UNICORN_HELPER_H */ diff --git a/qemu/target/rh850/Makefile.objs b/qemu/target/rh850/Makefile.objs new file mode 100644 index 0000000000..aaa7c0cc64 --- /dev/null +++ b/qemu/target/rh850/Makefile.objs @@ -0,0 +1 @@ +obj-y += translate.o op_helper.o helper.o cpu.o fpu_helper.o gdbstub.o fpu_translate.o diff --git a/qemu/target/rh850/cpu-param.h b/qemu/target/rh850/cpu-param.h new file mode 100644 index 0000000000..24231873c3 --- /dev/null +++ b/qemu/target/rh850/cpu-param.h @@ -0,0 +1,11 @@ +#pragma once + +/* QEMU addressing/paging config */ +#define TARGET_PAGE_BITS 12 /* 4 KiB Pages */ + +#define TARGET_LONG_BITS 32 +#define TARGET_PHYS_ADDR_SPACE_BITS 32 +#define TARGET_VIRT_ADDR_SPACE_BITS 32 + +#define NB_MMU_MODES 4 + diff --git a/qemu/target/rh850/cpu.c b/qemu/target/rh850/cpu.c new file mode 100644 index 0000000000..b6b44b28d2 --- /dev/null +++ b/qemu/target/rh850/cpu.c @@ -0,0 +1,473 @@ +/* + * QEMU RH850 CPU + * + * Copyright (c) 2018-2019 iSYSTEM Labs d.o.o. + * Copyright (c) 2023 Quarkslab + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/ctype.h" +#include "cpu.h" +#include "exec/exec-all.h" + +/* RH850 CPU definitions */ + +/* Program registers (rh850_prog_regnames): + * r0 - zero + * r1 - assembler reserved register + * r2 - real-time OS register / address and data variable register + * r3 - stack pointer + * r4 - global pointer + * r5 - text pointer + * r6-r29 - address and data variable registers + * r30 - element pointer + * r31 - link pointer + */ + +const char * const rh850_gp_regnames[] = { + "r0-zero", "r1", "r2", "r3-sp", "r4", "r5", "r6", "r7", + "r8", "r9", "r10 ", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r2 ", + "r24", "r25", "r26", "r27", "r28", "r29", "r30-ep", "r31-lp" +}; + +// Basic system registers +const char * const rh850_sys_regnames[][MAX_SYS_REGS_IN_BANK] = { + +{ // SELECTION ID 0 [5] used to be psw, but now it is stored in flags only + "eipc", "eipsw", "fepc", "fepsw", NULL, NULL, "fpsr", "fpepc", "fpst", "fpcc", + "fpcfg", "fpec", NULL, "eiic", "feic", NULL, "ctpc", "ctpsw", NULL, NULL, + "ctbp", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "eiwr", "fewr", + NULL, "bsel"}, +{ // SELECTION ID 1 + "mcfg0", NULL, "rbase", "ebase", "intbp", "mctl", "pid", "fpipr", NULL, NULL, + NULL, "sccfg", "scbp", +}, +{ // SELECTION ID 2 + "htcfg0",NULL, NULL, NULL, NULL, NULL, "mea", "asid", "mei", NULL, + "ispr", "pmr", "icsr", "intcfg" +}, +{ // SELECTION ID 3 + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL +}, +{ // SELECTION ID 4 + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, "ictagl", "ictagh","icdatl","icdath", + NULL, NULL, NULL, NULL, "icctrl",NULL, "iccfg", NULL, "icerr", NULL +}, +{ // SELECTION ID 5 + "mpm", "mprc", NULL, NULL, "mpbrgn","mptrgn",NULL, NULL, "mca", "mcs" + "mcc", "mcr" +}, +{ // SELECTION ID 6 + "mpla0", "mpua0", "mpat0", NULL, "mpla1", "mpua1", "mpat1", NULL, "mpla2", "mpua2", + "mpat2", NULL, "mpla3", "mpua3", "mpat3", NULL, "mpla4", "mpua4", "mpat4", NULL, + "mpla5", "mpua5", "mpat5", NULL, "mpla6", "mpua6", "mpat6", NULL, "mpla7", "mpua7", + "mpat7", NULL +}, +{ // SELECTION ID 7 + /* MPU function system registers */ + "mpla8", "mpua8", "mpat8", NULL, "mpla9", "mpua9", "mpat9", NULL, "mpla10","mpua10", + "mpat10",NULL, "mpla11", "mpua11", "mpat11",NULL, "mpla12","mpua12","mpat12",NULL, + "mpla13","mpua13","mpat13", NULL, "mpla14","mpua14","mpat14",NULL, "mpla15","mpua15", + "mpat15",NULL +} +}; + +// Where bits are read only, mask is set to 0 +const uint32_t rh850_sys_reg_read_only_masks[][MAX_SYS_REGS_IN_BANK] = { + +{ //SELECTION ID 0 PSW - implemented as registers for each used bit, see cpu_ZF, ... 
+ 0xFFFFFFFF, 0x40078EFF, 0xFFFFFFFF, 0x40078EFF, 0x0, /*0x40018EFF*/ 0, 0xFFEEFFFF, 0xFFFFFFFE, 0x00003F3F, 0x000000FF, + 0x0000031F, 0x00000001, 0x0, 0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0x0000001F, 0x0, 0x0, + 0xFFFFFFFE, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x0 +}, +{ //SELECTION ID 1 + // for MCFG (idx = 0), byte 3 seems to not be writable, at least on devicee used for testing + 0x00000000, 0x0, 0x00000000, 0xFFFFFE01, 0xFFFFFE00, 0x00000003, 0x00000000, 0x0000001F, 0x0, 0x0, + 0x0, 0x000000FF, 0xFFFFFFFC +}, +{ //SELECTION ID 2 + 0x00000000, 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFFFFFF, 0x000003FF, 0x001F073F, 0x0, + 0x00000000, 0x0000FFFF, 0x00000000, 0x00000001 +}, +{ //SELECTION ID 3 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 4 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFFFA35, 0xF0FFFF00, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x0, 0x0, 0x0, 0x00020107, 0x0, 0x00000000, 0x0, 0xBF3F7FFD, 0x0 +}, +{ //SELECTION ID 5 + 0x00000003, 0x0000FFFF, 0x0, 0x0, 0x00000000, 0x00000000, 0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x0000013F +}, +{ //SELECTION ID 6 + 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFF, + 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, + 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, + 0x03FF00FF, 0x0 +}, +{ //SELECTION ID 7 + 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFF, + 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, + 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, + 0x03FF00FF, 0x0 +} +}; + + +const uint32_t rh850_sys_reg_read_only_values[][MAX_SYS_REGS_IN_BANK] = { +{ //SELECTION ID 0 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0 +}, +{ //SELECTION ID 1 + 0x4, 0x0, 0x0, 0x0, 0x0, 0x80000000, 0x12345678, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 2 + 0x00008000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 3 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 4 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x00010000, 0x0, 0x00010000, 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 5 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0 +}, +{ //SELECTION ID 6 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0 +}, +{ //SELECTION ID 7 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0 +} +}; + + + +/*Data Buffer Operation Registers (rh850_sys_databuff_regnames): + * sr24, 13 - cbdcr */ +const char * const rh850_sys_databuff_regnames[] = { /* Data buffer operation registers */ + "cbdcr" +}; + +const char * const rh850_excp_names[] = { + "misaligned_fetch", + "fault_fetch", + "illegal_instruction", + "breakpoint", + "misaligned_load", + "fault_load", + "misaligned_store", + "fault_store", + "user_ecall", + "supervisor_ecall", + 
"hypervisor_ecall", + "machine_ecall", + "exec_page_fault", + "load_page_fault", + "reserved", + "store_page_fault" +}; + +const char * const rh850_intr_names[] = { + "u_software", + "s_software", + "h_software", + "m_software", + "u_timer", + "s_timer", + "h_timer", + "m_timer", + "u_external", + "s_external", + "h_external", + "m_external", + "coprocessor", + "host" +}; + + +void rh850_cpu_set_pc(CPUState *cs, vaddr value) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + env->pc = value; +} + +vaddr rh850_cpu_get_pc(CPUState *cs) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + return env->pc; +} + +AddressSpace *cpu_addressspace(CPUState *cs, MemTxAttrs attrs) +{ + return cpu_get_address_space(cs, cpu_asidx_from_attrs(cs, attrs)); +} + + +/* called by qemu's softmmu to fill the qemu tlb */ +static bool rh850_tlb_fill(CPUState *cs, vaddr addr, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr) +{ + int ret; + ret = rh850_cpu_handle_mmu_fault(cs, addr, size, access_type, mmu_idx); + if (ret == TRANSLATE_FAIL) { + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + do_raise_exception_err(env, cs->exception_index, retaddr); + } + return true; +} + + +static void rh850_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + env->pc = tb->pc; +} + +static bool rh850_cpu_has_work(CPUState *cs) +{ +#ifndef CONFIG_USER_ONLY + return true; +#else + return true; +#endif +} + +void restore_state_to_opc(CPURH850State *env, TranslationBlock *tb, + target_ulong *data) +{ + env->pc = data[0]; +} + + +static void rh850_raise_exception(CPURH850State *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el) +{ + CPUState *cs = CPU(rh850_env_get_cpu(env)); + + cs->exception_index = excp; + cpu_loop_exit(cs); +} + + +static void rh850_debug_excp_handler(CPUState *cs) +{ + /* Called by core code when a watchpoint or breakpoint fires; + * need to check which one and raise the appropriate exception. + */ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + CPUWatchpoint *wp_hit = cs->watchpoint_hit; + + if (wp_hit) { + if (wp_hit->flags & BP_CPU) { + // bool wnr = (wp_hit->flags & BP_WATCHPOINT_HIT_WRITE) != 0; + // bool same_el = true; + + cs->watchpoint_hit = NULL; + + // env->exception.fsr = arm_debug_exception_fsr(env); + // env->exception.vaddress = wp_hit->hitaddr; + rh850_raise_exception(env, 0, 0, 0); + } + } else { + uint64_t pc = env->pc; + // bool same_el = true; + + /* (1) GDB breakpoints should be handled first. + * (2) Do not raise a CPU exception if no CPU breakpoint has fired, + * since singlestep is also done by generating a debug internal + * exception. + */ + if (!cpu_breakpoint_test(cs, pc, BP_GDB) && + cpu_breakpoint_test(cs, pc, BP_CPU)) { + + rh850_raise_exception(env, 0, 0, 0); + } + } +} + +static bool check_watchpoints(RH850CPU *cpu) +{ + return true; +} + + +static bool rh850_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) +{ + /* Called by core code when a CPU watchpoint fires; need to check if this + * is also an architectural watchpoint match. 
+ */ + RH850CPU *cpu = RH850_CPU(cs); + + return check_watchpoints(cpu); +} + + +static void rh850_cpu_reset(CPUState *cs) +{ + + RH850CPU *cpu = RH850_CPU(cs); + RH850CPUClass *mcc = RH850_CPU_GET_CLASS(cpu); + CPURH850State *env = &cpu->env; + + mcc->parent_reset(cs); + cs->exception_index = EXCP_NONE; + set_default_nan_mode(1, &env->fp_status); + env->pc = 0; // move to direct vector ? (always 0?) + env->ID_flag = 1; // interrupts are disabled on reset + env->systemRegs[BANK_ID_BASIC_0][EIPSW_IDX] = 0x20; + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = 0x20; + env->systemRegs[BANK_ID_BASIC_0][EIIC_IDX] = 0x0; + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = 0x0; + env->systemRegs[BANK_ID_BASIC_0][PSW_IDX] = 0x20; // reset value of PSW + env->systemRegs[BANK_ID_BASIC_0][CTPSW_IDX] = 0; + env->systemRegs[BANK_ID_BASIC_0][CTBP_IDX] = 0; // only bit 0 must be set to 0 + env->systemRegs[BANK_ID_BASIC_2][ASID_IDX2] = 0; // only bits 31-10 must be set to 0 + env->systemRegs[BANK_ID_BASIC_2][HTCFG0_IDX2] = 0x00018000; // const value + env->systemRegs[BANK_ID_BASIC_2][MEI_IDX2] = 0; // only some bits must be 0 + env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1] = 0; + env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1] = 0; // only bits 8-1 must be 0 + env->systemRegs[BANK_ID_BASIC_1][INTBP_IDX1] = 0; // only bits 8-0 must be 0 + env->systemRegs[BANK_ID_BASIC_1][PID_IDX1] = 0x05000120; // const + env->systemRegs[BANK_ID_BASIC_1][SCCFG_IDX1] = 0; // bits 31-8 must be 0 + env->systemRegs[BANK_ID_BASIC_1][SCBP_IDX1] = 0; // bits 1-0 must be 0 + env->systemRegs[BANK_ID_BASIC_1][MCFG0_IDX1] = 0x4; // bits 31-8 must be 0 + env->systemRegs[BANK_ID_BASIC_1][MCTL_IDX1] = 0x80000000; // bits 31-8 must be 0 + + env->systemRegs[BANK_ID_BASIC_1][FPIPR_IDX1] = 0; + env->systemRegs[BANK_ID_BASIC_2][ISPR_IDX2] = 0; + env->systemRegs[BANK_ID_BASIC_2][PMR_IDX2] = 0; + env->systemRegs[BANK_ID_BASIC_2][ICSR_IDX2] = 0; + env->systemRegs[BANK_ID_BASIC_2][INTCFG_IDX2] = 0; +} + +static void rh850_cpu_realize(struct uc_struct *uc, CPUState *dev) +{ + CPUState *cs = CPU(dev); + + cpu_exec_realizefn(cs); + + qemu_init_vcpu(cs); + + cpu_reset(cs); +} + +static void rh850_cpu_init(struct uc_struct *uc, CPUState *obj) +{ + CPUState *cs = CPU(obj); + RH850CPU *cpu = RH850_CPU(obj); + + /* Set CPU pointers.
*/ + cpu_set_cpustate_pointers(cpu); + + cs->env_ptr = &cpu->env; + cpu->env.uc = uc; +} + +static void rh850_cpu_class_init(struct uc_struct *uc, CPUClass *c) +{ + RH850CPUClass *mcc = RH850_CPU_CLASS(c); + CPUClass *cc = CPU_CLASS(c); + + mcc->parent_reset = cc->reset; + cc->reset = rh850_cpu_reset; + + cc->has_work = rh850_cpu_has_work; + cc->do_interrupt = rh850_cpu_do_interrupt; + cc->cpu_exec_interrupt = rh850_cpu_exec_interrupt; + cc->set_pc = rh850_cpu_set_pc; + cc->tlb_fill = rh850_tlb_fill; + cc->synchronize_from_tb = rh850_cpu_synchronize_from_tb; + cc->debug_excp_handler = rh850_debug_excp_handler; + cc->debug_check_watchpoint = rh850_debug_check_watchpoint; + +#ifdef CONFIG_USER_ONLY + cc->handle_mmu_fault = rh850_cpu_handle_mmu_fault; +#else + cc->do_unaligned_access = rh850_cpu_do_unaligned_access; + cc->get_phys_page_debug = rh850_cpu_get_phys_page_debug; +#endif +#ifdef CONFIG_TCG + cc->tcg_initialize = rh850_translate_init; +#endif +} + +RH850CPU *cpu_rh850_init(struct uc_struct *uc, const char *cpu_model) +{ + RH850CPU *cpu; + CPUState *cs; + CPUClass *cc; + + cpu = calloc(1, sizeof(*cpu)); + if (cpu == NULL) { + return NULL; + } + + cs = (CPUState *)cpu; + cc = (CPUClass *)&cpu->cc; + cs->cc = cc; + cs->uc = uc; + uc->cpu = (CPUState *)cpu; + + /* init CPUClass */ + cpu_class_init(uc, cc); + + /* init RH850CPUClass */ + rh850_cpu_class_init(uc, cc); + + /* init CPUState */ + cpu_common_initfn(uc, cs); + + /* init CPU */ + rh850_cpu_init(uc, cs); + + /* realize CPU */ + rh850_cpu_realize(uc, cs); + + // init address space + cpu_address_space_init(cs, 0, cs->memory); + + return cpu; +} + + + + diff --git a/qemu/target/rh850/cpu.h b/qemu/target/rh850/cpu.h new file mode 100644 index 0000000000..c54ad11599 --- /dev/null +++ b/qemu/target/rh850/cpu.h @@ -0,0 +1,276 @@ +/* + * QEMU RH850 CPU + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * Copyright (c) 2023 Quarkslab + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see .
+ */ + +#ifndef RH850_CPU_H +#define RH850_CPU_H + +#define TCG_GUEST_DEFAULT_MO 0 + +//#define TARGET_INSN_START_EXTRA_WORDS 2 + +#define ELF_MACHINE EM_RH850 +#define CPUArchState struct CPURH850State + +#include "qemu-common.h" +#include "hw/core/cpu.h" +#include "exec/cpu-defs.h" +#include "fpu/softfloat.h" + +#define TYPE_RH850_CPU "rh850-cpu" + +#define RH850_CPU_TYPE_SUFFIX "-" TYPE_RH850_CPU +#define RH850_CPU_TYPE_NAME(name) (name RH850_CPU_TYPE_SUFFIX) +#define CPU_RESOLVING_TYPE TYPE_RH850_CPU +#define TYPE_RH850_CPU_ANY RH850_CPU_TYPE_NAME("any") + +#define RV32 ((target_ulong)1 << (TARGET_LONG_BITS - 2)) +#define RV64 ((target_ulong)2 << (TARGET_LONG_BITS - 2)) + +#if defined(TARGET_RH850) +#define RVXLEN RV32 +#elif defined(TARGET_RH85064) +#define RVXLEN RV64 +#endif + +#define RV(x) ((target_ulong)1 << (x - 'A')) + +#define RVI RV('I') +#define RVM RV('M') +#define RVA RV('A') +#define RVF RV('F') +#define RVD RV('D') +#define RVC RV('C') +#define RVS RV('S') +#define RVU RV('U') + +/* S extension denotes that Supervisor mode exists, however it is possible + to have a core that support S mode but does not have an MMU and there + is currently no bit in misa to indicate whether an MMU exists or not + so a cpu features bitfield is required */ +enum { + RH850_FEATURE_MMU +}; + +#define USER_VERSION_2_02_0 0x00020200 +#define PRIV_VERSION_1_09_1 0x00010901 +#define PRIV_VERSION_1_10_0 0x00011000 + +#define TRANSLATE_FAIL 1 +#define TRANSLATE_SUCCESS 0 +#define MMU_USER_IDX 3 + +#define MAX_RH850_PMPS (16) + +typedef struct CPURH850State CPURH850State; + +#include "pmp.h" + +#include "register_indices.h" + +#define NUM_GP_REGS 32 +#define NUM_SYS_REG_BANKS 7 +#define MAX_SYS_REGS_IN_BANK 32 +#define BANK_ID_BASIC_0 0 +#define BANK_ID_BASIC_1 1 +#define BANK_ID_BASIC_2 2 + +struct CPURH850State { + + + target_ulong gpRegs[NUM_GP_REGS]; + target_ulong pc; + target_ulong sysDatabuffRegs[1]; + target_ulong systemRegs[NUM_SYS_REG_BANKS][MAX_SYS_REGS_IN_BANK]; + //target_ulong sysBasicRegs[31]; + //target_ulong sysInterruptRegs[5]; + //uint64_t sysFpuRegs[6]; //using rh850 basic system registers(sr6-sr11), 32-bit or 64-bit precision + //target_ulong sysMpuRegs[56]; + //target_ulong sysCacheRegs[7]; + + // flags contained in PSW register + uint32_t Z_flag; + uint32_t S_flag; + uint32_t OV_flag; + uint32_t CY_flag; + uint32_t SAT_flag; + uint32_t ID_flag; + uint32_t EP_flag; + uint32_t NP_flag; + uint32_t EBV_flag; + uint32_t CU0_flag; + uint32_t CU1_flag; + uint32_t CU2_flag; + uint32_t UM_flag; + + uint32_t features; + uint32_t badaddr; + + target_ulong cpu_LLbit; // register for mutual exclusion (LDL.W, STC.W) + target_ulong cpu_LLAddress; // register for mutual exclusion (LDL.W, STC.W) + + target_ulong load_res; // inst addr for TCG + target_ulong load_val; // inst val for TCG + + float_status fp_status; // not used yet in rh850, left for floating-point support. + + target_ulong fpsr; /* floating-point configuration/status register. */ + + uint32_t exception_cause; + int exception_priority; + bool exception_dv; + + // Unicorn engine + struct uc_struct *uc; +}; + +#define RH850_CPU(obj) ((RH850CPU *)obj) +#define RH850_CPU_CLASS(klass) ((RH850CPUClass *)klass) +#define RH850_CPU_GET_CLASS(obj) (&((RH850CPU *)obj)->cc) + + +/** + * RH850CPUClass: + * @parent_realize: The parent class' realize handler. + * @parent_reset: The parent class' reset handler. + * + * A RH850 CPU model. 
+ */ +typedef struct RH850CPUClass { + /*< private >*/ + CPUClass parent_class; + /*< public >*/ + void (*parent_reset)(CPUState *cpu); +} RH850CPUClass; + +/** + * RH850CPU: + * @env: #CPURH850State + * + * A RH850 CPU. + */ +typedef struct RH850CPU { + /*< private >*/ + CPUState parent_obj; + /*< public >*/ + CPUNegativeOffsetState neg; + CPURH850State env; + + RH850CPUClass cc; +} RH850CPU; + +typedef RH850CPU ArchCPU; + +static inline RH850CPU *rh850_env_get_cpu(CPURH850State *env) +{ + return container_of(env, RH850CPU, env); +} + +static inline int rh850_has_ext(CPURH850State *env, target_ulong ext) +{ // TODO: what does value 'ext' represent?? + //return (env->misa & ext) != 0; + return true; +} + +static inline bool rh850_feature(CPURH850State *env, int feature) +{ + return env->features & (1ULL << feature); +} + +#include "cpu_user.h" +#include "cpu_bits.h" + +extern const char * const rh850_gp_regnames[]; +extern const char * const rh850_sys_regnames[][MAX_SYS_REGS_IN_BANK]; +extern const char * const rh850_sys_databuff_regnames[]; + +extern const char * const rh850_excp_names[]; +extern const char * const rh850_intr_names[]; +extern const uint32_t rh850_sys_reg_read_only_values[][MAX_SYS_REGS_IN_BANK]; +extern const uint32_t rh850_sys_reg_read_only_masks[][MAX_SYS_REGS_IN_BANK]; + +#define ENV_GET_CPU(e) CPU(rh850_env_get_cpu(e)) +#define ENV_OFFSET offsetof(RH850CPU, env) + +void rh850_cpu_do_interrupt(CPUState *cpu); +int rh850_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); +int rh850_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +bool rh850_cpu_exec_interrupt(CPUState *cs, int interrupt_request); +int rh850_cpu_mmu_index(CPURH850State *env, bool ifetch); +hwaddr rh850_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); +void rh850_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, int mmu_idx, + uintptr_t retaddr); +int rh850_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int size, + int rw, int mmu_idx); + +char *rh850_isa_string(RH850CPU *cpu); +void rh850_cpu_list(void); + +#define cpu_init(cpu_model) cpu_generic_init(TYPE_RH850_CPU, cpu_model) +#define cpu_signal_handler cpu_rh850_signal_handler +#define cpu_list rh850_cpu_list +#define cpu_mmu_index rh850_cpu_mmu_index + +void rh850_set_mode(CPURH850State *env, target_ulong newpriv); + +void rh850_translate_init(struct uc_struct *uc); +RH850CPU *cpu_rh850_init(struct uc_struct *uc, const char *cpu_model); +int cpu_rh850_signal_handler(int host_signum, void *pinfo, void *puc); +void QEMU_NORETURN do_raise_exception_err(CPURH850State *env, + uint32_t exception, uintptr_t pc); + +target_ulong cpu_rh850_get_fflags(CPURH850State *env); +void cpu_rh850_set_fflags(CPURH850State *env, target_ulong); +void rh850_cpu_set_pc(CPUState *cs, vaddr value); +vaddr rh850_cpu_get_pc(CPUState *cs); +AddressSpace *cpu_addressspace(CPUState *cs, MemTxAttrs attrs); + +#define TB_FLAGS_MMU_MASK 3 +#define TB_FLAGS_FP_ENABLE MSTATUS_FS + +/* + * This f. is called from tcg_gen_lookup_and_goto_ptr() to obtain PC + * which is then used for TB lookup. 
+ */ +static inline void cpu_get_tb_cpu_state(CPURH850State *env, target_ulong *pc, + target_ulong *cs_base, uint32_t *flags) +{ + *pc = env->pc; + *cs_base = 0; +#ifdef CONFIG_USER_ONLY + *flags = TB_FLAGS_FP_ENABLE; +#else + *flags = cpu_mmu_index(env, 0); +#endif +} + +void csr_write_helper(CPURH850State *env, target_ulong val_to_write, + target_ulong csrno); +target_ulong csr_read_helper(CPURH850State *env, target_ulong csrno); + +#ifndef CONFIG_USER_ONLY +void rh850_set_local_interrupt(RH850CPU *cpu, target_ulong mask, int value); +#endif + +extern const int NUM_GDB_REGS; + +#include "exec/cpu-all.h" + +#endif /* RH850_CPU_H */ diff --git a/qemu/target/rh850/cpu_bits.h b/qemu/target/rh850/cpu_bits.h new file mode 100644 index 0000000000..a3b90298a6 --- /dev/null +++ b/qemu/target/rh850/cpu_bits.h @@ -0,0 +1,431 @@ +/* RH850 PSW constants */ + +#define PSW_Z 0x00000001 +#define PSW_S 0x00000002 +#define PSW_OV 0x00000004 +#define PSW_CY 0x00000008 +#define PSW_SAT 0x00000010 +#define PSW_ID 0x00000020 +#define PSW_EP 0x00000040 +#define PSW_NP 0x00000080 +#define PSW_EBV 0x00008000 +#define PSW_CU0 0x00010000 +#define PSW_UM 0x40000000 + +/* */ + +/* RH850 ISA constants */ + +#define get_field(reg, mask) (((reg) & \ + (target_ulong)(mask)) / ((mask) & ~((mask) << 1))) +#define set_field(reg, mask, val) (((reg) & ~(target_ulong)(mask)) | \ + (((target_ulong)(val) * ((mask) & ~((mask) << 1))) & \ + (target_ulong)(mask))) + +#define PGSHIFT 12 + +#define FSR_RD_SHIFT 5 +#define FSR_RD (0x7 << FSR_RD_SHIFT) + +#define FPEXC_NX 0x01 +#define FPEXC_UF 0x02 +#define FPEXC_OF 0x04 +#define FPEXC_DZ 0x08 +#define FPEXC_NV 0x10 + +#define FSR_AEXC_SHIFT 0 +#define FSR_NVA (FPEXC_NV << FSR_AEXC_SHIFT) +#define FSR_OFA (FPEXC_OF << FSR_AEXC_SHIFT) +#define FSR_UFA (FPEXC_UF << FSR_AEXC_SHIFT) +#define FSR_DZA (FPEXC_DZ << FSR_AEXC_SHIFT) +#define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) +#define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) + +/* CSR numbers */ +#define CSR_FFLAGS 0x1 +#define CSR_FRM 0x2 +#define CSR_FCSR 0x3 +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 +#define CSR_INSTRET 0xc02 +#define CSR_HPMCOUNTER3 0xc03 +#define CSR_HPMCOUNTER4 0xc04 +#define CSR_HPMCOUNTER5 0xc05 +#define CSR_HPMCOUNTER6 0xc06 +#define CSR_HPMCOUNTER7 0xc07 +#define CSR_HPMCOUNTER8 0xc08 +#define CSR_HPMCOUNTER9 0xc09 +#define CSR_HPMCOUNTER10 0xc0a +#define CSR_HPMCOUNTER11 0xc0b +#define CSR_HPMCOUNTER12 0xc0c +#define CSR_HPMCOUNTER13 0xc0d +#define CSR_HPMCOUNTER14 0xc0e +#define CSR_HPMCOUNTER15 0xc0f +#define CSR_HPMCOUNTER16 0xc10 +#define CSR_HPMCOUNTER17 0xc11 +#define CSR_HPMCOUNTER18 0xc12 +#define CSR_HPMCOUNTER19 0xc13 +#define CSR_HPMCOUNTER20 0xc14 +#define CSR_HPMCOUNTER21 0xc15 +#define CSR_HPMCOUNTER22 0xc16 +#define CSR_HPMCOUNTER23 0xc17 +#define CSR_HPMCOUNTER24 0xc18 +#define CSR_HPMCOUNTER25 0xc19 +#define CSR_HPMCOUNTER26 0xc1a +#define CSR_HPMCOUNTER27 0xc1b +#define CSR_HPMCOUNTER28 0xc1c +#define CSR_HPMCOUNTER29 0xc1d +#define CSR_HPMCOUNTER30 0xc1e +#define CSR_HPMCOUNTER31 0xc1f +#define CSR_SSTATUS 0x100 +#define CSR_SIE 0x104 +#define CSR_STVEC 0x105 +#define CSR_SCOUNTEREN 0x106 +#define CSR_SSCRATCH 0x140 +#define CSR_SEPC 0x141 +#define CSR_SCAUSE 0x142 +#define CSR_SBADADDR 0x143 +#define CSR_SIP 0x144 +#define CSR_SPTBR 0x180 +#define CSR_SATP 0x180 +#define CSR_MSTATUS 0x300 +#define CSR_MISA 0x301 +#define CSR_MEDELEG 0x302 +#define CSR_MIDELEG 0x303 +#define CSR_MIE 0x304 +#define CSR_MTVEC 0x305 +#define CSR_MCOUNTEREN 0x306 +#define CSR_MSCRATCH 0x340 
+#define CSR_MEPC 0x341 +#define CSR_MCAUSE 0x342 +#define CSR_MBADADDR 0x343 +#define CSR_MIP 0x344 +#define CSR_PMPCFG0 0x3a0 +#define CSR_PMPCFG1 0x3a1 +#define CSR_PMPCFG2 0x3a2 +#define CSR_PMPCFG3 0x3a3 +#define CSR_PMPADDR0 0x3b0 +#define CSR_PMPADDR1 0x3b1 +#define CSR_PMPADDR2 0x3b2 +#define CSR_PMPADDR3 0x3b3 +#define CSR_PMPADDR4 0x3b4 +#define CSR_PMPADDR5 0x3b5 +#define CSR_PMPADDR6 0x3b6 +#define CSR_PMPADDR7 0x3b7 +#define CSR_PMPADDR8 0x3b8 +#define CSR_PMPADDR9 0x3b9 +#define CSR_PMPADDR10 0x3ba +#define CSR_PMPADDR11 0x3bb +#define CSR_PMPADDR12 0x3bc +#define CSR_PMPADDR13 0x3bd +#define CSR_PMPADDR14 0x3be +#define CSR_PMPADDR15 0x3bf +#define CSR_TSELECT 0x7a0 +#define CSR_TDATA1 0x7a1 +#define CSR_TDATA2 0x7a2 +#define CSR_TDATA3 0x7a3 +#define CSR_DCSR 0x7b0 +#define CSR_DPC 0x7b1 +#define CSR_DSCRATCH 0x7b2 +#define CSR_MCYCLE 0xb00 +#define CSR_MINSTRET 0xb02 +#define CSR_MHPMCOUNTER3 0xb03 +#define CSR_MHPMCOUNTER4 0xb04 +#define CSR_MHPMCOUNTER5 0xb05 +#define CSR_MHPMCOUNTER6 0xb06 +#define CSR_MHPMCOUNTER7 0xb07 +#define CSR_MHPMCOUNTER8 0xb08 +#define CSR_MHPMCOUNTER9 0xb09 +#define CSR_MHPMCOUNTER10 0xb0a +#define CSR_MHPMCOUNTER11 0xb0b +#define CSR_MHPMCOUNTER12 0xb0c +#define CSR_MHPMCOUNTER13 0xb0d +#define CSR_MHPMCOUNTER14 0xb0e +#define CSR_MHPMCOUNTER15 0xb0f +#define CSR_MHPMCOUNTER16 0xb10 +#define CSR_MHPMCOUNTER17 0xb11 +#define CSR_MHPMCOUNTER18 0xb12 +#define CSR_MHPMCOUNTER19 0xb13 +#define CSR_MHPMCOUNTER20 0xb14 +#define CSR_MHPMCOUNTER21 0xb15 +#define CSR_MHPMCOUNTER22 0xb16 +#define CSR_MHPMCOUNTER23 0xb17 +#define CSR_MHPMCOUNTER24 0xb18 +#define CSR_MHPMCOUNTER25 0xb19 +#define CSR_MHPMCOUNTER26 0xb1a +#define CSR_MHPMCOUNTER27 0xb1b +#define CSR_MHPMCOUNTER28 0xb1c +#define CSR_MHPMCOUNTER29 0xb1d +#define CSR_MHPMCOUNTER30 0xb1e +#define CSR_MHPMCOUNTER31 0xb1f +#define CSR_MUCOUNTEREN 0x320 +#define CSR_MSCOUNTEREN 0x321 +#define CSR_MHPMEVENT3 0x323 +#define CSR_MHPMEVENT4 0x324 +#define CSR_MHPMEVENT5 0x325 +#define CSR_MHPMEVENT6 0x326 +#define CSR_MHPMEVENT7 0x327 +#define CSR_MHPMEVENT8 0x328 +#define CSR_MHPMEVENT9 0x329 +#define CSR_MHPMEVENT10 0x32a +#define CSR_MHPMEVENT11 0x32b +#define CSR_MHPMEVENT12 0x32c +#define CSR_MHPMEVENT13 0x32d +#define CSR_MHPMEVENT14 0x32e +#define CSR_MHPMEVENT15 0x32f +#define CSR_MHPMEVENT16 0x330 +#define CSR_MHPMEVENT17 0x331 +#define CSR_MHPMEVENT18 0x332 +#define CSR_MHPMEVENT19 0x333 +#define CSR_MHPMEVENT20 0x334 +#define CSR_MHPMEVENT21 0x335 +#define CSR_MHPMEVENT22 0x336 +#define CSR_MHPMEVENT23 0x337 +#define CSR_MHPMEVENT24 0x338 +#define CSR_MHPMEVENT25 0x339 +#define CSR_MHPMEVENT26 0x33a +#define CSR_MHPMEVENT27 0x33b +#define CSR_MHPMEVENT28 0x33c +#define CSR_MHPMEVENT29 0x33d +#define CSR_MHPMEVENT30 0x33e +#define CSR_MHPMEVENT31 0x33f +#define CSR_MVENDORID 0xf11 +#define CSR_MARCHID 0xf12 +#define CSR_MIMPID 0xf13 +#define CSR_MHARTID 0xf14 +#define CSR_CYCLEH 0xc80 +#define CSR_TIMEH 0xc81 +#define CSR_INSTRETH 0xc82 +#define CSR_HPMCOUNTER3H 0xc83 +#define CSR_HPMCOUNTER4H 0xc84 +#define CSR_HPMCOUNTER5H 0xc85 +#define CSR_HPMCOUNTER6H 0xc86 +#define CSR_HPMCOUNTER7H 0xc87 +#define CSR_HPMCOUNTER8H 0xc88 +#define CSR_HPMCOUNTER9H 0xc89 +#define CSR_HPMCOUNTER10H 0xc8a +#define CSR_HPMCOUNTER11H 0xc8b +#define CSR_HPMCOUNTER12H 0xc8c +#define CSR_HPMCOUNTER13H 0xc8d +#define CSR_HPMCOUNTER14H 0xc8e +#define CSR_HPMCOUNTER15H 0xc8f +#define CSR_HPMCOUNTER16H 0xc90 +#define CSR_HPMCOUNTER17H 0xc91 +#define CSR_HPMCOUNTER18H 0xc92 +#define CSR_HPMCOUNTER19H 0xc93 +#define 
CSR_HPMCOUNTER20H 0xc94 +#define CSR_HPMCOUNTER21H 0xc95 +#define CSR_HPMCOUNTER22H 0xc96 +#define CSR_HPMCOUNTER23H 0xc97 +#define CSR_HPMCOUNTER24H 0xc98 +#define CSR_HPMCOUNTER25H 0xc99 +#define CSR_HPMCOUNTER26H 0xc9a +#define CSR_HPMCOUNTER27H 0xc9b +#define CSR_HPMCOUNTER28H 0xc9c +#define CSR_HPMCOUNTER29H 0xc9d +#define CSR_HPMCOUNTER30H 0xc9e +#define CSR_HPMCOUNTER31H 0xc9f +#define CSR_MCYCLEH 0xb80 +#define CSR_MINSTRETH 0xb82 +#define CSR_MHPMCOUNTER3H 0xb83 +#define CSR_MHPMCOUNTER4H 0xb84 +#define CSR_MHPMCOUNTER5H 0xb85 +#define CSR_MHPMCOUNTER6H 0xb86 +#define CSR_MHPMCOUNTER7H 0xb87 +#define CSR_MHPMCOUNTER8H 0xb88 +#define CSR_MHPMCOUNTER9H 0xb89 +#define CSR_MHPMCOUNTER10H 0xb8a +#define CSR_MHPMCOUNTER11H 0xb8b +#define CSR_MHPMCOUNTER12H 0xb8c +#define CSR_MHPMCOUNTER13H 0xb8d +#define CSR_MHPMCOUNTER14H 0xb8e +#define CSR_MHPMCOUNTER15H 0xb8f +#define CSR_MHPMCOUNTER16H 0xb90 +#define CSR_MHPMCOUNTER17H 0xb91 +#define CSR_MHPMCOUNTER18H 0xb92 +#define CSR_MHPMCOUNTER19H 0xb93 +#define CSR_MHPMCOUNTER20H 0xb94 +#define CSR_MHPMCOUNTER21H 0xb95 +#define CSR_MHPMCOUNTER22H 0xb96 +#define CSR_MHPMCOUNTER23H 0xb97 +#define CSR_MHPMCOUNTER24H 0xb98 +#define CSR_MHPMCOUNTER25H 0xb99 +#define CSR_MHPMCOUNTER26H 0xb9a +#define CSR_MHPMCOUNTER27H 0xb9b +#define CSR_MHPMCOUNTER28H 0xb9c +#define CSR_MHPMCOUNTER29H 0xb9d +#define CSR_MHPMCOUNTER30H 0xb9e +#define CSR_MHPMCOUNTER31H 0xb9f + +/* mstatus bits */ +#define MSTATUS_UIE 0x00000001 +#define MSTATUS_SIE 0x00000002 +#define MSTATUS_HIE 0x00000004 +#define MSTATUS_MIE 0x00000008 +#define MSTATUS_UPIE 0x00000010 +#define MSTATUS_SPIE 0x00000020 +#define MSTATUS_HPIE 0x00000040 +#define MSTATUS_MPIE 0x00000080 +#define MSTATUS_SPP 0x00000100 +#define MSTATUS_HPP 0x00000600 +#define MSTATUS_MPP 0x00001800 +#define MSTATUS_FS 0x00006000 +#define MSTATUS_XS 0x00018000 +#define MSTATUS_MPRV 0x00020000 +#define MSTATUS_PUM 0x00040000 /* until: priv-1.9.1 */ +#define MSTATUS_SUM 0x00040000 /* since: priv-1.10 */ +#define MSTATUS_MXR 0x00080000 +#define MSTATUS_VM 0x1F000000 /* until: priv-1.9.1 */ +#define MSTATUS_TVM 0x00100000 /* since: priv-1.10 */ +#define MSTATUS_TW 0x20000000 /* since: priv-1.10 */ +#define MSTATUS_TSR 0x40000000 /* since: priv-1.10 */ + +#define MSTATUS64_UXL 0x0000000300000000ULL +#define MSTATUS64_SXL 0x0000000C00000000ULL + +#define MSTATUS32_SD 0x80000000 +#define MSTATUS64_SD 0x8000000000000000ULL + +#if defined(TARGET_RH850) +#define MSTATUS_SD MSTATUS32_SD +#endif + +/* sstatus bits */ +#define SSTATUS_UIE 0x00000001 +#define SSTATUS_SIE 0x00000002 +#define SSTATUS_UPIE 0x00000010 +#define SSTATUS_SPIE 0x00000020 +#define SSTATUS_SPP 0x00000100 +#define SSTATUS_FS 0x00006000 +#define SSTATUS_XS 0x00018000 +#define SSTATUS_PUM 0x00040000 /* until: priv-1.9.1 */ +#define SSTATUS_SUM 0x00040000 /* since: priv-1.10 */ +#define SSTATUS_MXR 0x00080000 + +#define SSTATUS32_SD 0x80000000 +#define SSTATUS64_SD 0x8000000000000000ULL + +#if defined(TARGET_RH850) +#define SSTATUS_SD SSTATUS32_SD +#endif + +/* irqs */ +#define MIP_SSIP (1 << IRQ_S_SOFT) +#define MIP_HSIP (1 << IRQ_H_SOFT) +#define MIP_MSIP (1 << IRQ_M_SOFT) +#define MIP_STIP (1 << IRQ_S_TIMER) +#define MIP_HTIP (1 << IRQ_H_TIMER) +#define MIP_MTIP (1 << IRQ_M_TIMER) +#define MIP_SEIP (1 << IRQ_S_EXT) +#define MIP_HEIP (1 << IRQ_H_EXT) +#define MIP_MEIP (1 << IRQ_M_EXT) + +#define SIP_SSIP MIP_SSIP +#define SIP_STIP MIP_STIP +#define SIP_SEIP MIP_SEIP + +#define PRV_U 0 +#define PRV_S 1 +#define PRV_H 2 +#define PRV_M 3 + +/* privileged ISA 1.9.1 
VM modes (mstatus.vm) */ +#define VM_1_09_MBARE 0 +#define VM_1_09_MBB 1 +#define VM_1_09_MBBID 2 +#define VM_1_09_SV32 8 +#define VM_1_09_SV39 9 +#define VM_1_09_SV48 10 + +/* privileged ISA 1.10.0 VM modes (satp.mode) */ +#define VM_1_10_MBARE 0 +#define VM_1_10_SV32 1 +#define VM_1_10_SV39 8 +#define VM_1_10_SV48 9 +#define VM_1_10_SV57 10 +#define VM_1_10_SV64 11 + +/* privileged ISA interrupt causes */ +#define IRQ_U_SOFT 0 /* since: priv-1.10 */ +#define IRQ_S_SOFT 1 +#define IRQ_H_SOFT 2 /* until: priv-1.9.1 */ +#define IRQ_M_SOFT 3 /* until: priv-1.9.1 */ +#define IRQ_U_TIMER 4 /* since: priv-1.10 */ +#define IRQ_S_TIMER 5 +#define IRQ_H_TIMER 6 /* until: priv-1.9.1 */ +#define IRQ_M_TIMER 7 /* until: priv-1.9.1 */ +#define IRQ_U_EXT 8 /* since: priv-1.10 */ +#define IRQ_S_EXT 9 +#define IRQ_H_EXT 10 /* until: priv-1.9.1 */ +#define IRQ_M_EXT 11 /* until: priv-1.9.1 */ +#define IRQ_X_COP 12 /* non-standard */ + +/* Default addresses */ +#define DEFAULT_RSTVEC 0x00000000 + +/* RV32 satp field masks */ +#define SATP32_MODE 0x80000000 +#define SATP32_ASID 0x7fc00000 +#define SATP32_PPN 0x003fffff + +/* RV64 satp field masks */ +#define SATP64_MODE 0xF000000000000000ULL +#define SATP64_ASID 0x0FFFF00000000000ULL +#define SATP64_PPN 0x00000FFFFFFFFFFFULL + +#if defined(TARGET_RH850) +#define SATP_MODE SATP32_MODE +#define SATP_ASID SATP32_ASID +#define SATP_PPN SATP32_PPN +#endif + +/* RH850 Exception Codes */ +#define EXCP_NONE -1 /* not a real RH850 exception code */ +#define RH850_EXCP_INST_ADDR_MIS 0x0 +#define RH850_EXCP_INST_ACCESS_FAULT 0x1 +#define RH850_EXCP_ILLEGAL_INST 0x2 +#define RH850_EXCP_BREAKPOINT 0x3 +#define RH850_EXCP_LOAD_ADDR_MIS 0x4 +#define RH850_EXCP_LOAD_ACCESS_FAULT 0x5 +#define RH850_EXCP_STORE_AMO_ADDR_MIS 0x6 +#define RH850_EXCP_STORE_AMO_ACCESS_FAULT 0x7 +#define RH850_EXCP_U_ECALL 0x8 /* for convenience, report all + ECALLs as this, handler + fixes */ +#define RH850_EXCP_S_ECALL 0x9 +#define RH850_EXCP_H_ECALL 0xa +#define RH850_EXCP_M_ECALL 0xb +#define RH850_EXCP_INST_PAGE_FAULT 0xc /* since: priv-1.10.0 */ +#define RH850_EXCP_LOAD_PAGE_FAULT 0xd /* since: priv-1.10.0 */ +#define RH850_EXCP_STORE_PAGE_FAULT 0xf /* since: priv-1.10.0 */ +#define RH850_EXCP_FETRAP 0x10 +#define RH850_EXCP_TRAP 0x11 +#define RH850_EXCP_RIE 0x12 +#define RH850_EXCP_SYSCALL 0x13 +#define RH850_EXCP_EIINT 0x14 +#define RH850_EXCP_FEINT 0x15 +#define RH850_EXCP_FENMI 0x16 + +/* Specific interrupts (FENMI, FEINT, EIINT). 
*/ +#define RH850_INT_FENMI CPU_INTERRUPT_TGT_EXT_0 /* Exception handler address is table-based */ +#define RH850_INT_FEINT CPU_INTERRUPT_TGT_EXT_1 /* Defines a non-maskable FE interrupt */ +#define RH850_INT_EIINT CPU_INTERRUPT_TGT_EXT_2 /* Defines a maskable FE interrupt */ + +#define RH850_EXCP_INT_FLAG 0x80000000 +#define RH850_EXCP_INT_MASK 0x7fffffff + + +/* page table entry (PTE) fields */ +#define PTE_V 0x001 /* Valid */ +#define PTE_R 0x002 /* Read */ +#define PTE_W 0x004 /* Write */ +#define PTE_X 0x008 /* Execute */ +#define PTE_U 0x010 /* User */ +#define PTE_G 0x020 /* Global */ +#define PTE_A 0x040 /* Accessed */ +#define PTE_D 0x080 /* Dirty */ +#define PTE_SOFT 0x300 /* Reserved for Software */ + +#define PTE_PPN_SHIFT 10 + +#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V) diff --git a/qemu/target/rh850/cpu_user.h b/qemu/target/rh850/cpu_user.h new file mode 100644 index 0000000000..c2199610ab --- /dev/null +++ b/qemu/target/rh850/cpu_user.h @@ -0,0 +1,13 @@ +#define xRA 1 /* return address (aka link register) */ +#define xSP 2 /* stack pointer */ +#define xGP 3 /* global pointer */ +#define xTP 4 /* thread pointer */ + +#define xA0 10 /* gpr[10-17] are syscall arguments */ +#define xA1 11 +#define xA2 12 +#define xA3 13 +#define xA4 14 +#define xA5 15 +#define xA6 16 +#define xA7 17 /* syscall number goes here */ diff --git a/qemu/target/rh850/fpu_helper.c b/qemu/target/rh850/fpu_helper.c new file mode 100644 index 0000000000..d99c8613dd --- /dev/null +++ b/qemu/target/rh850/fpu_helper.c @@ -0,0 +1,823 @@ +/* + * RH850 FPU Emulation Helpers for QEMU. + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include +#include "cpu.h" +#include "qemu/host-utils.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" + +target_ulong cpu_rh850_get_fflags(CPURH850State *env) +{ + int soft = get_float_exception_flags(&env->fp_status); + target_ulong hard = 0; + + hard |= (soft & float_flag_inexact) ? FPEXC_NX : 0; + hard |= (soft & float_flag_underflow) ? FPEXC_UF : 0; + hard |= (soft & float_flag_overflow) ? FPEXC_OF : 0; + hard |= (soft & float_flag_divbyzero) ? FPEXC_DZ : 0; + hard |= (soft & float_flag_invalid) ? FPEXC_NV : 0; + + return hard; +} + +void cpu_rh850_set_fflags(CPURH850State *env, target_ulong hard) +{ + int soft = 0; + + soft |= (hard & FPEXC_NX) ? float_flag_inexact : 0; + soft |= (hard & FPEXC_UF) ? float_flag_underflow : 0; + soft |= (hard & FPEXC_OF) ? float_flag_overflow : 0; + soft |= (hard & FPEXC_DZ) ? float_flag_divbyzero : 0; + soft |= (hard & FPEXC_NV) ? 
float_flag_invalid : 0; + + set_float_exception_flags(soft, &env->fp_status); +} + +void helper_set_rounding_mode(CPURH850State *env, uint32_t rm) +{ + int softrm; + + if (rm == 7) { + rm = 0; //env->frm; + } + switch (rm) { + case 0: + softrm = float_round_nearest_even; + break; + case 1: + softrm = float_round_to_zero; + break; + case 2: + softrm = float_round_down; + break; + case 3: + softrm = float_round_up; + break; + case 4: + softrm = float_round_ties_away; + break; + default: + qemu_log_mask(CPU_LOG_INT, "%s\n", __func__); + do_raise_exception_err(env, RH850_EXCP_ILLEGAL_INST, GETPC()); + } + + set_float_rounding_mode(softrm, &env->fp_status); +} + +/* Propagate softfloat flags into FPSR. */ +void helper_f_sync_fflags(CPURH850State *env) +{ + target_ulong flags; + + /* Retrieve softfloat flags. */ + flags = cpu_rh850_get_fflags(env); + + /* Handle inexact flag. */ + if (flags & FPEXC_NX) + { + if (env->fpsr & (1 << 5)) + { + /* Inexact exception allowed, set cause bit. */ + env->fpsr |= (1 << 10); + } + else + { + /* Set preservation bit. */ + env->fpsr |= 1 << 0; + } + } + + /* Handle underflow flag. */ + if (flags & FPEXC_UF) + { + if (env->fpsr & (1 << 6)) + { + /* Underflow exception allowed, set cause bit. */ + env->fpsr |= (1 << 11); + } + else + { + /* Set preservation bit. */ + env->fpsr |= 1 << 1; + } + } + + /* Handle overflow flag. */ + if (flags & FPEXC_OF) + { + if (env->fpsr & (1 << 7)) + { + /* Overflow exception allowed, set cause bit. */ + env->fpsr |= (1 << 12); + } + else + { + /* Set preservation bit. */ + env->fpsr |= 1 << 2; + } + } + + /* Handle div-by-zero flag. */ + if (flags & FPEXC_DZ) + { + if (env->fpsr & (1 << 8)) + { + /* Div-by-zero exception allowed, set cause bit. */ + env->fpsr |= (1 << 13); + } + else + { + /* Set preservation bit. */ + env->fpsr |= 1 << 3; + } + } + + /* Handle invalid flag. */ + if (flags & FPEXC_NV) + { + if (env->fpsr & (1 << 9)) + { + /* Invalid operation exception allowed, set cause bit. */ + env->fpsr |= (1 << 14); + } + else + { + /* Set preservation bit.
*/ + env->fpsr |= 1 << 4; + } + } +} + +/** + * FPU flags checks + **/ + +uint32_t HELPER(f32_is_normal)(CPURH850State *env, uint32_t frs1) +{ + return (uint32_t)float32_is_normal(frs1); +} + +uint32_t HELPER(f32_is_zero_or_normal)(CPURH850State *env, uint32_t frs1) +{ + return (uint32_t)float32_is_zero_or_normal(frs1); +} + +uint32_t HELPER(f32_is_infinity)(CPURH850State *env, uint32_t frs1) +{ + return (uint32_t)float32_is_infinity(frs1); +} + + + +uint64_t helper_fmadd_s(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float32_muladd(frs1, frs2, frs3, 0, &env->fp_status); +} + +uint64_t helper_fmadd_d(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float64_muladd(frs1, frs2, frs3, 0, &env->fp_status); +} + +uint64_t helper_fmsub_s(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c, + &env->fp_status); +} + +uint64_t helper_fmsub_d(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float64_muladd(frs1, frs2, frs3, float_muladd_negate_c, + &env->fp_status); +} + +uint64_t helper_fnmsub_s(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_product, + &env->fp_status); +} + +uint64_t helper_fnmsub_d(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float64_muladd(frs1, frs2, frs3, float_muladd_negate_product, + &env->fp_status); +} + +uint64_t helper_fnmadd_s(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c | + float_muladd_negate_product, &env->fp_status); +} + +uint64_t helper_fnmadd_d(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float64_muladd(frs1, frs2, frs3, float_muladd_negate_c | + float_muladd_negate_product, &env->fp_status); +} + + +/** + * Floating-point simple precision helpers. + **/ + +uint32_t HELPER(fadd_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_add(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fsub_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_sub(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fmul_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_mul(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fmax_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_maxnum(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fmin_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_minnum(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fdiv_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_div(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fabs_s)(CPURH850State *env, uint32_t frs1) +{ + return float32_abs(frs1); +} + +uint32_t HELPER(fneg_s)(CPURH850State *env, uint32_t frs1) +{ + return (frs1^0x80000000); +} + +uint32_t HELPER(ftrnc_sw)(CPURH850State *env, uint32_t frs1) +{ + return float32_to_int32_round_to_zero(frs1, &env->fp_status); +} + +uint32_t HELPER(fceil_sw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to int32 and round to positive. */ + return float32_to_int32_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint32_t HELPER(ffloor_sw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to int32 and round to negative.
*/ + return float32_to_int32_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint32_t HELPER(fcvt_sw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to int32 and round based on fp_status. */ + return float32_to_int32(frs1, &env->fp_status); +} + +uint32_t HELPER(fcvt_ls)(CPURH850State *env, uint64_t frs1) +{ + /* Convert int64 to float32 and round based on fp_status. */ + return int64_to_float32(frs1, &env->fp_status); +} + +uint32_t HELPER(fcvt_hs)(CPURH850State *env, uint32_t frs1) +{ + /* Convert lower half of frs1 into float32. */ + return int16_to_float32((int16_t)(frs1&0xffff), &env->fp_status); +} + +uint32_t HELPER(fcvt_sh)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to int16_t, zero-extended. */ + return float32_to_int16(frs1, &env->fp_status) & 0xffff; +} + +uint32_t HELPER(fcvt_ws)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to float32 and round based on fp_status. */ + return int32_to_float32(frs1, &env->fp_status); +} + +uint32_t HELPER(ftrnc_suw)(CPURH850State *env, uint32_t frs1) +{ + return float32_to_uint32_round_to_zero(frs1, &env->fp_status); +} + +uint32_t HELPER(fceil_suw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to uint32 and round to positive. */ + return float32_to_uint32_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint32_t HELPER(ffloor_suw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to uint32 and round to negative. */ + return float32_to_uint32_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint32_t HELPER(fcvt_suw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to uint32 and round based on fp_status. */ + return float32_to_uint32(frs1, &env->fp_status); +} + +uint32_t HELPER(fcvt_uws)(CPURH850State *env, uint32_t frs1) +{ + /* Convert from uint32 to float32 and round based on fp_status. */ + return uint32_to_float32(frs1, &env->fp_status); +} + +uint32_t HELPER(fcvt_uls)(CPURH850State *env, uint64_t frs1) +{ + /* Convert uint64 to float32 and round based on fp_status. */ + return uint64_to_float32(frs1, &env->fp_status); +} + +uint64_t HELPER(ftrnc_sl)(CPURH850State *env, uint32_t frs1) +{ + return float32_to_int64_round_to_zero(frs1, &env->fp_status); +} + +uint64_t HELPER(fceil_sl)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to int64 and round to upper value. */ + return float32_to_int64_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint64_t HELPER(ffloor_sl)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to int64 and round to lower value. */ + return float32_to_int64_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint64_t HELPER(fcvt_sl)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to int64. */ + return float32_to_int64(frs1, &env->fp_status); +} + +uint64_t HELPER(ftrnc_sul)(CPURH850State *env, uint32_t frs1) +{ + return float32_to_uint64_round_to_zero(frs1, &env->fp_status); +} + +uint64_t HELPER(fceil_sul)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to uint64 and round to upper value. */ + return float32_to_uint64_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint64_t HELPER(ffloor_sul)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to uint64 and round to lower value. */ + return float32_to_uint64_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint64_t HELPER(fcvt_sul)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to uint64.
*/ + return float32_to_uint64(frs1, &env->fp_status); +} + +uint32_t HELPER(fsqrt_s)(CPURH850State *env, uint32_t frs1) +{ + return float32_sqrt(frs1, &env->fp_status); +} + +uint32_t HELPER(frecip_s)(CPURH850State *env, uint32_t frs1) +{ + /* Compute 1/x (0x3f800000 = float32(1.0)). */ + return float32_div(0x3f800000, frs1, &env->fp_status); +} + +uint32_t HELPER(frsqrt_s)(CPURH850State *env, uint32_t frs1) +{ + /* Compute 1/sqrt(x). */ + return HELPER(frecip_s)(env, float32_sqrt(frs1, &env->fp_status)); +} + +uint32_t HELPER(f_is_nan_s)(CPURH850State *env, uint32_t frs1) +{ + /* Check if float32 is NaN. */ + return float32_is_any_nan(frs1); +} + +uint32_t helper_fle_s(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_le(frs1, frs2, &env->fp_status); +} + +uint32_t helper_flt_s(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_lt(frs1, frs2, &env->fp_status); +} + +uint32_t helper_feq_s(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_eq_quiet(frs1, frs2, &env->fp_status); +} + + +uint32_t HELPER(fmaf_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2, uint32_t frs3) +{ + /* Compute (frs1 * frs2) + frs3 */ + return float32_muladd(frs1, frs2, frs3, 0, &env->fp_status); +} + +uint32_t HELPER(fmsf_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2, uint32_t frs3) +{ + /* Compute (frs1 * frs2) - frs3 */ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c, &env->fp_status); +} + +uint32_t HELPER(fnmaf_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2, uint32_t frs3) +{ + /* Compute -((frs1 * frs2) + frs3) */ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_result, &env->fp_status); +} + +uint32_t HELPER(fnmsf_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2, uint32_t frs3) +{ + /* Compute -((frs1 * frs2) - frs3) */ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c | float_muladd_negate_result, &env->fp_status); +} + + + +target_ulong helper_fcvt_w_s(CPURH850State *env, uint64_t frs1) +{ + return float32_to_int32(frs1, &env->fp_status); +} + +target_ulong helper_fcvt_wu_s(CPURH850State *env, uint64_t frs1) +{ + return (int32_t)float32_to_uint32(frs1, &env->fp_status); +} + +#if defined(TARGET_RH85064) +uint64_t helper_fcvt_l_s(CPURH850State *env, uint64_t frs1) +{ + return float32_to_int64(frs1, &env->fp_status); +} + +uint64_t helper_fcvt_lu_s(CPURH850State *env, uint64_t frs1) +{ + return float32_to_uint64(frs1, &env->fp_status); +} +#endif + +uint64_t helper_fcvt_s_w(CPURH850State *env, target_ulong rs1) +{ + return int32_to_float32((int32_t)rs1, &env->fp_status); +} + +uint64_t helper_fcvt_s_wu(CPURH850State *env, target_ulong rs1) +{ + return uint32_to_float32((uint32_t)rs1, &env->fp_status); +} + +#if defined(TARGET_RH85064) +uint64_t helper_fcvt_s_l(CPURH850State *env, uint64_t rs1) +{ + return int64_to_float32(rs1, &env->fp_status); +} + +uint64_t helper_fcvt_s_lu(CPURH850State *env, uint64_t rs1) +{ + return uint64_to_float32(rs1, &env->fp_status); +} +#endif + +target_ulong helper_fclass_s(uint64_t frs1) +{ + float32 f = frs1; + bool sign = float32_is_neg(f); + + if (float32_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float32_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float32_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float32_is_any_nan(f)) { + float_status s = { 0 }; /* for snan_bit_is_one */ + return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ?
1 << 1 : 1 << 6; + } +} + +/** + * Floating-point double precision helpers. + **/ + +uint64_t HELPER(fadd_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_add(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fsub_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_sub(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fmul_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_mul(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fmax_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_maxnum(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fmin_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_minnum(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fdiv_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_div(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fabs_d)(CPURH850State *env, uint64_t frs1) +{ + return float64_abs(frs1); +} + +uint64_t HELPER(fneg_d)(CPURH850State *env, uint64_t frs1) +{ + return (frs1 ^ 0x8000000000000000); +} + +uint32_t HELPER(ftrnc_dw)(CPURH850State *env, uint64_t frs1) +{ + return float64_to_int32_round_to_zero(frs1, &env->fp_status); +} + +uint32_t HELPER(fceil_dw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int32 and round to upper value. */ + return float64_to_int32_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint32_t HELPER(ffloor_dw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int32 and round to lower value. */ + return float64_to_int32_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint32_t HELPER(fcvt_dw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int32. */ + return float64_to_int32(frs1, &env->fp_status); +} + +uint32_t HELPER(ftrnc_duw)(CPURH850State *env, uint64_t frs1) +{ + return float64_to_uint32_round_to_zero(frs1, &env->fp_status); +} + +uint32_t HELPER(fceil_duw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint32 and round to upper value. */ + return float64_to_uint32_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint32_t HELPER(ffloor_duw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint32 and round to lower value. */ + return float64_to_uint32_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint32_t HELPER(fcvt_duw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint32. */ + return float64_to_uint32(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_wd)(CPURH850State *env, uint32_t frs1) +{ + /* Convert int32 to float64. */ + return int32_to_float64(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_ld)(CPURH850State *env, uint64_t frs1) +{ + /* Convert int64 to float64. */ + return int64_to_float64(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_sd)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to float64. */ + return float32_to_float64(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_uwd)(CPURH850State *env, uint32_t frs1) +{ + /* Convert uint32 to float64. */ + return uint32_to_float64(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_uld)(CPURH850State *env, uint64_t frs1) +{ + /* Convert uint64 to float64. */ + return uint64_to_float64(frs1, &env->fp_status); +} + + + +uint64_t HELPER(ftrnc_dl)(CPURH850State *env, uint64_t frs1) +{ + return float64_to_int64_round_to_zero(frs1, &env->fp_status); +} + +uint64_t HELPER(fceil_dl)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int64 and round to upper value.
*/ + return float64_to_int64_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint64_t HELPER(ffloor_dl)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int64 and round to lower value. */ + return float64_to_int64_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint64_t HELPER(fcvt_dl)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int64. */ + return float64_to_int64(frs1, &env->fp_status); +} + +uint64_t HELPER(ftrnc_dul)(CPURH850State *env, uint64_t frs1) +{ + return float64_to_uint64_round_to_zero(frs1, &env->fp_status); +} + +uint64_t HELPER(fceil_dul)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint64 and round to upper value. */ + return float64_to_uint64_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint64_t HELPER(ffloor_dul)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint64 and round to lower value. */ + return float64_to_uint64_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint64_t HELPER(fcvt_dul)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint64. */ + return float64_to_uint64(frs1, &env->fp_status); +} + +uint64_t HELPER(fsqrt_d)(CPURH850State *env, uint64_t frs1) +{ + return float64_sqrt(frs1, &env->fp_status); +} + +uint64_t HELPER(frecip_d)(CPURH850State *env, uint64_t frs1) +{ + /* Compute 1/x (0x3ff0000000000000 = float64(1.0)). */ + return float64_div(0x3ff0000000000000, frs1, &env->fp_status); +} + +uint64_t HELPER(frsqrt_d)(CPURH850State *env, uint64_t frs1) +{ + /* Compute 1/sqrt(x). */ + return HELPER(frecip_d)(env, float64_sqrt(frs1, &env->fp_status)); +} + +uint32_t HELPER(f_is_nan_d)(CPURH850State *env, uint64_t frs1) +{ + /* Check if float64 is NaN. */ + return float64_is_any_nan(frs1); +} + + + +uint64_t helper_fcvt_s_d(CPURH850State *env, uint64_t rs1) +{ + return float64_to_float32(rs1, &env->fp_status); +} + +uint64_t helper_fcvt_d_s(CPURH850State *env, uint64_t rs1) +{ + return float32_to_float64(rs1, &env->fp_status); +} + +uint32_t helper_fle_d(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_le(frs1, frs2, &env->fp_status); +} + +uint32_t helper_flt_d(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_lt(frs1, frs2, &env->fp_status); +} + +uint32_t helper_feq_d(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_eq_quiet(frs1, frs2, &env->fp_status); +} + +target_ulong helper_fcvt_w_d(CPURH850State *env, uint64_t frs1) +{ + return float64_to_int32(frs1, &env->fp_status); +} + +target_ulong helper_fcvt_wu_d(CPURH850State *env, uint64_t frs1) +{ + return (int32_t)float64_to_uint32(frs1, &env->fp_status); +} + +#if defined(TARGET_RH85064) +uint64_t helper_fcvt_l_d(CPURH850State *env, uint64_t frs1) +{ + return float64_to_int64(frs1, &env->fp_status); +} + +uint64_t helper_fcvt_lu_d(CPURH850State *env, uint64_t frs1) +{ + return float64_to_uint64(frs1, &env->fp_status); +} +#endif + +uint64_t helper_fcvt_d_w(CPURH850State *env, target_ulong rs1) +{ + return int32_to_float64((int32_t)rs1, &env->fp_status); +} + +uint64_t helper_fcvt_d_wu(CPURH850State *env, target_ulong rs1) +{ + return uint32_to_float64((uint32_t)rs1, &env->fp_status); +} + +#if defined(TARGET_RH85064) +uint64_t helper_fcvt_d_l(CPURH850State *env, uint64_t rs1) +{ + return int64_to_float64(rs1, &env->fp_status); +} + +uint64_t helper_fcvt_d_lu(CPURH850State *env, uint64_t rs1) +{ + return uint64_to_float64(rs1, &env->fp_status); +} +#endif + +target_ulong helper_fclass_d(uint64_t frs1) +{ + float64 f = frs1; + bool
sign = float64_is_neg(f); + + if (float64_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float64_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float64_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float64_is_any_nan(f)) { + float_status s = { 0 }; /* for snan_bit_is_one */ + return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} diff --git a/qemu/target/rh850/fpu_translate.c b/qemu/target/rh850/fpu_translate.c new file mode 100644 index 0000000000..2fd008177b --- /dev/null +++ b/qemu/target/rh850/fpu_translate.c @@ -0,0 +1,1557 @@ +#include "fpu_translate.h" +#include "instmap.h" + +extern TCGv_i32 cpu_ZF; + +/* Helpers */ +void fpu_load_i64(TCGContext *tcg_ctx, TCGv_i64 dst, int reg_n); +void fpu_load_i64_2(TCGContext *tcg_ctx, TCGv_i64 dst0, TCGv_i64 dst1, int reg_n0, int reg_n1); +void fpu_store_i64(TCGContext *tcg_ctx, int reg_n, TCGv_i64 src); + +/* Single-precision */ +void fpu_gen_sp_ir_3(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs1, int rs2, int rs3); +void fpu_gen_sp_ir_2(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs2, int rs3); +void fpu_gen_cmpf_s(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int fcond, int fcbit); +void fpu_gen_cmov_s(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int rs3, int fcbit); +void fpu_gen_trfsr(CPURH850State *env, DisasContext *ctx, int fcbit); +void fpu_gen_cat1_ir(CPURH850State *env, DisasContext *ctx, int op, int frs1, int frs2, int frs3); + + +/* Double precision */ +void fpu_gen_cmpf_d(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int fcond, int fcbit); +void fpu_gen_cmov_d(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int rs3, int fcbit); +void fpu_gen_dp_ir_3(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs1, int rs2, int rs3); +void fpu_gen_dp_ir_2(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs2, int rs3); + + +/** + * Helpers for 64-bit register load/store + **/ + +void fpu_load_i64(TCGContext *tcg_ctx, TCGv_i64 dst, int reg_n) +{ + TCGv_i32 rl = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i32 rh = tcg_temp_local_new_i32(tcg_ctx); + + /* Read float64 from (reg_n/reg_n+1). */ + gen_get_gpr(tcg_ctx, rl, reg_n); + gen_get_gpr(tcg_ctx, rh, reg_n+1); + tcg_gen_concat_i32_i64(tcg_ctx, dst, rl, rh); + + /* Free temporary variables. */ + tcg_temp_free_i32(tcg_ctx, rl); + tcg_temp_free_i32(tcg_ctx, rh); +} + +void fpu_store_i64(TCGContext *tcg_ctx, int reg_n, TCGv_i64 src) +{ + TCGv_i32 rl = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i32 rh = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i64 shift = tcg_temp_local_new_i64(tcg_ctx); + + tcg_gen_movi_i64(tcg_ctx, shift, 32); + tcg_gen_extrl_i64_i32(tcg_ctx, rl, src); + tcg_gen_shr_i64(tcg_ctx, src, src, shift); + tcg_gen_extrl_i64_i32(tcg_ctx, rh, src); + gen_set_gpr(tcg_ctx, reg_n, rl); + gen_set_gpr(tcg_ctx, reg_n + 1, rh); + + /* Free temporary variables. */ + tcg_temp_free_i32(tcg_ctx, rl); + tcg_temp_free_i32(tcg_ctx, rh); +} + +void fpu_load_i64_2(TCGContext *tcg_ctx, TCGv_i64 dst0, TCGv_i64 dst1, int reg_n0, int reg_n1) +{ + TCGv_i32 rl = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i32 rh = tcg_temp_local_new_i32(tcg_ctx); + + /* Read float64 from (reg_n0/reg_n0 + 1). */ + gen_get_gpr(tcg_ctx, rl, reg_n0); + gen_get_gpr(tcg_ctx, rh, reg_n0 + 1); + tcg_gen_concat_i32_i64(tcg_ctx, dst0, rl, rh); + + /* Read float64 from (reg_n1/reg_n1 + 1). 
*/ + gen_get_gpr(tcg_ctx, rl, reg_n1); + gen_get_gpr(tcg_ctx, rh, reg_n1 + 1); + tcg_gen_concat_i32_i64(tcg_ctx, dst1, rl, rh); + + /* Free temporary variables. */ + tcg_temp_free_i32(tcg_ctx, rl); + tcg_temp_free_i32(tcg_ctx, rh); +} + +/** + * Floating-point simple-precision IR generators. + **/ + +void fpu_gen_cat1_ir(CPURH850State *env, DisasContext *ctx, int op, int frs1, int frs2, int frs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + TCGv r3 = tcg_temp_local_new(tcg_ctx); + + /* Load register content from frs1, frs2 and frs3. */ + gen_get_gpr(tcg_ctx, r1, frs1); + gen_get_gpr(tcg_ctx, r2, frs2); + gen_get_gpr(tcg_ctx, r3, frs3); + + switch(op) + { + case OPC_RH850_FPU_FMAF_S: + gen_helper_fmaf_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2, r3); + break; + + case OPC_RH850_FPU_FMSF_S: + gen_helper_fmsf_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2, r3); + break; + + case OPC_RH850_FPU_FNMAF_S: + gen_helper_fnmaf_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2, r3); + break; + + case OPC_RH850_FPU_FNMSF_S: + gen_helper_fnmsf_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2, r3); + break; + + default: + /* Unknown instruction. */ + break; + } + + /* Store r3 register into frs3. */ + gen_set_gpr(tcg_ctx, frs3, r3); + + /* Free locals. */ + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, r3); +} + + +void fpu_gen_sp_ir_2(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs2, int rs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + /* rs1, rs2 and rs3 for TCG */ + TCGv r2 = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3 = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i64 r3_64 = tcg_temp_local_new_i64(tcg_ctx); + + /* Load contents from registers. */ + switch(operands) + { + case FPU_TYPE_S: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_ABS: + gen_helper_fabs_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_NEG: + gen_helper_fneg_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_SQRT: + gen_helper_fsqrt_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_RECIP: + gen_helper_frecip_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_RSQRT: + gen_helper_frsqrt_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_SL: + { + /* Load simple-precision float. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_sl(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_sl(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_sl(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_sl(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result as long. */ + fpu_store_i64(tcg_ctx, rs3, r3_64); + } + break; + + case FPU_TYPE_SUL: + { + /* Load simple-precision float. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. 
*/ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_sul(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_sul(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_sul(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_sul(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result as long. */ + fpu_store_i64(tcg_ctx, rs3, r3_64); + } + break; + + + case FPU_TYPE_SW: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_sw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_sw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_sw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_sw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_SUW: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_suw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_suw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_suw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_suw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_LS: + { + /* Load content from register. */ + fpu_load_i64(tcg_ctx, r3_64, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_ls(tcg_ctx, r3, tcg_ctx->cpu_env, r3_64); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_HS: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_hs(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_WS: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_ws(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + + case FPU_TYPE_SH: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_sh(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_ULS: + { + /* Load content from register. */ + fpu_load_i64(tcg_ctx, r3_64, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_uls(tcg_ctx, r3, tcg_ctx->cpu_env, r3_64); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_UWS: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. 
*/ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_uws(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + } + + /* Mov softfloat flags into our register. */ + gen_helper_f_sync_fflags(tcg_ctx, tcg_ctx->cpu_env); + + /* Free temp. */ + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, r3); + tcg_temp_free_i64(tcg_ctx, r3_64); +} + +/** + * refactored + **/ + +void fpu_gen_sp_ir_3(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs1, int rs2, int rs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + /* rs1, rs2 and rs3 for TCG */ + TCGv r1 = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2 = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3 = tcg_temp_local_new_i32(tcg_ctx); + + /* Load contents from registers. */ + switch(operands) + { + case FPU_TYPE_S: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + } + break; + } + + /* Apply operation. */ + switch(op) + { + case FPU_OP_ADD: + gen_helper_fadd_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_DIV: + gen_helper_fdiv_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2, r1); + break; + + case FPU_OP_SUB: + gen_helper_fsub_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2, r1); + break; + + case FPU_OP_MAX: + gen_helper_fmax_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_MIN: + gen_helper_fmin_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_MUL: + gen_helper_fmul_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + } + + /* Store result. */ + switch(operands) + { + case FPU_TYPE_S: + { + /* Set reg3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + } + + /* Mov softfloat flags into our register. */ + gen_helper_f_sync_fflags(tcg_ctx, tcg_ctx->cpu_env); + + /* Free temp. */ + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, r3); +} + + +void fpu_gen_trfsr(CPURH850State *env, DisasContext *ctx, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv fpsr = tcg_temp_local_new(tcg_ctx); + TCGv mask = tcg_temp_local_new(tcg_ctx); + TCGv shift = tcg_temp_local_new(tcg_ctx); + TCGv one = tcg_const_i32(tcg_ctx, 1); + TCGv value = tcg_temp_local_new(tcg_ctx); + + /* Load fpsr and compute mask. */ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, fpsr); + tcg_gen_movi_i32(tcg_ctx, shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, mask, one, shift); + + /* Extract CCn bit. */ + tcg_gen_and_i32(tcg_ctx, value, fpsr, mask); + tcg_gen_shr_i32(tcg_ctx, value, value, shift); + + /* Set Z flag. */ + tcg_gen_mov_i32(tcg_ctx, cpu_ZF, value); + gen_set_gpr(tcg_ctx, 1, value); + + /* Free locals. */ + tcg_temp_free(tcg_ctx, fpsr); + tcg_temp_free(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, shift); + tcg_temp_free(tcg_ctx, one); + tcg_temp_free(tcg_ctx, value); +} + +void fpu_gen_cmov_s(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int rs3, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGLabel *end, *otherwise; + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + TCGv final_shift = tcg_temp_local_new(tcg_ctx); + TCGv res = tcg_temp_local_new(tcg_ctx); + TCGv fpsr = tcg_temp_local_new(tcg_ctx); + + end = gen_new_label(tcg_ctx); + otherwise = gen_new_label(tcg_ctx); + + + /* Load register contents. */ + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Check if FPSR.CCn is set (with n=fcbit). 
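The CCn condition flags occupy FPSR bits 24..31, so the bit for a given fcbit is selected with a mask of 1 shifted left by (24 + fcbit), as computed below.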
*/ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, fpsr); + tcg_gen_movi_i32(tcg_ctx, res, 1); + tcg_gen_movi_i32(tcg_ctx, final_shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, res, res, final_shift); + tcg_gen_and_i32(tcg_ctx, res, fpsr, res); + + /* If not set, r2 -> r3. */ + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, res, 0, otherwise); + + /* If set, do the move ! */ + gen_set_gpr(tcg_ctx, rs3, r1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, otherwise); + + gen_set_gpr(tcg_ctx, rs3, r2); + + /* End. */ + gen_set_label(tcg_ctx, end); + + /* Free variables. */ + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, final_shift); + tcg_temp_free(tcg_ctx, res); + tcg_temp_free(tcg_ctx, fpsr); +} + +void fpu_gen_cmpf_s(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int fcond, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGLabel *handle_nan; + TCGLabel *end; + + end = gen_new_label(tcg_ctx); + handle_nan = gen_new_label(tcg_ctx); + + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + TCGv nan1 = tcg_temp_local_new(tcg_ctx); + TCGv nan2 = tcg_temp_local_new(tcg_ctx); + TCGv less = tcg_temp_local_new(tcg_ctx); + TCGv equal = tcg_temp_local_new(tcg_ctx); + TCGv unordered = tcg_temp_local_new(tcg_ctx); + TCGv res = tcg_temp_local_new(tcg_ctx); + TCGv final_shift = tcg_temp_local_new(tcg_ctx); + TCGv one = tcg_temp_local_new(tcg_ctx); + TCGv mask = tcg_temp_local_new(tcg_ctx); + + tcg_gen_movi_i32(tcg_ctx, one, 1); + + /* Load rs1 and rs2 registers. */ + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + /* If r1 or r2 is a Nan, then error. */ + gen_helper_f_is_nan_s(tcg_ctx, nan1, tcg_ctx->cpu_env, r1); + gen_helper_f_is_nan_s(tcg_ctx, nan2, tcg_ctx->cpu_env, r2); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_EQ, nan1, one, handle_nan); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_EQ, nan2, one, handle_nan); + + gen_helper_flt_s(tcg_ctx, less, tcg_ctx->cpu_env, r2, r1); + gen_helper_feq_s(tcg_ctx, equal, tcg_ctx->cpu_env, r2, r1); + tcg_gen_movi_i32(tcg_ctx, unordered, 0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, handle_nan); + + tcg_gen_movi_i32(tcg_ctx, less, 0); + tcg_gen_movi_i32(tcg_ctx, equal, 0); + tcg_gen_movi_i32(tcg_ctx, unordered, 1); + if (fcond & 0x8) + { + /* Invalid operation detected. */ + /* TODO: raise exception ? */ + } + + /* This is the end =) */ + gen_set_label(tcg_ctx, end); + + /* Compute logical result. */ + tcg_gen_movi_i32(tcg_ctx, res, 0); + if (fcond & 1) + tcg_gen_or_i32(tcg_ctx, res, res, unordered); + if (fcond & 2) + tcg_gen_or_i32(tcg_ctx, res, res, equal); + if (fcond & 4) + tcg_gen_or_i32(tcg_ctx, res, res, less); + + /** + * Set CCn bit into FPSR (with n=fcbit). + * 1. Load FPSR into r1 + * 2. AND r1 with NOT bitmask for CCn + * 3. OR bitmask if res == 1 + * 4. Store r1 into FPSR + **/ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, r1); + tcg_gen_movi_i32(tcg_ctx, final_shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, mask, one, final_shift); + tcg_gen_andc_tl(tcg_ctx, r1, r1, mask); + tcg_gen_shl_i32(tcg_ctx, res, res, final_shift); + tcg_gen_or_i32(tcg_ctx, r1, r1, res); + gen_set_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, r1); + + /* Free variables. 
*/ + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, nan1); + tcg_temp_free(tcg_ctx, nan2); + tcg_temp_free(tcg_ctx, less); + tcg_temp_free(tcg_ctx, equal); + tcg_temp_free(tcg_ctx, unordered); + tcg_temp_free(tcg_ctx, final_shift); + tcg_temp_free(tcg_ctx, one); + tcg_temp_free(tcg_ctx, res); +} + + +/** + * Floating-point double-precision IR generators. + **/ + +void fpu_gen_dp_ir_2(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs2, int rs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + /* rs1, rs2 and rs3 for TCG */ + TCGv_i64 r2 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r3 = tcg_temp_local_new_i64(tcg_ctx); + TCGv r3_32 = tcg_temp_local_new_i32(tcg_ctx); + + /* Load contents from registers. */ + switch(operands) + { + case FPU_TYPE_D: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_ABS: + gen_helper_fabs_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_NEG: + gen_helper_fneg_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_SQRT: + gen_helper_fsqrt_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_RECIP: + gen_helper_frecip_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_RSQRT: + gen_helper_frsqrt_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_DL: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_dl(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_dl(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_dl(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_dl(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_DUL: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_dul(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_dul(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_dul(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_dul(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + + case FPU_TYPE_DW: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_dw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_dw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_dw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_dw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3_32); + } + break; + + case FPU_TYPE_DUW: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. 
*/ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_duw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_duw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_duw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_duw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3_32); + } + break; + + + case FPU_TYPE_LD: + { + /* Load content from register. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_ld(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + + case FPU_TYPE_WD: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r3_32, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_wd(tcg_ctx, r3, tcg_ctx->cpu_env, r3_32); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + + case FPU_TYPE_SD: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r3_32, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_sd(tcg_ctx, r3, tcg_ctx->cpu_env, r3_32); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_UWD: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r3_32, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_uwd(tcg_ctx, r3, tcg_ctx->cpu_env, r3_32); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_ULD: + { + /* Load content from register. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_uld(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + } + + /* Mov softfloat flags into our register. */ + gen_helper_f_sync_fflags(tcg_ctx, tcg_ctx->cpu_env); + + /* Free temp. */ + tcg_temp_free_i64(tcg_ctx, r2); + tcg_temp_free_i64(tcg_ctx, r3); + tcg_temp_free_i32(tcg_ctx, r3_32); +} + + +void fpu_gen_dp_ir_3(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs1, int rs2, int rs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + /* rs1, rs2 and rs3 for TCG */ + TCGv_i64 r1 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r2 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r3 = tcg_temp_local_new_i64(tcg_ctx); + + /* Load contents from registers. */ + switch(operands) + { + case FPU_TYPE_D: + { + /* Load float64 values from regpairs designed by rs1 and rs2. 
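Each double-precision operand lives in a general-purpose register pair: the low word in reg_n and the high word in reg_n+1, concatenated into a 64-bit value by fpu_load_i64_2().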
*/ + fpu_load_i64_2(tcg_ctx, r1, r2, rs1, rs2); + } + break; + } + + switch(op) + { + case FPU_OP_ADD: + gen_helper_fadd_d(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_DIV: + gen_helper_fdiv_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2, r1); + break; + + case FPU_OP_SUB: + gen_helper_fsub_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2, r1); + break; + + case FPU_OP_MAX: + gen_helper_fmax_d(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_MIN: + gen_helper_fmin_d(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_MUL: + gen_helper_fmul_d(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + } + + switch(operands) + { + case FPU_TYPE_D: + { + /* Store result as float64 in regpair designed by rs3. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + } + + /* Mov softfloat flags into our register. */ + gen_helper_f_sync_fflags(tcg_ctx, tcg_ctx->cpu_env); + + /* Free temp. */ + tcg_temp_free_i64(tcg_ctx, r1); + tcg_temp_free_i64(tcg_ctx, r2); + tcg_temp_free_i64(tcg_ctx, r3); +} + + +void fpu_gen_cmpf_d(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int fcond, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGLabel *handle_nan; + TCGLabel *end; + + end = gen_new_label(tcg_ctx); + handle_nan = gen_new_label(tcg_ctx); + + TCGv_i64 r1 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r2 = tcg_temp_local_new_i64(tcg_ctx); + TCGv nan1 = tcg_temp_local_new(tcg_ctx); + TCGv nan2 = tcg_temp_local_new(tcg_ctx); + TCGv less = tcg_temp_local_new(tcg_ctx); + TCGv equal = tcg_temp_local_new(tcg_ctx); + TCGv unordered = tcg_temp_local_new(tcg_ctx); + TCGv res = tcg_temp_local_new(tcg_ctx); + TCGv final_shift = tcg_temp_local_new(tcg_ctx); + TCGv one = tcg_temp_local_new(tcg_ctx); + TCGv mask = tcg_temp_local_new(tcg_ctx); + + tcg_gen_movi_i32(tcg_ctx, one, 1); + + /* Load rs1 and rs2 registers. */ + fpu_load_i64(tcg_ctx, r1, rs1); + fpu_load_i64(tcg_ctx, r2, rs2); + + /* If r1 or r2 is a Nan, then error. */ + gen_helper_f_is_nan_d(tcg_ctx, nan1, tcg_ctx->cpu_env, r1); + gen_helper_f_is_nan_d(tcg_ctx, nan2, tcg_ctx->cpu_env, r2); + tcg_gen_or_i32(tcg_ctx, nan1, nan1, nan2); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_EQ, nan1, one, handle_nan); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_EQ, nan2, one, handle_nan); + + gen_helper_flt_d(tcg_ctx, less, tcg_ctx->cpu_env, r2, r1); + gen_helper_feq_d(tcg_ctx, equal, tcg_ctx->cpu_env, r2, r1); + tcg_gen_movi_i32(tcg_ctx, unordered, 0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, handle_nan); + + tcg_gen_movi_i32(tcg_ctx, less, 0); + tcg_gen_movi_i32(tcg_ctx, equal, 0); + tcg_gen_movi_i32(tcg_ctx, unordered, 1); + if (fcond & 0x8) + { + /* Invalid operation detected. */ + /* TODO: raise exception ? */ + } + + /* This is the end =) */ + gen_set_label(tcg_ctx, end); + + /* Set FPSR.CCn */ + tcg_gen_movi_i32(tcg_ctx, res, 0); + if (fcond & 1) + tcg_gen_or_i32(tcg_ctx, res, res, unordered); + if (fcond & 2) + tcg_gen_or_i32(tcg_ctx, res, res, equal); + if (fcond & 4) + tcg_gen_or_i32(tcg_ctx, res, res, less); + + /** + * Set CCn bit into FPSR (with n=fcbit). + * 1. Load FPSR into r1 + * 2. AND r1 with NOT bitmask for CCn + * 3. OR bitmask if res == 1 + * 4. 
Store r1 into FPSR + **/ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, nan1); + tcg_gen_movi_i32(tcg_ctx, final_shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, mask, one, final_shift); + tcg_gen_andc_tl(tcg_ctx, nan1, nan1, mask); + tcg_gen_shl_i32(tcg_ctx, res, res, final_shift); + tcg_gen_or_i32(tcg_ctx, nan1, nan1, res); + gen_set_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, nan1); + + /* Free variables. */ + tcg_temp_free_i64(tcg_ctx, r1); + tcg_temp_free_i64(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, nan1); + tcg_temp_free(tcg_ctx, nan2); + tcg_temp_free(tcg_ctx, less); + tcg_temp_free(tcg_ctx, equal); + tcg_temp_free(tcg_ctx, unordered); + tcg_temp_free(tcg_ctx, final_shift); + tcg_temp_free(tcg_ctx, one); + tcg_temp_free(tcg_ctx, mask); +} + +void fpu_gen_cmov_d(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int rs3, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGLabel *end, *otherwise; + TCGv_i64 r1 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r2 = tcg_temp_local_new_i64(tcg_ctx); + TCGv final_shift = tcg_temp_local_new(tcg_ctx); + TCGv res = tcg_temp_local_new(tcg_ctx); + TCGv fpsr = tcg_temp_local_new(tcg_ctx); + + end = gen_new_label(tcg_ctx); + otherwise = gen_new_label(tcg_ctx); + + + /* Load register contents. */ + fpu_load_i64(tcg_ctx, r1, rs1); + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Check if FPSR.CCn is set (with n=fcbit). */ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, fpsr); + tcg_gen_movi_i32(tcg_ctx, res, 1); + tcg_gen_movi_i32(tcg_ctx, final_shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, res, res, final_shift); + tcg_gen_and_i32(tcg_ctx, res, fpsr, res); + + /* If not set, r2 -> r3. */ + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, res, 0, otherwise); + + /* If set, do the move ! */ + fpu_store_i64(tcg_ctx, rs3, r1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, otherwise); + + fpu_store_i64(tcg_ctx, rs3, r2); + + /* End. */ + gen_set_label(tcg_ctx, end); + + /* Free variables. */ + tcg_temp_free_i64(tcg_ctx, r1); + tcg_temp_free_i64(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, final_shift); + tcg_temp_free(tcg_ctx, res); + tcg_temp_free(tcg_ctx, fpsr); +} + + +/** + * Instruction decoding and IR generation. 
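+ * Category 0 groups the FPU instructions selected through MASK_OP_FORMAT_FI() and decoded in fpu_decode_cat0_instn() below; category 1 covers the fused multiply family (FMAF/FMSF/FNMAF/FNMSF) forwarded to fpu_gen_cat1_ir().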
+ **/ + +void fpu_decode_cat0_instn(CPURH850State *env, DisasContext *ctx) +{ + int rs1 = GET_RS1(ctx->opcode); + int rs2 = GET_RS2(ctx->opcode); + int rs3 = GET_RS3(ctx->opcode); + + switch(MASK_OP_FORMAT_FI(ctx->opcode)) + { + case OPC_RH850_FPU_GROUP_SW: + switch(rs1) + { + case OPC_RH850_FPU_TRNCF_SW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SW, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_SW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SW, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_SW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SW, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SW, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_TRNCF_SUW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUW, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_SUW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUW, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_SUW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUW, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SUW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUW, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_DS: + switch(rs1) + { + case OPC_RH850_FPU_CVTF_WS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_WS, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_LS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_LS, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_HS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_HS, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SH: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SH, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_UWS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_UWS, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_ULS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_ULS, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_SL: + switch(rs1) + { + case OPC_RH850_FPU_TRNCF_SL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SL, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_SL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SL, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_SL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SL, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SL, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_TRNCF_SUL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUL, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_SUL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUL, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_SUL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUL, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SUL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUL, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_ABSS: + switch(rs1) + { + case OPC_RH850_FPU_ABSF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_ABS, rs2, rs3); + break; + + case OPC_RH850_FPU_NEGF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_NEG, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_S: + switch(rs1) + { + case OPC_RH850_FPU_SQRTF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_SQRT, rs2, rs3); + break; + + case OPC_RH850_FPU_RECIPF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_RECIP, rs2, rs3); + break; + + case OPC_RH850_FPU_RSQRTF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_RSQRT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_DW: + switch(rs1) + { + case OPC_RH850_FPU_TRNCF_DW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DW, 
FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_DW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DW, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_DW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DW, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_DW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DW, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_TRNCF_DUW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUW, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_DUW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUW, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_DUW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUW, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_DUW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUW, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_DD: + switch(rs1) + { + case OPC_RH850_FPU_CVTF_WD: + //fpu_gen_cvtf_wd(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_WD, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_LD: + //fpu_gen_cvtf_ld(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_LD, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SD: + //fpu_gen_cvtf_sd(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_SD, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_UWD: + //fpu_gen_cvtf_uwd(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_UWD, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_ULD: + //fpu_gen_cvtf_uld(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_ULD, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_DL: + switch(rs1) + { + case OPC_RH850_FPU_TRNCF_DL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DL, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_DL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DL, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_DL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DL, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_DL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DL, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_TRNCF_DUL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUL, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_DUL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUL, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_DUL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUL, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_DUL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUL, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_ABSD: + switch(rs1) + { + case OPC_RH850_FPU_ABSF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_ABS, rs2, rs3); + break; + + case OPC_RH850_FPU_NEGF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_NEG, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_D: + switch(rs1) + { + case OPC_RH850_FPU_SQRTF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_SQRT, rs2, rs3); + break; + + case OPC_RH850_FPU_RECIPF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_RECIP, rs2, rs3); + break; + + case OPC_RH850_FPU_RSQRTF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_RSQRT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_ADDF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_ADD, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_ADDF_D: + /* rs1, rs2 and rs3 must have bit 0 set to 0. */ + if ((rs1 & 1) || (rs2 & 1) || (rs3 & 1)) + { + /* TODO: Invalid instruction, must trigger exception. 
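Double-precision operands are even/odd register pairs, so an odd register designator (bit 0 set) is a reserved encoding here.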
*/ + } + else + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_ADD, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_SUBF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_SUB, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_SUBF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_SUB, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MULF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_MUL, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MULF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_MUL, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MAXF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_MAX, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MAXF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_MAX, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MINF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_MIN, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MINF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_MIN, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_DIVF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_DIV, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_DIVF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_DIV, rs1, rs2, rs3); + break; + + + default: + switch(ctx->opcode & (0x70 << 16)) + { + case OPC_RH850_FPU_CMOV_S_OR_TRFSR: + + /* If reg1==reg2==reg3==0, then it is a TRSFR instruction. */ + if ((rs1 == 0) && (rs2 == 0) && (rs3 == 0)) + { + fpu_gen_trfsr(env, ctx, (ctx->opcode & (0xe << 16))>>17 ); + } + else + { + /* Call generator with fcbit. */ + fpu_gen_cmov_s(env, ctx, rs1, rs2, rs3, (ctx->opcode & (0xe << 16))>>17 ); + } + break; + + case OPC_RH850_FPU_CMOV_D: + /* Call generator with fcbit. */ + fpu_gen_cmov_d(env, ctx, rs1, rs2, rs3, (ctx->opcode & (0xe << 16))>>17 ); + break; + + case OPC_RH850_FPU_CMP_S: + /* Call generator with fcond (rs3) and fcbit. */ + fpu_gen_cmpf_s(env, ctx, rs1, rs2, rs3, (ctx->opcode & (0xe << 16))>>17 ); + break; + + case OPC_RH850_FPU_CMP_D: + /* Call generator with fcond (rs3) and fcbit. */ + fpu_gen_cmpf_d(env, ctx, rs1, rs2, rs3, (ctx->opcode & (0xe << 16))>>17 ); + break; + + default: + /* Unknown inst. */ + break; + } + break; + } +} + +void fpu_decode_cat1_instn(CPURH850State *env, DisasContext *ctx) +{ + int rs1 = GET_RS1(ctx->opcode); + int rs2 = GET_RS2(ctx->opcode); + int rs3 = GET_RS3(ctx->opcode); + + fpu_gen_cat1_ir(env, ctx, MASK_OP_FORMAT_FI(ctx->opcode), rs1, rs2, rs3); +} + +/** + * Initialize FPU. + **/ + +void rh850_fpu_translate_init(void) +{ +} \ No newline at end of file diff --git a/qemu/target/rh850/fpu_translate.h b/qemu/target/rh850/fpu_translate.h new file mode 100644 index 0000000000..b21af6759f --- /dev/null +++ b/qemu/target/rh850/fpu_translate.h @@ -0,0 +1,41 @@ +/* + * QEMU RH850 CPU + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef RH850_FPU_H +#define RH850_FPU_H + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "tcg/tcg-op.h" +#include "translate.h" +#include "fpu_translate.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" +#include "exec/translator.h" +#include "translate.h" + +void fpu_decode_cat0_instn(CPURH850State *env, DisasContext *ctx); +void fpu_decode_cat1_instn(CPURH850State *env, DisasContext *ctx); +void fpu_init(CPURH850State *env); +void rh850_fpu_translate_init(void); + +#endif /* RH850_FPU_H */ \ No newline at end of file diff --git a/qemu/target/rh850/gdbstub.c b/qemu/target/rh850/gdbstub.c new file mode 100644 index 0000000000..2abc97fb64 --- /dev/null +++ b/qemu/target/rh850/gdbstub.c @@ -0,0 +1,169 @@ +/* + * RH850 GDB Server Stub + * + * Copyright (c) 2019-2020 Marko Klopcic, iSYSTEM Labs + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "exec/gdbstub.h" +#include "cpu.h" + +/* Mapping of winIDEA register index to env->sysBasicRegs() index. (see mail + * from Matic 2019-05-06 and isystem/doc/v850-tdep.c) + QEMU idx wI idx + 32, // eipc 0 + 33, // eipsw 1 + 34, // fepc 2 + 35, // fepsw 3 + 37, // psw 4 + 128, // fpsr 5 + 129, // fpepc 6 + 130, // fpst 7 + 131, // fpcc 8 + 132, // fpcfg 9 + 133, // fpec 10 + 44, // SESR N/A + 45, // EIIC 11 + 46, // FEIC 12 + 48, // CTPC 13 + 49, // CTPSW 14 + 52, // CTBP 15 + 60, // EIWR 16 + 61, // FEWR 17 + 63, // BSEL 18 + 150, // mcfg0 19 + 152, // RBASE 20 + 153, // EBASE 21 + 154, // intbp 22 + 155, // mctl 23 + 156, // pid 24 + 161, // sccfg 25 + 162, // scbp 26 + 182, // htcfg0 27 + 188, // mea 28 + 189, // asid 29 + 190 // mei 30 +*/ +#define BANK_MASK 0xf0000 +#define BANK_SHIFT 16 +#define SRI(selID, regID) (((selID) << BANK_SHIFT) | (regID)) +#define SRI0(regID) (regID) +#define SRI1(regID) SRI(1, (regID)) +#define SRI2(regID) SRI(2, (regID)) + +typedef int IdxType; +const IdxType winIdeaRegIdx2qemuSysRegIdx[] = { +// 0 1 2 3 4 5 6 7 8 9 +// --------------------------------------------- +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2 + +-1, -1, SRI0(EIPC_IDX), SRI0(EIPSW_IDX),SRI0(FEPC_IDX),SRI0(FEPSW_IDX),-1, SRI0(PSW_IDX), -1, -1, // 3 +-1, -1, -1, -1, -1, SRI0(EIIC_IDX),SRI0(FEIC_IDX),-1,SRI0(CTPC_IDX),SRI0(CTPSW_IDX), // 4 +-1, -1, SRI0(CTBP_IDX), -1, -1, -1, -1, -1, -1, -1, // 5 + +SRI0(EIWR_IDX),SRI0(FEWR_IDX),-1,SRI0(BSEL_IDX), -1, -1, -1, -1, -1, -1, // 6 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8 + +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 11 + +-1, -1, -1, -1, -1, -1, -1, -1, SRI0(FPSR_IDX), SRI0(FEPC_IDX), // 12 +SRI0(FPST_IDX),SRI0(FPCC_IDX),SRI0(FPCFG_IDX),SRI0(FPEC_IDX), -1,-1, -1, -1, -1, -1, // 
13 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 14 + +SRI1(MCFG0_IDX1),-1,SRI1(RBASE_IDX1),SRI1(EBASE_IDX1),SRI1(INTBP_IDX1),SRI1(MCTL_IDX1),SRI1(PID_IDX1),-1,-1, -1, // 15 +-1, SRI1(SCCFG_IDX1), SRI1(SCBP_IDX1), -1, -1, -1, -1, -1, -1, -1, // 16 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 17 + +-1, -1,SRI2(HTCFG0_IDX2), -1, -1, -1, -1, -1,SRI2(MEA_IDX2),SRI2(ASID_IDX2), // 18 +SRI2(MEI_IDX2), -1, -1, -1, -1, -1, -1, -1, -1, -1, // 19 +}; + +const int NUM_GDB_REGS = sizeof(winIdeaRegIdx2qemuSysRegIdx) / sizeof(IdxType); + +int rh850_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + + if (n < 32) { + return gdb_get_regl(mem_buf, env->gpRegs[n]); //gpr is now supposed to be progRegs + } else if (n == 64) { + return gdb_get_regl(mem_buf, env->pc); + } else if (n < NUM_GDB_REGS) { + int sysRegIdx = winIdeaRegIdx2qemuSysRegIdx[n]; + if (sysRegIdx >= 0) { + int selID = sysRegIdx >> BANK_SHIFT; + int regID = sysRegIdx & ~BANK_MASK; + if (selID == BANK_ID_BASIC_0 && regID == PSW_IDX) { + int psw = env->Z_flag | (env->S_flag << 1) | (env->OV_flag << 2) | (env->CY_flag << 3); + psw |= (env->SAT_flag << 4) | (env->ID_flag << 5) | (env->EP_flag << 6); + psw |= (env->NP_flag << 7) | (env->EBV_flag << 15) | (env->CU0_flag << 16); + psw |= (env->CU1_flag << 17) | (env->CU2_flag << 18) | (env->UM_flag << 30); + return gdb_get_regl(mem_buf, psw); + } else { + return gdb_get_regl(mem_buf, env->systemRegs[selID][regID]); // eipc, eipsw, fepc, fepsw, psw, ... + } + } + } + + *((uint32_t *)mem_buf) = 0xBAD0BAD0; + return 4; // registers in slots not set above are ignored +} + +int rh850_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + // at the moment our GDB server has different indices for writing single register + // will fix this if batch write will have to be supported or interfacing + // to other GDB servers for RH850 will be needed. + if (n > 0 && n < 32) { // skip R0, because it is always 0 + env->gpRegs[n] = ldtul_p(mem_buf); + } else if (n == 64) { + env->pc = ldtul_p(mem_buf); + } else if (n < NUM_GDB_REGS) { + int sysRegIdx = winIdeaRegIdx2qemuSysRegIdx[n]; + if (sysRegIdx >= 0) { + int selID = sysRegIdx >> BANK_SHIFT; + int regID = sysRegIdx & ~BANK_MASK; + if (selID == BANK_ID_BASIC_0 && regID == PSW_IDX) { + int psw = ldtul_p(mem_buf); + env->Z_flag = psw & 1; + env->S_flag = (psw >> 1) & 1; + env->OV_flag = (psw >> 2) & 1; + env->CY_flag = (psw >> 3) & 1; + env->SAT_flag = (psw >> 4) & 1; + env->ID_flag = (psw >> 5) & 1; + env->EP_flag = (psw >> 6) & 1; + env->NP_flag = (psw >> 7) & 1; + env->EBV_flag = (psw >> 15) & 1; + env->CU0_flag = (psw >> 16) & 1; + env->CU1_flag = (psw >> 17) & 1; + env->CU2_flag = (psw >> 18) & 1; + env->UM_flag = (psw >> 30) & 1; + } else { + env->systemRegs[selID][regID] = ldtul_p(mem_buf); // eipc, eipsw, fepc, fepsw, psw, ... + } + } + } + + return sizeof(target_ulong); +} diff --git a/qemu/target/rh850/helper.c b/qemu/target/rh850/helper.c new file mode 100644 index 0000000000..ee171f0dbb --- /dev/null +++ b/qemu/target/rh850/helper.c @@ -0,0 +1,539 @@ +/* + * RH850 emulation helpers for qemu. + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * Copyright (c) 2018-2019 iSYSTEM Labs d.o.o. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "exec/exec-all.h" + +#define RH850_DEBUG_INTERRUPT 0 + +int rh850_cpu_mmu_index(CPURH850State *env, bool ifetch) +{ + return 0; +} + +#ifndef CONFIG_USER_ONLY +/* + * Return RH850 IRQ number if an interrupt should be taken, else -1. + * Used in cpu-exec.c + * + * Adapted from Spike's processor_t::take_interrupt() + */ + +#if 0 /* Not used */ +static int rh850_cpu_hw_interrupts_pending(CPURH850State *env) +{ + + return EXCP_NONE; +} +#endif +#endif + +uint32_t psw2int(CPURH850State * env); +uint32_t mem_deref_4(CPUState * cs, uint32_t addr); + + +uint32_t psw2int(CPURH850State * env) +{ + uint32_t ret = 0; + ret |= env->UM_flag<<30; + ret |= env->CU0_flag<<16; + ret |= env->CU1_flag<<17; + ret |= env->CU2_flag<<18; + ret |= env->EBV_flag<<15; + ret |= env->NP_flag<<7; + ret |= env->EP_flag<<6; + ret |= env->ID_flag<<5; + ret |= env->SAT_flag<<4; + ret |= env->CY_flag<<3; + ret |= env->OV_flag<<2; + ret |= env->S_flag<<1; + ret |= env->Z_flag; + + return ret; +} + +/* + * RH850 interrupt handler. + **/ + +bool rh850_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ +#if !defined(CONFIG_USER_ONLY) + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + + //qemu_log("[cpu] exec_interrupt: got interrupt_req=%08x\n", interrupt_request); + + /* Handle FENMI interrupt. */ + if (interrupt_request == RH850_INT_FENMI) + { + /* Set exception info. */ + cs->exception_index = RH850_EXCP_FENMI; + env->exception_cause = 0xE0; + env->exception_priority = 1; + + /* Acknowledge interrupt. */ + rh850_cpu_do_interrupt(cs); + } + else if (interrupt_request == RH850_INT_FEINT) + { + if (!(env->systemRegs[BANK_ID_BASIC_2][PMR_IDX2] & (1 << env->exception_priority))) + { + /* Set exception info. */ + cs->exception_index = RH850_EXCP_FEINT; + env->exception_cause = 0xF0; + env->exception_priority = 3; + + /* Acknowledge interrupt. */ + rh850_cpu_do_interrupt(cs); + } + } + else if (interrupt_request == RH850_EXCP_EIINT) + { + //qemu_log("exec_interrupt got RH850_EXCP_EIINT\n"); + + /* Get interrupt request number. */ + //int intn = env->exception_cause & 0xfff; + int priority = 4; + + //qemu_log("[cpu] exec_interrupt: got interrupt_req=%08x\n", interrupt_request); + + /* Check if interrupt priority is not masked (through PMR). */ + if (!(env->systemRegs[BANK_ID_BASIC_2][PMR_IDX2] & (1 << priority))) + { + cs->exception_index = RH850_EXCP_EIINT; + //env->exception_cause = 0x1000 | (intn); + //env->exception_dv = !(interrupt_request & RH850_INT_TAB_REF); + env->exception_priority = priority; + + /* Acknowledge interrupt. */ + rh850_cpu_do_interrupt(cs); + } + else + { + //qemu_log("[cpu] interrupt priority is masked\n"); + } + } +#endif + + /* Interrupt request has been processed. 
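The pending request flag is cleared unconditionally below and the function always returns false; any actual exception dispatch has already been performed by rh850_cpu_do_interrupt() above.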
*/ + cs->interrupt_request = 0; + return false; +} + +#if !defined(CONFIG_USER_ONLY) + + +static int get_physical_address(CPURH850State *env, hwaddr *physical, + int *prot, target_ulong addr, + int access_type, int mmu_idx) +{ + + /* + * There is no memory virtualization in RH850 (at least for the targeted SoC) + * Address resolution is straightforward + */ + *physical = addr; + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return TRANSLATE_SUCCESS; + +} + +static void raise_mmu_exception(CPURH850State *env, target_ulong address, + MMUAccessType access_type) +{ + CPUState *cs = CPU(rh850_env_get_cpu(env)); + int page_fault_exceptions = RH850_EXCP_INST_PAGE_FAULT; + switch (access_type) { + case MMU_INST_FETCH: + cs->exception_index = page_fault_exceptions ? + RH850_EXCP_INST_PAGE_FAULT : RH850_EXCP_INST_ACCESS_FAULT; + break; + case MMU_DATA_LOAD: + cs->exception_index = page_fault_exceptions ? + RH850_EXCP_LOAD_PAGE_FAULT : RH850_EXCP_LOAD_ACCESS_FAULT; + break; + case MMU_DATA_STORE: + cs->exception_index = page_fault_exceptions ? + RH850_EXCP_STORE_PAGE_FAULT : RH850_EXCP_STORE_AMO_ACCESS_FAULT; + break; + default: + g_assert_not_reached(); + } + env->badaddr = address; +} + +hwaddr rh850_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +{ + RH850CPU *cpu = RH850_CPU(cs); + hwaddr phys_addr; + int prot; + int mmu_idx = cpu_mmu_index(&cpu->env, false); + + if (get_physical_address(&cpu->env, &phys_addr, &prot, addr, 0, mmu_idx)) { + return -1; + } + return phys_addr; +} + +void rh850_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, int mmu_idx, + uintptr_t retaddr) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + switch (access_type) { + case MMU_INST_FETCH: + cs->exception_index = RH850_EXCP_INST_ADDR_MIS; + break; + case MMU_DATA_LOAD: + cs->exception_index = RH850_EXCP_LOAD_ADDR_MIS; + break; + case MMU_DATA_STORE: + cs->exception_index = RH850_EXCP_STORE_AMO_ADDR_MIS; + break; + default: + g_assert_not_reached(); + } + env->badaddr = addr; + //qemu_log_mask(CPU_LOG_INT, "%s\n", __func__); + do_raise_exception_err(env, cs->exception_index, retaddr); +} + +#endif + +int rh850_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, + int rw, int mmu_idx) +{ + + + /* + * TODO: Add check to system register concerning MPU configuratuon MPLA, MPUA + * + */ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; +#if !defined(CONFIG_USER_ONLY) + hwaddr pa = 0; + int prot; +#endif + int ret = TRANSLATE_FAIL; + qemu_log_mask(CPU_LOG_MMU, + "%s pc " TARGET_FMT_lx " ad %" VADDR_PRIx " rw %d mmu_idx \ + %d\n", __func__, env->pc, address, rw, mmu_idx); + +#if !defined(CONFIG_USER_ONLY) + + ret = get_physical_address(env, &pa, &prot, address, rw, mmu_idx); + qemu_log_mask(CPU_LOG_MMU, + "%s address=%" VADDR_PRIx " ret %d physical " TARGET_FMT_plx + " prot %d\n", __func__, address, ret, pa, prot); + if (ret == TRANSLATE_SUCCESS) { + tlb_set_page(cs, address & TARGET_PAGE_MASK, pa & TARGET_PAGE_MASK, + prot, mmu_idx, TARGET_PAGE_SIZE); + } else if (ret == TRANSLATE_FAIL) { + raise_mmu_exception(env, address, rw); + } +#else + switch (rw) { + case MMU_INST_FETCH: + cs->exception_index = RH850_EXCP_INST_PAGE_FAULT; + break; + case MMU_DATA_LOAD: + cs->exception_index = RH850_EXCP_LOAD_PAGE_FAULT; + break; + case MMU_DATA_STORE: + cs->exception_index = RH850_EXCP_STORE_PAGE_FAULT; + break; + } +#endif + return ret; +} + + +uint32_t mem_deref_4(CPUState * cs, uint32_t addr){ + uint8_t * buf = g_malloc(4); + uint32_t ret_dword = 0; + 
cpu_memory_rw_debug(cs, addr, buf, 4, false); + + ret_dword |= buf[3] << 24; + ret_dword |= buf[2] << 16; + ret_dword |= buf[1] << 8; + ret_dword |= buf[0]; + g_free(buf); + return ret_dword; +} + + +void rh850_cpu_do_interrupt(CPUState *cs) +{ + + //qemu_log("[cpu] rh850_cpu_do_interrupt()\n"); + //qemu_log_mask(CPU_LOG_INT, "%s\n", __func__); +#if !defined(CONFIG_USER_ONLY) + uint32_t intbp; + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + + uint32_t direct_vector_ba; + qemu_log_mask(CPU_LOG_INT, "%s: entering switch\n", __func__); + switch (cs->exception_index) { + case RH850_EXCP_FETRAP: + + qemu_log_mask(CPU_LOG_INT, "%s: entering FETRAP handler\n", __func__); + // store PSW to FEPSW (and update env->EBV_flag) + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = psw2int(env); + // store PC to FEPC + env->systemRegs[BANK_ID_BASIC_0][FEPC_IDX] = env->pc+2; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = env->exception_cause; + + qemu_log_mask(CPU_LOG_INT, "%s, saved pc : %x\n", __func__,env->pc); + + // update PSW + env->UM_flag = 0; + env->NP_flag = 1; + env->EP_flag = 1; + env->ID_flag = 1; + + // modify PC, keep RBASE or EBASE bits 9 to 31 (discard bits 0 to 8) + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1] & 0xFFFFFE00; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1] & 0xFFFFFE00; + + qemu_log_mask(CPU_LOG_INT, "%s: direct vector addr : %x \n", __func__,direct_vector_ba); + env->pc = direct_vector_ba + 0x30; + break; + + case RH850_EXCP_TRAP: + qemu_log_mask(CPU_LOG_INT, "%s: entering TRAP handler\n", __func__); + // store PSW to EIPSW + env->systemRegs[BANK_ID_BASIC_0][EIPSW_IDX] = psw2int(env); + // store PC to EIPC + env->systemRegs[BANK_ID_BASIC_0][EIPC_IDX] = env->pc+4; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][EIIC_IDX] = env->exception_cause; + + env->UM_flag = 0; + env->EP_flag = 1; + env->ID_flag = 1; + + // modify PC, keep RBASE or EBASE bits 9 to 31 (discard bits 0 to 8) + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1] & 0xFFFFFE00; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1] & 0xFFFFFE00; + + if (env->exception_cause < 0x50) { + env->pc = direct_vector_ba + 0x40; + } else { + env->pc = direct_vector_ba + 0x50; + } + break; + + case RH850_EXCP_RIE: + //qemu_log("%s: entering RIE handler\n", __func__); + // store PSW to FEPSW + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = psw2int(env); + // store PC to FEPC + env->systemRegs[BANK_ID_BASIC_0][FEPC_IDX] = env->pc; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = env->exception_cause; + //qemu_log("%s, saved pc : %x\n", __func__,env->pc); + // update PSW + + env->UM_flag = 0; + env->NP_flag = 1; + env->EP_flag = 1; + env->ID_flag = 1; + + // modify PC, keep RBASE or EBASE bits 9 to 31 (discard bits 0 to 8) + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1] & 0xFFFFFE00; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1] & 0xFFFFFE00; + + //qemu_log("%s: direct vector addr : %x \n", __func__,direct_vector_ba); + env->pc = direct_vector_ba + 0x60; + //qemu_log("%s: pc : 0x%08x \n", __func__, direct_vector_ba+0x60); + break; + + case RH850_EXCP_SYSCALL: + qemu_log_mask(CPU_LOG_INT, "%s: entering SYSCALL handler\n", __func__); + uint32_t syscall_cfg = env->systemRegs[BANK_ID_BASIC_1][SCCFG_IDX1] & 0xff; + uint32_t syscall_number = env->exception_cause - 0x8000; + 
uint32_t syscall_bp = env->systemRegs[BANK_ID_BASIC_1][SCBP_IDX1]; + uint32_t handler_offset=0, deref_addr=0; + + if (syscall_number <= syscall_cfg) { + deref_addr = syscall_bp + (syscall_number<<2); + } else { + + deref_addr = syscall_bp; + } + + qemu_log_mask(CPU_LOG_INT, "%s syscall_cfg_size = %d\n", __func__,syscall_cfg); + qemu_log_mask(CPU_LOG_INT, "%s syscall_bp = %d\n", __func__,syscall_bp); + qemu_log_mask(CPU_LOG_INT, "%s syscall_num = %d\n", __func__,syscall_number); + qemu_log_mask(CPU_LOG_INT, "%s deref_addr = 0x%x\n", __func__,deref_addr); + handler_offset = mem_deref_4(cs,deref_addr); + qemu_log_mask(CPU_LOG_INT, "%s handler offset = %x\n", __func__,handler_offset); + + // store PSW to EIPSW + env->systemRegs[BANK_ID_BASIC_0][EIPSW_IDX] = psw2int(env); + // store PC to EIPC + env->systemRegs[BANK_ID_BASIC_0][EIPC_IDX] = env->pc+4; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][EIIC_IDX] = env->exception_cause; + + env->UM_flag = 0; + env->EP_flag = 1; + env->ID_flag = 1; + + // modify PC + env->pc = syscall_bp + handler_offset; + qemu_log_mask(CPU_LOG_INT, "%s: moving pc to = 0x%x\n", __func__,env->pc); + + break; + + case RH850_EXCP_FEINT: + //qemu_log("[cpu] entering FEINT handler\n"); + // store PSW to FEPSW + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = psw2int(env); + // store PC to FEPC + env->systemRegs[BANK_ID_BASIC_0][FEPC_IDX] = env->pc; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = env->exception_cause; + + /* Update PSW. */ + env->UM_flag = 0; + env->ID_flag = 1; + env->NP_flag = 1; + env->EP_flag = 0; + + /* Direct vector. */ + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1]; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1]; + + /* Redirect to FEINT exception handler. */ + env->pc = (direct_vector_ba & 0xFFFFFF00) + 0xF0; + //qemu_log("%s: moving pc to = 0x%x\n", __func__,env->pc); + break; + + case RH850_EXCP_FENMI: + //qemu_log("[cpu] entering FENMI handler\n"); + // store PSW to FEPSW + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = psw2int(env); + // store PC to FEPC + env->systemRegs[BANK_ID_BASIC_0][FEPC_IDX] = env->pc; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = env->exception_cause; + + /* Update PSW. */ + env->UM_flag = 0; + env->ID_flag = 1; + env->NP_flag = 1; + env->EP_flag = 0; + + /* Direct vector. */ + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1]; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1]; + + /* Redirect to FENMI exception handler. */ + env->pc = (direct_vector_ba & 0xFFFFFF00) + 0xE0; + break; + + case RH850_EXCP_EIINT: + //qemu_log("[cpu] entering EIINT handler\n"); + //qemu_log_mask(CPU_LOG_INT, "%s: entering EIINT handler\n", __func__); + + // store PSW to EIPSW + env->systemRegs[BANK_ID_BASIC_0][EIPSW_IDX] = psw2int(env); + // store PC to EIPC + env->systemRegs[BANK_ID_BASIC_0][EIPC_IDX] = env->pc; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][EIIC_IDX] = env->exception_cause; + // Set priority to ISPR + env->systemRegs[BANK_ID_BASIC_2][ISPR_IDX2] |= (1 << env->exception_priority); + + /* Set PSW.ID (disable further EI exceptions). */ + env->ID_flag = 1; + + /* Clear PSW.EP (we are processing an interrupt). */ + env->EP_flag = 0; + + /* Modify PC based on dispatch method (direct vector or table reference). 
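Table reference fetches the handler address from INTBP + 4 * channel; direct vector jumps to RBASE/EBASE + 0x100, adding priority * 16 when the RINT bit is clear (one handler per priority level).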
*/ + if (!env->exception_dv) + { + //qemu_log("[cpu] dispatch EIINT (table reference) for IRQ %d\n", env->exception_cause&0x1ff); + /* Table reference, first read INTBP value. */ + intbp = env->systemRegs[BANK_ID_BASIC_1][INTBP_IDX1]; + //qemu_log("[cpu] INTBP=0x%08x\n", intbp); + + /* Compute address of interrupt handler (based on channel). */ + env->pc = mem_deref_4(cs, intbp + 4*(env->exception_cause & 0x1ff)); + //qemu_log("[cpu] PC=0x%08x\n", env->pc); + } + else + { + //qemu_log("[cpu] dispatch EIINT (direct vector) for IRQ %d\n", env->exception_cause&0x1ff); + //qemu_log("[cpu] exception priority=%d\n", env->exception_priority); + /* Direct vector. */ + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1]; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1]; + //qemu_log("[cpu] Direct vector Base Address = 0x%08x\n", direct_vector_ba); + + /* Is RINT bit set ? */ + if (direct_vector_ba & 1) + { + //qemu_log("[cpu] RINT bit set\n"); + /* Reduced vector (one handler for any priority). */ + env->pc = (direct_vector_ba & 0xFFFFFF00) + 0x100; + } + else + { + //qemu_log("[cpu] RINT bit NOT set\n"); + /* One handler per priority level. */ + env->pc = (direct_vector_ba & 0xFFFFFF00) + 0x100 + (env->exception_priority<<4); + } + //qemu_log("[cpu] PC=0x%08x\n", env->pc); + } + break; + } + +#endif + cs->exception_index = EXCP_NONE; /* mark handled to qemu */ +} diff --git a/qemu/target/rh850/helper.h b/qemu/target/rh850/helper.h new file mode 100644 index 0000000000..24c9fa5865 --- /dev/null +++ b/qemu/target/rh850/helper.h @@ -0,0 +1,157 @@ +DEF_HELPER_4(uc_tracecode, void, i32, i32, ptr, i64) +DEF_HELPER_6(uc_traceopcode, void, ptr, i64, i64, i32, ptr, i64) +DEF_HELPER_1(uc_rh850_exit, void, env) + +/* Exceptions */ +DEF_HELPER_2(raise_exception, noreturn, env, i32) +DEF_HELPER_3(raise_exception_with_cause, noreturn, env, i32, i32) + + +/* Floating Point - rounding mode */ +DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_WG, void, env, i32) + +/* Floating Point - fused */ +DEF_HELPER_FLAGS_4(fmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fnmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fnmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fnmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fnmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) + +/* Floating Point - Single Precision */ +DEF_HELPER_FLAGS_2(f32_is_normal, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(f32_is_zero_or_normal, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(f32_is_infinity, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_1(f_sync_fflags, TCG_CALL_NO_RWG, void, env) + +DEF_HELPER_FLAGS_3(fadd_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fsub_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fmul_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fmax_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fmin_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fdiv_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_2(fabs_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fneg_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(ftrnc_sw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fceil_sw, 
TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(ffloor_sw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_sw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(ftrnc_suw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fceil_suw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(ffloor_suw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_suw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_ws, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_ls, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fcvt_hs, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_sh, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_uws, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_uls, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(ftrnc_sl, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fceil_sl, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(ffloor_sl, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_sl, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(ftrnc_sul, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fceil_sul, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(ffloor_sul, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_sul, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fsqrt_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(frecip_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(frsqrt_s, TCG_CALL_NO_RWG, i32, env, i32) + +DEF_HELPER_FLAGS_2(f_is_nan_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_3(fle_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(flt_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(feq_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_2(fcvt_w_s, TCG_CALL_NO_RWG, tl, env, i64) +DEF_HELPER_FLAGS_2(fcvt_wu_s, TCG_CALL_NO_RWG, tl, env, i64) + +DEF_HELPER_FLAGS_4(fmaf_s, TCG_CALL_NO_RWG, i32, env, i32, i32, i32) +DEF_HELPER_FLAGS_4(fmsf_s, TCG_CALL_NO_RWG, i32, env, i32, i32, i32) +DEF_HELPER_FLAGS_4(fnmaf_s, TCG_CALL_NO_RWG, i32, env, i32, i32, i32) +DEF_HELPER_FLAGS_4(fnmsf_s, TCG_CALL_NO_RWG, i32, env, i32, i32, i32) + + + + +#if defined(TARGET_RH85064) +DEF_HELPER_FLAGS_2(fcvt_l_s, TCG_CALL_NO_RWG, tl, env, i64) +DEF_HELPER_FLAGS_2(fcvt_lu_s, TCG_CALL_NO_RWG, tl, env, i64) +#endif +DEF_HELPER_FLAGS_2(fcvt_s_w, TCG_CALL_NO_RWG, i64, env, tl) +DEF_HELPER_FLAGS_2(fcvt_s_wu, TCG_CALL_NO_RWG, i64, env, tl) +#if defined(TARGET_RH85064) +DEF_HELPER_FLAGS_2(fcvt_s_l, TCG_CALL_NO_RWG, i64, env, tl) +DEF_HELPER_FLAGS_2(fcvt_s_lu, TCG_CALL_NO_RWG, i64, env, tl) +#endif +DEF_HELPER_FLAGS_1(fclass_s, TCG_CALL_NO_RWG_SE, tl, i64) + +/* Floating Point - Double Precision */ +DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmul_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmax_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmin_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fdiv_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_2(fabs_d, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fneg_d, TCG_CALL_NO_RWG, i64, env, i64) + +DEF_HELPER_FLAGS_2(ftrnc_dw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fceil_dw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(ffloor_dw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fcvt_dw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(ftrnc_duw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fceil_duw, TCG_CALL_NO_RWG, i32, 
env, i64) +DEF_HELPER_FLAGS_2(ffloor_duw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fcvt_duw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fcvt_wd, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_ld, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_sd, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_uwd, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_uld, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(ftrnc_dl, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fceil_dl, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(ffloor_dl, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_dl, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(ftrnc_dul, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fceil_dul, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(ffloor_dul, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_dul, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fsqrt_d, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(frecip_d, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(frsqrt_d, TCG_CALL_NO_RWG, i64, env, i64) + +DEF_HELPER_FLAGS_2(f_is_nan_d, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_3(fle_d, TCG_CALL_NO_RWG, i32, env, i64, i64) +DEF_HELPER_FLAGS_3(flt_d, TCG_CALL_NO_RWG, i32, env, i64, i64) +DEF_HELPER_FLAGS_3(feq_d, TCG_CALL_NO_RWG, i32, env, i64, i64) + + + +DEF_HELPER_FLAGS_2(fcvt_s_d, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_d_s, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_w_d, TCG_CALL_NO_RWG, tl, env, i64) +DEF_HELPER_FLAGS_2(fcvt_wu_d, TCG_CALL_NO_RWG, tl, env, i64) +#if defined(TARGET_RH85064) +DEF_HELPER_FLAGS_2(fcvt_l_d, TCG_CALL_NO_RWG, tl, env, i64) +DEF_HELPER_FLAGS_2(fcvt_lu_d, TCG_CALL_NO_RWG, tl, env, i64) +#endif +DEF_HELPER_FLAGS_2(fcvt_d_w, TCG_CALL_NO_RWG, i64, env, tl) +DEF_HELPER_FLAGS_2(fcvt_d_wu, TCG_CALL_NO_RWG, i64, env, tl) +#if defined(TARGET_RH85064) +DEF_HELPER_FLAGS_2(fcvt_d_l, TCG_CALL_NO_RWG, i64, env, tl) +DEF_HELPER_FLAGS_2(fcvt_d_lu, TCG_CALL_NO_RWG, i64, env, tl) +#endif +DEF_HELPER_FLAGS_1(fclass_d, TCG_CALL_NO_RWG_SE, tl, i64) + +/* Special functions */ +//DEF_HELPER_3(csrrw, tl, env, tl, tl) +//DEF_HELPER_4(csrrs, tl, env, tl, tl, tl) +//DEF_HELPER_4(csrrc, tl, env, tl, tl, tl) +#ifndef CONFIG_USER_ONLY +//DEF_HELPER_2(sret, tl, env, tl) +//DEF_HELPER_2(mret, tl, env, tl) +//DEF_HELPER_1(wfi, void, env) +DEF_HELPER_1(tlb_flush, void, env) +#endif diff --git a/qemu/target/rh850/instmap.h b/qemu/target/rh850/instmap.h new file mode 100644 index 0000000000..2cbf2aed2f --- /dev/null +++ b/qemu/target/rh850/instmap.h @@ -0,0 +1,624 @@ +/* + * RH850 emulation for qemu: Instruction decode helpers + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef _RH850_INSTMAP_H +#define _RH850_INSTMAP_H + +enum{ + /*SIGNED INT*/ + COND_RH850_BGE = 1110, + COND_RH850_BGT = 1111, + COND_RH850_BLE = 0111, + COND_RH850_BLT = 0110, + /*UNSIGNED INT*/ + COND_RH850_BH = 1011, + COND_RH850_BL = 0001, + COND_RH850_BNH = 0011, + COND_RH850_BNL = 1001, + /*COMMON*/ + COND_RH850_BE = 0010, + COND_RH850_BNE = 1010, + /*OTHERS*/ + COND_RH850_BC = 0001, + COND_RH850_BF = 1010, + COND_RH850_BN = 0100, + COND_RH850_BNC = 1001, + COND_RH850_BNV = 1000, + COND_RH850_BNZ = 1010, + COND_RH850_BP = 1100, + COND_RH850_BR = 0101, + COND_RH850_BSA = 1101, + COND_RH850_BT = 0010, + COND_RH850_BV = 0000, + COND_RH850_BZ = 0010, +}; + +#define MASK_OP_MAJOR(op) (op & (0x3F << 5)) // the major opcode in rh850 is at bits 10-5 +enum { + /* FORMAT I */ // unique opcodes and grouped instructions + OPC_RH850_16bit_0 = (0x0 << 5), // group with opcode 0x0 (nop, synci, synce, syncm, syncp, mov) + OPC_RH850_NOT_reg1_reg2 = (0x1 << 5), + OPC_RH850_16bit_2 = (0x2 << 5), // group with opcode 0x2 (rie, switch, divh, fetrap) + OPC_RH850_16bit_3 = (0x3 << 5), // group with opcode 0x3 (jmp,sld.bu,sld.hu) + OPC_RH850_16bit_4 = (0x4 << 5), // group with opcode 0x4 (zyb, satsub) + OPC_RH850_16bit_5 = (0x5 << 5), // group with opcode 0x5 (sxb, satsub) + OPC_RH850_16bit_6 = (0x6 << 5), // group with opcode 0x6 (zyh, satadd) + OPC_RH850_16bit_7 = (0x7 << 5), // group with opcode 0x7 (sxh, mulh) + OPC_RH850_OR_reg1_reg2 = (0x8 << 5), + OPC_RH850_XOR_reg1_reg2 = (0x9 << 5), + OPC_RH850_AND_reg1_reg2 = (0xA << 5), + OPC_RH850_TST_reg1_reg2 = (0xB << 5), + OPC_RH850_SUBR_reg1_reg2 = (0xC << 5), + OPC_RH850_SUB_reg1_reg2 = (0xD << 5), + OPC_RH850_ADD_reg1_reg2 = (0xE << 5), + OPC_RH850_CMP_reg1_reg2 = (0xF << 5), + + /* FORMAT II */ + OPC_RH850_16bit_16 = (0x10 << 5), // group with opcode 0x10 (mov,callt) + OPC_RH850_16bit_17 = (0x11 << 5), // group with opcode 0x11 (callt, satadd) + OPC_RH850_ADD_imm5_reg2= (0x12 << 5), // group with opcode 0x12 (add) + OPC_RH850_CMP_imm5_reg2 = (0x13 << 5), // group with opcode 0x13 (cmp) + OPC_RH850_SHR_imm5_reg2 = (0x14 << 5), + OPC_RH850_SAR_imm5_reg2 = (0x15 << 5), + OPC_RH850_SHL_imm5_reg2 = (0x16 << 5), + OPC_RH850_MULH_imm5_reg2 = (0x17 << 5), + + /*FORMAT III */ + OPC_RH850_BCOND = (0xB << 7), // different mask! (bits 10-7) + + /* FORMAT IV */ // different mask! (bits 10-7) + OPC_RH850_16bit_SLDB = (0x6 << 5), + OPC_RH850_16bit_SLDH = (0x8 << 5), + OPC_RH850_16bit_IV10 = (0xA << 5), // group with opcode 0xA (sld.w,sst.w) + OPC_RH850_16bit_SSTB = (0x7 << 5), + OPC_RH850_16bit_SSTH = (0x9 << 5), + + /* FORMAT VI */ + OPC_RH850_ADDI_imm16_reg1_reg2 = (0x30 << 5), + OPC_RH850_ANDI_imm16_reg1_reg2 = (0x36 << 5), + OPC_RH850_MOVEA = (0x31 << 5), // this is also MOV 3, which is 48 bit + OPC_RH850_MOVHI_imm16_reg1_reg2 = (0x32 << 5), + OPC_RH850_ORI_imm16_reg1_reg2 = (0x34 << 5), + OPC_RH850_SATSUBI_imm16_reg1_reg2= (0x33 << 5), + OPC_RH850_XORI_imm16_reg1_reg2 = (0x35 << 5), + + + /* FORMAT VII */ + + OPC_RH850_LOOP = (0x37 << 5), //same as MULHI in format VI !!!! + + OPC_RH850_LDB = (0x38 << 5), + OPC_RH850_LDH_LDW = (0x39 << 5), + OPC_RH850_STB = (0x3A << 5), + OPC_RH850_STH_STW = (0x3B << 5), //the store halfword and store word instructions differ on LSB displacement bit 16 (0=ST.H, 1=ST.W) (format VII) + + OPC_RH850_ST_LD_0 = (0x3C << 5), //5 instructions share this opcode, sub-op bits 11-15 are 0, inst. 
differ in sub-op bits 16-19 (ST.B2=D, ST.W2=F) (format XIV) + OPC_RH850_ST_LD_1 = (0x3D << 5), //5 instructions share this opcode, sub-op bits 11-15 are 0, inst. differ in sub-op bits 16-19 (ST.DW=F, ST.H2=D) (format XIV) + //OPC_RH850_LDHU = (0x3F << 5), //bits 11-15 are not all 0 + + OPC_RH850_32bit_1 = (0x3F << 5), // 111111 + + + + + OPC_RH850_BIT_MANIPULATION_2 = (0x3E << 5), + + OPC_RH850_FORMAT_V_XIII = (0x1E << 6), + + + OPC_RH850_MULH1 = (0x7 << 5), + OPC_RH850_MULH2 = (0x17 << 5), + + +}; + +enum{ + OPC_RH850_SET1_reg2_reg1 = 0, + OPC_RH850_NOT1_reg2_reg1 = 2, + OPC_RH850_CLR1_reg2_reg1 = 4, + OPC_RH850_TST1_reg2_reg1 = 6, +}; + +enum{ + OPC_RH850_SET1_bit3_disp16_reg1 = 1, + OPC_RH850_NOT1_bit3_disp16_reg1 = 3, + OPC_RH850_CLR1_bit3_disp16_reg1 = 5, + OPC_RH850_TST1_bit3_disp16_reg1 = 7, +}; + +enum{ + OPC_RH850_MOV_reg1_reg2 = 1, + OPC_RH850_MOV_imm5_reg2 = 2, + OPC_RH850_MOV_imm32_reg1 = 3, + OPC_RH850_MOVEA_imm16_reg1_reg2 = 4, +}; + +enum{ + OPC_RH850_SATADD_reg1_reg2 = 1, + OPC_RH850_SATADD_imm5_reg2 = 2, + OPC_RH850_SATADD_reg1_reg2_reg3 = 3, + OPC_RH850_SATSUB_reg1_reg2 = 4, + OPC_RH850_SATSUB_reg1_reg2_reg3 = 5, + OPC_RH850_SATSUBR_reg1_reg2 = 6, +}; + +enum{ + OPC_RH850_MUL_reg1_reg2_reg3 = 1, + OPC_RH850_MUL_imm9_reg2_reg3 = 2, + OPC_RH850_MULH_reg1_reg2 = 3, + //OPC_RH850_MULH_imm5_reg2 = 4, + OPC_RH850_MULHI_imm16_reg1_reg2 = 5, + OPC_RH850_MULU_reg1_reg2_reg3 = 8, + OPC_RH850_MULU_imm9_reg2_reg3 = 9, +}; + +enum{ + OPC_RH850_ADF_cccc_reg1_reg2_reg3 = 10, + OPC_RH850_SBF_cccc_reg1_reg2_reg3 = 11, + OPC_RH850_DIVH_reg1_reg2 = 12, +}; + +enum{ //enum for gen_data_manipulation cases + OPC_RH850_SHR_reg1_reg2 = 111, + OPC_RH850_SHR_reg1_reg2_reg3 = 222, + OPC_RH850_CMOV_cccc_reg1_reg2_reg3 = 333, + OPC_RH850_CMOV_cccc_imm5_reg2_reg3 = 444, + OPC_RH850_ROTL_reg1_reg2_reg3 = 445, + OPC_RH850_ROTL_imm5_reg2_reg3 = 446, + OPC_RH850_SAR_reg1_reg2 = 447, + OPC_RH850_SAR_reg1_reg2_reg3 = 448, + OPC_RH850_SASF_cccc_reg2 = 449, + OPC_RH850_SETF_cccc_reg2 = 450, + OPC_RH850_SHL_reg1_reg2 = 451, + OPC_RH850_SHL_reg1_reg2_reg3 = 453, + OPC_RH850_SXB_reg1 = 454, + OPC_RH850_SXH_reg1 = 455, + OPC_RH850_ZXB_reg1 = 456, + OPC_RH850_ZXH_reg1 = 457, + + + +}; + +enum{ + OPC_RH850_LDSR_reg2_regID_selID = 1, + OPC_RH850_STSR_regID_reg2_selID = 2, + //check for unintentional matching + OPC_RH850_PREPARE_list12_imm5 = 12, + OPC_RH850_PREPARE_list12_imm5_sp = 13, + OPC_RH850_RIE = 3, + OPC_RH850_CALLT_imm6 = 4, + OPC_RH850_CAXI_reg1_reg2_reg3 = 5, + OPC_RH850_DISPOSE_imm5_list12 = 7, + OPC_RH850_DISPOSE_imm5_list12_reg1 = 8, + OPC_RH850_FETRAP_vector4 = 15, + OPC_RH850_SWITCH_reg1 = 10, +}; + +enum{ // magic numbers for branch opcodes + OPC_RH850_JR_imm22 = 0, + OPC_RH850_JR_imm32 = 1, + OPC_RH850_JARL_disp22_reg2 = 2, + OPC_RH850_JARL_disp32_reg1 = 3, //48-bit + OPC_RH850_JARL_reg1_reg3 = 4, + OPC_RH850_JMP_reg1 = 5, + OPC_RH850_JMP_disp32_reg1 = 6, + +}; + + +#define MASK_OP_FORMAT_I_0(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 11)) | (op & (0x1F << 0))) +enum { + OPC_RH850_NOP = OPC_RH850_16bit_0 | (0x0 << 11) | (0x0 << 0), + OPC_RH850_SYNCI = OPC_RH850_16bit_0 | (0x0 << 11) | (0x1C << 0), + OPC_RH850_SYNCE = OPC_RH850_16bit_0 | (0x0 << 11) | (0x1D << 0), + OPC_RH850_SYNCM = OPC_RH850_16bit_0 | (0x0 << 11) | (0x1E << 0), + OPC_RH850_SYNCP = OPC_RH850_16bit_0 | (0x0 << 11) | (0x1F << 0) +}; + + + +#define MASK_OP_ST_LD0(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 11)) | (op & (0xF << 16))) +enum { + + OPC_RH850_LDB2 = OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0x5 << 16), + OPC_RH850_LDH2 = 
OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0x7 << 16), + OPC_RH850_LDW2 = OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0x9 << 16), + OPC_RH850_STB2 = OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0xD << 16), //sub-op bits 11-15 are 0, inst. differ in sub-op bits 16-19 (ST.B2=D, ST.W2=F) (format XIV) + OPC_RH850_STW2 = OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0xF << 16), + +}; +#define MASK_OP_ST_LD1(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 11)) | (op & (0xF << 16))) +enum { + + OPC_RH850_LDBU2 = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0x5 << 16), + OPC_RH850_LDHU2 = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0x7 << 16), + OPC_RH850_LDDW = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0x9 << 16), + OPC_RH850_STDW = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0xF << 16), + OPC_RH850_STH2 = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0xD << 16), +}; + +#define MASK_OP_32BIT_SUB(op) (op & (0xF << 23)) +enum { + OPC_RH850_LDSR_RIE_SETF_STSR = (0x0 << 23), + OPC_RH850_FORMAT_IX = (0x1 << 23), // 0001 + OPC_RH850_FORMAT_X = (0x2 << 23), // 0010 + OPC_RH850_MUL_INSTS = (0x4 << 23), // 0100 this is also for SASF + OPC_RH850_FORMAT_XI = (0x5 << 23), // 0101 + OPC_RH850_FORMAT_XII = (0x6 << 23), // 0110 + OPC_RH850_ADDIT_ARITH = (0x7 << 23), // 0111 + OPC_RH850_FORMAT_FI_CAT0 = (0x8 << 23), // 1000 used for floating-point instructions + OPC_RH850_FORMAT_FI_CAT1 = (0x9 << 23) // 1001 used for specific FPU instructions +}; + +#define MASK_OP_FORMAT_IX(op) (op & (0x3 << 21)) //0001 on b26-b23 +enum { + OPC_RH850_BINS_0 = (0x0 << 21), //BINS0,SHR, SHR2 + OPC_RH850_BINS_1 = (0x1 << 21), //BINS1,SAR,SAR2 + OPC_RH850_BINS_2 = (0x2 << 21), //BINS2,SHL, SHL2, ROTL, ROTL2 + OPC_RH850_BIT_MANIPULATION = (0x3 << 21), //clr1, set, tst1, not1, caxi in format IX +}; + +#define MASK_OP_FORMAT_X(op) (op & (0xFFF << 11)) //0010 on b26-b23 +enum { + OPC_RH850_CTRET = (0x880 << 11), + OPC_RH850_DI = (0xC00 << 11), + OPC_RH850_EI = (0XC10 << 11), + OPC_RH850_EIRET = (0X900 << 11), + OPC_RH850_FERET = (0X940 << 11), + OPC_RH850_HALT = (0X400 << 11), + OPC_RH850_JARL3 = (0XC18 << 11), + OPC_RH850_SNOOZE = (0x401 << 11), + OPC_RH850_SYSCALL = (0xC1A << 11), + OPC_RH850_TRAP = (0x000 << 11), + OPC_RH850_PREF = (0xC1B << 11), + OPC_RH850_POPSP_rh_rt = (0xC0C << 11), + OPC_RH850_PUSHSP_rh_rt = (0xC08 << 11), + //don't forget CACHE + OPC_RH850_CLL = (0xC1F << 11), + +}; + +#define MASK_OP_FORMAT_XI(op) (op & (0x7F << 16)) +enum { + OPC_RH850_DIVH_reg1_reg2_reg3 = 0x0, + OPC_RH850_DIVHU_reg1_reg2_reg3 = 0x2, + OPC_RH850_DIV_reg1_reg2_reg3 = 0x40, + OPC_RH850_DIVQ = 0x7C, + OPC_RH850_DIVQU = 0x7E, + OPC_RH850_DIVU_reg1_reg2_reg3 = 0x42 +}; + +#define MASK_OP_FORMAT_XII(op) (op & (0x3 << 17)) +enum { + OPC_RH850_BSW_reg2_reg3 = (0x0 << 0), + OPC_RH850_BSH_reg2_reg3 = (0x1 << 0), + OPC_RH850_HSW_reg2_reg3 = (0x2 << 0), + OPC_RH850_HSH_reg2_reg3 = (0x3 << 0), + // SCHOL, SCHOR, SCH1L, SCH1R + OPC_RH850_SCH0R_reg2_reg3 = (0x0 << 0), + OPC_RH850_SCH1R_reg2_reg3 = (0x1 << 0), //this is also STCW + OPC_RH850_SCH0L_reg2_reg3 = (0x2 << 0), + OPC_RH850_SCH1L_reg2_reg3 = (0x3 << 0), + + +}; + +#define MASK_ADDIT_ARITH_OP(op) (op & (0x3 << 21)) +enum { + OPC_RH850_SBF_SATSUB = 0x0, + OPC_RH850_ADF_SATADD3 = 0x1, + OPC_RH850_MAC_reg1_reg2_reg3_reg4 = 0x2, + OPC_RH850_MACU_reg1_reg2_reg3_reg4 = 0x3, +}; + +/* + * FPU instruction format (F:I) + */ + +enum { + FPU_TYPE_S, + FPU_TYPE_D, + FPU_TYPE_LS, + FPU_TYPE_LD, + FPU_TYPE_DL, + FPU_TYPE_SD, + FPU_TYPE_SL, + FPU_TYPE_DW, + FPU_TYPE_WD, + FPU_TYPE_HS, + FPU_TYPE_SH, + FPU_TYPE_SW, + FPU_TYPE_WS, + FPU_TYPE_DUW, + FPU_TYPE_SUW, + FPU_TYPE_UWD, + 
FPU_TYPE_UWS, + FPU_TYPE_ULD, + FPU_TYPE_ULS, + FPU_TYPE_SUL, + FPU_TYPE_DUL +}; + +enum { + FPU_OP_ABS, + FPU_OP_ADD, + FPU_OP_CEIL, + FPU_OP_CVT, + FPU_OP_DIV, + FPU_OP_FLOOR, + FPU_OP_CMOV, + FPU_OP_CMP, + FPU_OP_MAX, + FPU_OP_MIN, + FPU_OP_MUL, + FPU_OP_NEG, + FPU_OP_RECIP, + FPU_OP_RSQRT, + FPU_OP_SQRT, + FPU_OP_SUB, + FPU_OP_TRNC +}; + +#define MASK_OP_FORMAT_FI(op) (op & (0x7F << 16)) +enum { + OPC_RH850_FPU_CMOV_S_OR_TRFSR = 0x00 << 16, + OPC_RH850_FPU_CMOV_D = 0x10 << 16, + OPC_RH850_FPU_CMP_S = 0x20 << 16, + OPC_RH850_FPU_CMP_D = 0x30 << 16, + OPC_RH850_FPU_GROUP_CMPD= 0x30 << 16, + OPC_RH850_FPU_GROUP_SW = 0x40 << 16, + OPC_RH850_FPU_GROUP_DS = 0x42 << 16, + OPC_RH850_FPU_GROUP_SL = 0x44 << 16, + OPC_RH850_FPU_GROUP_ABSS = 0x48 << 16, + OPC_RH850_FPU_GROUP_S = 0x4E << 16, + OPC_RH850_FPU_GROUP_DW = 0x50 << 16, + OPC_RH850_FPU_GROUP_DD = 0x52 << 16, + OPC_RH850_FPU_GROUP_DL = 0x54 << 16, + OPC_RH850_FPU_GROUP_ABSD = 0x58 << 16, + OPC_RH850_FPU_GROUP_D = 0x5E << 16, + OPC_RH850_FPU_ADDF_S = 0x60 << 16, + OPC_RH850_FPU_SUBF_S = 0x62 << 16, + OPC_RH850_FPU_MULF_S = 0x64 << 16, + OPC_RH850_FPU_MAXF_S = 0x68 << 16, + OPC_RH850_FPU_MINF_S = 0x6A << 16, + OPC_RH850_FPU_DIVF_S = 0x6E << 16, + OPC_RH850_FPU_ADDF_D = 0x70 << 16, + OPC_RH850_FPU_SUBF_D = 0x72 << 16, + OPC_RH850_FPU_MULF_D = 0x74 << 16, + OPC_RH850_FPU_MAXF_D = 0x78 << 16, + OPC_RH850_FPU_MINF_D = 0x7A << 16, + OPC_RH850_FPU_DIVF_D = 0x7E << 16 +}; + +/* OPC_RH850_FPU_GROUP_CMPS/D, variant defined by cond reg3. */ +enum { + OPC_RH850_FPU_CMPS_F = 0x20, + OPC_RH850_FPU_CMPS_UN, + OPC_RH850_FPU_CMPS_EQ, + OPC_RH850_FPU_CMPS_UEQ, + OPC_RH850_FPU_CMPS_OLT, + OPC_RH850_FPU_CMPS_ULT, + OPC_RH850_FPU_CMPS_OLE, + OPC_RH850_FPU_CMPS_ULE, + OPC_RH850_FPU_CMPS_SF, + OPC_RH850_FPU_CMPS_NGLE, + OPC_RH850_FPU_CMPS_SEQ, + OPC_RH850_FPU_CMPS_NGL, + OPC_RH850_FPU_CMPS_LT, + OPC_RH850_FPU_CMPS_NGE, + OPC_RH850_FPU_CMPS_LE, + OPC_RH850_FPU_CMPS_NGT +}; + +/* OPC_RH850_FPU_GROUP_SW, variant defined by reg1 */ +enum { + OPC_RH850_FPU_TRNCF_SW=0x1, + OPC_RH850_FPU_CEILF_SW, + OPC_RH850_FPU_FLOORF_SW, + OPC_RH850_FPU_CVTF_SW, + OPC_RH850_FPU_TRNCF_SUW=0x11, + OPC_RH850_FPU_CEILF_SUW, + OPC_RH850_FPU_FLOORF_SUW, + OPC_RH850_FPU_CVTF_SUW=0x14 +}; + +/* OPC_RH850_FPU_GROUP_DS, variant defined by reg1 */ +enum { + OPC_RH850_FPU_CVTF_WS=0x0, + OPC_RH850_FPU_CVTF_LS, + OPC_RH850_FPU_CVTF_HS, + OPC_RH850_FPU_CVTF_SH, + OPC_RH850_FPU_CVTF_UWS=0x10, + OPC_RH850_FPU_CVTF_ULS +}; + +/* OPC_RH850_FPU_GROUP_SL, variant defined by reg1 */ +enum { + OPC_RH850_FPU_TRNCF_SL = 0x1, + OPC_RH850_FPU_CEILF_SL, + OPC_RH850_FPU_FLOORF_SL, + OPC_RH850_FPU_CVTF_SL, + OPC_RH850_FPU_TRNCF_SUL = 0x11, + OPC_RH850_FPU_CEILF_SUL, + OPC_RH850_FPU_FLOORF_SUL, + OPC_RH850_FPU_CVTF_SUL +}; + +/* OPC_RH850_FPU_GROUP_ABSS, variant defined by reg1 */ +enum { + OPC_RH850_FPU_ABSF_S = 0x0, + OPC_RH850_FPU_NEGF_S, +}; + +/* OPC_RH850_FPU_GROUP_S, variant defined by reg1 */ +enum { + OPC_RH850_FPU_SQRTF_S = 0x0, + OPC_RH850_FPU_RECIPF_S, + OPC_RH850_FPU_RSQRTF_S +}; + + +/* OPC_RH850_FPU_GROUP_DW, variant defined by reg1 */ +enum { + OPC_RH850_FPU_TRNCF_DW = 0x1, + OPC_RH850_FPU_CEILF_DW, + OPC_RH850_FPU_FLOORF_DW, + OPC_RH850_FPU_CVTF_DW, + OPC_RH850_FPU_TRNCF_DUW = 0x11, + OPC_RH850_FPU_CEILF_DUW, + OPC_RH850_FPU_FLOORF_DUW, + OPC_RH850_FPU_CVTF_DUW +}; + +/* OPC_RH850_FPU_GROUP_DD, variant defined by reg1 */ +enum { + OPC_RH850_FPU_CVTF_WD = 0x00, + OPC_RH850_FPU_CVTF_LD, + OPC_RH850_FPU_CVTF_SD, + OPC_RH850_FPU_CVTF_UWD = 0x10, + OPC_RH850_FPU_CVTF_ULD +}; + +/* 
OPC_RH850_FPU_GROUP_DL, variant defined by reg1 */ +enum { + OPC_RH850_FPU_TRNCF_DL = 0x1, + OPC_RH850_FPU_CEILF_DL, + OPC_RH850_FPU_FLOORF_DL, + OPC_RH850_FPU_CVTF_DL, + OPC_RH850_FPU_TRNCF_DUL = 0x11, + OPC_RH850_FPU_CEILF_DUL, + OPC_RH850_FPU_FLOORF_DUL, + OPC_RH850_FPU_CVTF_DUL +}; + +/* OPC_RH850_FPU_GROUP_ABSD, variant defined by reg1 */ +enum { + OPC_RH850_FPU_ABSF_D = 0x0, + OPC_RH850_FPU_NEGF_D, +}; + +/* OPC_RH850_FPU_GROUP_D, variant defined by reg1 */ +enum { + OPC_RH850_FPU_SQRTF_D = 0x0, + OPC_RH850_FPU_RECIPF_D, + OPC_RH850_FPU_RSQRTF_D +}; + +/* Format F:I with category=1 */ +enum { + OPC_RH850_FPU_FMAF_S = 0x60 << 16, + OPC_RH850_FPU_FMSF_S = 0x62 << 16, + OPC_RH850_FPU_FNMAF_S = 0x64 << 16, + OPC_RH850_FPU_FNMSF_S = 0x66 << 16 +}; + +#define MASK_OP_FORMAT_V_FORMAT_XIII(op) (op & (0x1F << 6)) + + +enum { + operation_LDL_W = 0, + operation_STC_W = 1, + operation_CLL = 2, +}; + + + +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// + + +#define GET_B_IMM(inst) ((extract32(inst, 8, 4) << 1) \ + | (extract32(inst, 25, 6) << 5) \ + | (extract32(inst, 7, 1) << 11) \ + | (sextract64(inst, 31, 1) << 12)) + +#define GET_STORE_IMM(inst) ((extract32(inst, 7, 5)) \ + | (sextract64(inst, 25, 7) << 5)) + +#define GET_JAL_IMM(inst) ((extract32(inst, 21, 10) << 1) \ + | (extract32(inst, 20, 1) << 11) \ + | (extract32(inst, 12, 8) << 12) \ + | (sextract64(inst, 31, 1) << 20)) + + +#define GET_RS1(inst) extract32(inst, 0, 5) //appropriate for RH850 +#define GET_RS2(inst) extract32(inst, 11, 5) //appropriate for RH850 +#define GET_RS3(inst) extract32(inst, 27, 5) //appropriate for RH850 +#define GET_DISP(inst) (extract32(inst, 20, 7) | (sextract32(inst, 32, 16) << 7 ) ) //b47-b32 + b26-b20 + + +#define GET_RM(inst) extract32(inst, 12, 3) +#define GET_RD(inst) extract32(inst, 7, 5) +#define GET_IMM(inst) sextract64(inst, 20, 12) +#define GET_IMM_32(inst) sextract64(inst, 16, 32) + +/* RVC decoding macros */ +#define GET_C_IMM(inst) (extract32(inst, 2, 5) \ + | (sextract64(inst, 12, 1) << 5)) +#define GET_C_ZIMM(inst) (extract32(inst, 2, 5) \ + | (extract32(inst, 12, 1) << 5)) +#define GET_C_ADDI4SPN_IMM(inst) ((extract32(inst, 6, 1) << 2) \ + | (extract32(inst, 5, 1) << 3) \ + | (extract32(inst, 11, 2) << 4) \ + | (extract32(inst, 7, 4) << 6)) +#define GET_C_ADDI16SP_IMM(inst) ((extract32(inst, 6, 1) << 4) \ + | (extract32(inst, 2, 1) << 5) \ + | (extract32(inst, 5, 1) << 6) \ + | (extract32(inst, 3, 2) << 7) \ + | (sextract64(inst, 12, 1) << 9)) +#define GET_C_LWSP_IMM(inst) ((extract32(inst, 4, 3) << 2) \ + | (extract32(inst, 12, 1) << 5) \ + | (extract32(inst, 2, 2) << 6)) +#define GET_C_LDSP_IMM(inst) ((extract32(inst, 5, 2) << 3) \ + | (extract32(inst, 12, 1) << 5) \ + | (extract32(inst, 2, 3) << 6)) +#define GET_C_SWSP_IMM(inst) ((extract32(inst, 9, 4) << 2) \ + | (extract32(inst, 7, 2) << 6)) +#define GET_C_SDSP_IMM(inst) ((extract32(inst, 10, 3) << 3) \ + | (extract32(inst, 7, 3) << 6)) +#define GET_C_LW_IMM(inst) ((extract32(inst, 6, 1) << 2) \ + | (extract32(inst, 10, 3) << 3) \ + | (extract32(inst, 5, 1) << 6)) +#define GET_C_LD_IMM(inst) ((extract32(inst, 10, 3) << 3) \ + | (extract32(inst, 5, 2) << 6)) +#define GET_C_J_IMM(inst) ((extract32(inst, 3, 3) << 1) \ + | 
(extract32(inst, 11, 1) << 4) \ + | (extract32(inst, 2, 1) << 5) \ + | (extract32(inst, 7, 1) << 6) \ + | (extract32(inst, 6, 1) << 7) \ + | (extract32(inst, 9, 2) << 8) \ + | (extract32(inst, 8, 1) << 10) \ + | (sextract64(inst, 12, 1) << 11)) +#define GET_C_B_IMM(inst) ((extract32(inst, 3, 2) << 1) \ + | (extract32(inst, 10, 2) << 3) \ + | (extract32(inst, 2, 1) << 5) \ + | (extract32(inst, 5, 2) << 6) \ + | (sextract64(inst, 12, 1) << 8)) +#define GET_C_SIMM3(inst) extract32(inst, 10, 3) +#define GET_C_RD(inst) GET_RD(inst) +#define GET_C_RS1(inst) GET_RD(inst) +#define GET_C_RS2(inst) extract32(inst, 2, 5) +#define GET_C_RS1S(inst) (8 + extract32(inst, 7, 3)) +#define GET_C_RS2S(inst) (8 + extract32(inst, 2, 3)) + +#endif /* _RH850_INSTMAP_H */ \ No newline at end of file diff --git a/qemu/target/rh850/op_helper.c b/qemu/target/rh850/op_helper.c new file mode 100644 index 0000000000..36e272e7a0 --- /dev/null +++ b/qemu/target/rh850/op_helper.c @@ -0,0 +1,89 @@ +/* + * RH850 Emulation Helpers for QEMU. + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" + +/* Exceptions processing helpers */ +void QEMU_NORETURN do_raise_exception_err(CPURH850State *env, + uint32_t exception, uintptr_t pc) +{ + CPUState *cs = CPU(rh850_env_get_cpu(env)); + qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception); + cs->exception_index = exception; + cpu_loop_exit_restore(cs, pc); +} + +void QEMU_NORETURN do_raise_exception_err_with_cause(CPURH850State *env, + uint32_t exception, uint32_t cause, uintptr_t pc) +{ + CPUState *cs = CPU(rh850_env_get_cpu(env)); + //qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception); + cs->exception_index = exception; + env->exception_cause = cause; + cpu_loop_exit_restore(cs, pc); +} + + +void helper_raise_exception(CPURH850State *env, uint32_t exception) +{ + do_raise_exception_err(env, exception, 0); +} + +void helper_raise_exception_with_cause(CPURH850State *env, uint32_t exception, uint32_t cause) +{ + do_raise_exception_err_with_cause(env, exception, cause, 0); +} + +target_ulong csr_read_helper(CPURH850State *env, target_ulong csrno) +{ + return 0; +} + +#ifndef CONFIG_USER_ONLY + +/* iothread_mutex must be held */ +void rh850_set_local_interrupt(RH850CPU *cpu, target_ulong mask, int value) +{ +} + +void rh850_set_mode(CPURH850State *env, target_ulong newpriv) +{ +} + +void helper_tlb_flush(CPURH850State *env) +{ + RH850CPU *cpu = rh850_env_get_cpu(env); + CPUState *cs = CPU(cpu); + tlb_flush(cs); +} + +void helper_uc_rh850_exit(CPURH850State *env) +{ + CPUState *cs = CPU(env); + + cs->exception_index = EXCP_HLT; + cs->halted = 1; + cpu_loop_exit(cs); +} + +#endif /* !CONFIG_USER_ONLY */ diff --git a/qemu/target/rh850/pmp.c b/qemu/target/rh850/pmp.c new file mode 100644 index 
0000000000..8f98659d3a --- /dev/null +++ b/qemu/target/rh850/pmp.c @@ -0,0 +1,379 @@ +/* + * QEMU RH850 PMP (Physical Memory Protection) + * + * Author: Daire McNamara, daire.mcnamara@emdalo.com + * Ivan Griffin, ivan.griffin@emdalo.com + * + * This provides a RH850 Physical Memory Protection implementation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +/* + * PMP (Physical Memory Protection) is as-of-yet unused and needs testing. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "qemu-common.h" + +#ifndef CONFIG_USER_ONLY + +#define RH850_DEBUG_PMP 0 +#define PMP_DEBUG(fmt, ...) \ + do { \ + if (RH850_DEBUG_PMP) { \ + qemu_log_mask(LOG_TRACE, "%s: " fmt "\n", __func__, ##__VA_ARGS__);\ + } \ + } while (0) + +static void pmp_write_cfg(CPURH850State *env, uint32_t addr_index, + uint8_t val); +static uint8_t pmp_read_cfg(CPURH850State *env, uint32_t addr_index); +static void pmp_update_rule(CPURH850State *env, uint32_t pmp_index); + +/* + * Accessor method to extract address matching type 'a field' from cfg reg + */ +static inline uint8_t pmp_get_a_field(uint8_t cfg) +{ + uint8_t a = cfg >> 3; + return a & 0x3; +} + +/* + * Check whether a PMP is locked or not. + */ +static inline int pmp_is_locked(CPURH850State *env, uint32_t pmp_index) +{ + + if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) { + return 1; + } + + /* Top PMP has no 'next' to check */ + if ((pmp_index + 1u) >= MAX_RH850_PMPS) { + return 0; + } + + /* In TOR mode, need to check the lock bit of the next pmp + * (if there is a next) + */ + const uint8_t a_field = + pmp_get_a_field(env->pmp_state.pmp[pmp_index + 1].cfg_reg); + if ((env->pmp_state.pmp[pmp_index + 1u].cfg_reg & PMP_LOCK) && + (PMP_AMATCH_TOR == a_field)) { + return 1; + } + + return 0; +} + +/* + * Count the number of active rules. + */ +static inline uint32_t pmp_get_num_rules(CPURH850State *env) +{ + return env->pmp_state.num_rules; +} + +/* + * Accessor to get the cfg reg for a specific PMP/HART + */ +static inline uint8_t pmp_read_cfg(CPURH850State *env, uint32_t pmp_index) +{ + if (pmp_index < MAX_RH850_PMPS) { + return env->pmp_state.pmp[pmp_index].cfg_reg; + } + + return 0; +} + + +/* + * Accessor to set the cfg reg for a specific PMP/HART + * Bounds checks and relevant lock bit. + */ +static void pmp_write_cfg(CPURH850State *env, uint32_t pmp_index, uint8_t val) +{ + if (pmp_index < MAX_RH850_PMPS) { + if (!pmp_is_locked(env, pmp_index)) { + env->pmp_state.pmp[pmp_index].cfg_reg = val; + pmp_update_rule(env, pmp_index); + } else { + PMP_DEBUG("ignoring write - locked"); + } + } else { + PMP_DEBUG("ignoring write - out of bounds"); + } +} + +static void pmp_decode_napot(target_ulong a, target_ulong *sa, target_ulong *ea) +{ + /* + aaaa...aaa0 8-byte NAPOT range + aaaa...aa01 16-byte NAPOT range + aaaa...a011 32-byte NAPOT range + ... 
+ aa01...1111 2^XLEN-byte NAPOT range + a011...1111 2^(XLEN+1)-byte NAPOT range + 0111...1111 2^(XLEN+2)-byte NAPOT range + 1111...1111 Reserved + */ + if (a == -1) { + *sa = 0u; + *ea = -1; + return; + } else { + target_ulong t1 = ctz64(~a); + target_ulong base = (a & ~(((target_ulong)1 << t1) - 1)) << 3; + target_ulong range = ((target_ulong)1 << (t1 + 3)) - 1; + *sa = base; + *ea = base + range; + } +} + + +/* Convert cfg/addr reg values here into simple 'sa' --> start address and 'ea' + * end address values. + * This function is called relatively infrequently whereas the check that + * an address is within a pmp rule is called often, so optimise that one + */ +static void pmp_update_rule(CPURH850State *env, uint32_t pmp_index) +{ + int i; + + env->pmp_state.num_rules = 0; + + uint8_t this_cfg = env->pmp_state.pmp[pmp_index].cfg_reg; + target_ulong this_addr = env->pmp_state.pmp[pmp_index].addr_reg; + target_ulong prev_addr = 0u; + target_ulong sa = 0u; + target_ulong ea = 0u; + + if (pmp_index >= 1u) { + prev_addr = env->pmp_state.pmp[pmp_index - 1].addr_reg; + } + + switch (pmp_get_a_field(this_cfg)) { + case PMP_AMATCH_OFF: + sa = 0u; + ea = -1; + break; + + case PMP_AMATCH_TOR: + sa = prev_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ + ea = (this_addr << 2) - 1u; + break; + + case PMP_AMATCH_NA4: + sa = this_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ + ea = (this_addr + 4u) - 1u; + break; + + case PMP_AMATCH_NAPOT: + pmp_decode_napot(this_addr, &sa, &ea); + break; + + default: + sa = 0u; + ea = 0u; + break; + } + + env->pmp_state.addr[pmp_index].sa = sa; + env->pmp_state.addr[pmp_index].ea = ea; + + for (i = 0; i < MAX_RH850_PMPS; i++) { + const uint8_t a_field = + pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); + if (PMP_AMATCH_OFF != a_field) { + env->pmp_state.num_rules++; + } + } +} + +static int pmp_is_in_range(CPURH850State *env, int pmp_index, target_ulong addr) +{ + int result = 0; + + if ((addr >= env->pmp_state.addr[pmp_index].sa) + && (addr <= env->pmp_state.addr[pmp_index].ea)) { + result = 1; + } else { + result = 0; + } + + return result; +} + + +/* + * Public Interface + */ + +/* + * Check if the address has required RWX privs to complete desired operation + */ +bool pmp_hart_has_privs(CPURH850State *env, target_ulong addr, + target_ulong size, pmp_priv_t privs) +{ + int i = 0; + int ret = -1; + target_ulong s = 0; + target_ulong e = 0; + pmp_priv_t allowed_privs = 0; + + /* Short cut if no rules */ + if (0 == pmp_get_num_rules(env)) { + return true; + } + + /* 1.10 draft priv spec states there is an implicit order + from low to high */ + for (i = 0; i < MAX_RH850_PMPS; i++) { + s = pmp_is_in_range(env, i, addr); + e = pmp_is_in_range(env, i, addr + size); + + /* partially inside */ + if ((s + e) == 1) { + PMP_DEBUG("pmp violation - access is partially inside"); + ret = 0; + break; + } + + /* fully inside */ + const uint8_t a_field = + pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); + if ((s + e) == 2) { + if (PMP_AMATCH_OFF == a_field) { + return 1; + } + + allowed_privs = PMP_READ | PMP_WRITE | PMP_EXEC; + if ((env->priv != PRV_M) || pmp_is_locked(env, i)) { + allowed_privs &= env->pmp_state.pmp[i].cfg_reg; + } + + if ((privs & allowed_privs) == privs) { + ret = 1; + break; + } else { + ret = 0; + break; + } + } + } + + /* No rule matched */ + if (ret == -1) { + if (env->priv == PRV_M) { + ret = 1; /* Privileged spec v1.10 states if no PMP entry matches an + * M-Mode access, the access succeeds */ + } else { + ret = 0; /* Other modes are not allowed to 
succeed if they don't + * match a rule, but there are rules. We've checked for + * no rule earlier in this function. */ + } + } + + return ret == 1 ? true : false; +} + + +/* + * Handle a write to a pmpcfg CSP + */ +void pmpcfg_csr_write(CPURH850State *env, uint32_t reg_index, + target_ulong val) +{ + int i; + uint8_t cfg_val; + + PMP_DEBUG("hart " TARGET_FMT_ld ": reg%d, val: 0x" TARGET_FMT_lx, + env->mhartid, reg_index, val); + + if ((reg_index & 1) && (sizeof(target_ulong) == 8)) { + PMP_DEBUG("ignoring write - incorrect address"); + return; + } + + for (i = 0; i < sizeof(target_ulong); i++) { + cfg_val = (val >> 8 * i) & 0xff; + pmp_write_cfg(env, (reg_index * sizeof(target_ulong)) + i, + cfg_val); + } +} + + +/* + * Handle a read from a pmpcfg CSP + */ +target_ulong pmpcfg_csr_read(CPURH850State *env, uint32_t reg_index) +{ + int i; + target_ulong cfg_val = 0; + uint8_t val = 0; + + for (i = 0; i < sizeof(target_ulong); i++) { + val = pmp_read_cfg(env, (reg_index * sizeof(target_ulong)) + i); + cfg_val |= (val << (i * 8)); + } + + PMP_DEBUG("hart " TARGET_FMT_ld ": reg%d, val: 0x" TARGET_FMT_lx, + env->mhartid, reg_index, cfg_val); + + return cfg_val; +} + + +/* + * Handle a write to a pmpaddr CSP + */ +void pmpaddr_csr_write(CPURH850State *env, uint32_t addr_index, + target_ulong val) +{ + PMP_DEBUG("hart " TARGET_FMT_ld ": addr%d, val: 0x" TARGET_FMT_lx, + env->mhartid, addr_index, val); + + if (addr_index < MAX_RH850_PMPS) { + if (!pmp_is_locked(env, addr_index)) { + env->pmp_state.pmp[addr_index].addr_reg = val; + pmp_update_rule(env, addr_index); + } else { + PMP_DEBUG("ignoring write - locked"); + } + } else { + PMP_DEBUG("ignoring write - out of bounds"); + } +} + + +/* + * Handle a read from a pmpaddr CSP + */ +target_ulong pmpaddr_csr_read(CPURH850State *env, uint32_t addr_index) +{ + PMP_DEBUG("hart " TARGET_FMT_ld ": addr%d, val: 0x" TARGET_FMT_lx, + env->mhartid, addr_index, + env->pmp_state.pmp[addr_index].addr_reg); + if (addr_index < MAX_RH850_PMPS) { + return env->pmp_state.pmp[addr_index].addr_reg; + } else { + PMP_DEBUG("ignoring read - out of bounds"); + return 0; + } +} + +#endif diff --git a/qemu/target/rh850/pmp.h b/qemu/target/rh850/pmp.h new file mode 100644 index 0000000000..e6e43e8241 --- /dev/null +++ b/qemu/target/rh850/pmp.h @@ -0,0 +1,64 @@ +/* + * QEMU RH850 PMP (Physical Memory Protection) + * + * Author: Daire McNamara, daire.mcnamara@emdalo.com + * Ivan Griffin, ivan.griffin@emdalo.com + * + * This provides a RH850 Physical Memory Protection interface + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef _RH850_PMP_H_ +#define _RH850_PMP_H_ + +typedef enum { + PMP_READ = 1 << 0, + PMP_WRITE = 1 << 1, + PMP_EXEC = 1 << 2, + PMP_LOCK = 1 << 7 +} pmp_priv_t; + +typedef enum { + PMP_AMATCH_OFF, /* Null (off) */ + PMP_AMATCH_TOR, /* Top of Range */ + PMP_AMATCH_NA4, /* Naturally aligned four-byte region */ + PMP_AMATCH_NAPOT /* Naturally aligned power-of-two region */ +} pmp_am_t; + +typedef struct { + target_ulong addr_reg; + uint8_t cfg_reg; +} pmp_entry_t; + +typedef struct { + target_ulong sa; + target_ulong ea; +} pmp_addr_t; + +typedef struct { + pmp_entry_t pmp[MAX_RH850_PMPS]; + pmp_addr_t addr[MAX_RH850_PMPS]; + uint32_t num_rules; +} pmp_table_t; + +void pmpcfg_csr_write(CPURH850State *env, uint32_t reg_index, + target_ulong val); +target_ulong pmpcfg_csr_read(CPURH850State *env, uint32_t reg_index); +void pmpaddr_csr_write(CPURH850State *env, uint32_t addr_index, + target_ulong val); +target_ulong pmpaddr_csr_read(CPURH850State *env, uint32_t addr_index); +bool pmp_hart_has_privs(CPURH850State *env, target_ulong addr, + target_ulong size, pmp_priv_t priv); + +#endif diff --git a/qemu/target/rh850/register_indices.h b/qemu/target/rh850/register_indices.h new file mode 100644 index 0000000000..20fcea8cae --- /dev/null +++ b/qemu/target/rh850/register_indices.h @@ -0,0 +1,63 @@ +/* + * register_indices.h + * + * Created on: Jun 18, 2018 + * + */ + +#ifndef TARGET_RH850_REGISTER_INDICES_H_ +#define TARGET_RH850_REGISTER_INDICES_H_ + + +// BANK ID 0, sys basic regs +#define EIPC_IDX 0 +#define EIPSW_IDX 1 +#define FEPC_IDX 2 +#define FEPSW_IDX 3 +#define PSW_IDX 5 //program status word +// sysFpuRegs indices +#define FPSR_IDX 6 //floating-point configuration/status <---write the bit defines +#define FPEPC_IDX 7 //floating point exception PC +#define FPST_IDX 8 +#define FPCC_IDX 9 +#define FPCFG_IDX 10 +#define FPEC_IDX 11 + +#define EIIC_IDX 13 //EI level exception cause +#define FEIC_IDX 14 //FI level exception cause +#define CTPC_IDX 16 +#define CTPSW_IDX 17 +#define CTBP_IDX 20 +#define EIWR_IDX 28 +#define FEWR_IDX 29 +#define BSEL_IDX 31 + +// BANK ID 1, sys basic regs +#define MCFG0_IDX1 0 //machine configuration +#define RBASE_IDX1 2 //reset vector base address (if psw.ebv==0, this is also exception vector) +#define EBASE_IDX1 3 //exception handler vector address +#define INTBP_IDX1 4 +#define MCTL_IDX1 5 //CPU control +#define PID_IDX1 6 //processor ID +#define SCCFG_IDX1 11 // SYSCALL config +#define SCBP_IDX1 12 // SYSCALL base pointer + +// BANK ID 2, sys basic regs +#define HTCFG0_IDX2 0 //thread configuration +#define MEA_IDX2 6 //memory error address (when misaligned or MPU) +#define ASID_IDX2 7 //memory error address (when misaligned or MPU) +#define MEI_IDX2 8 //memory error info (info about instruction that caused exception) + +// BANK ID 1, 2 sysInterruptRegs indices +#define FPIPR_IDX1 7 +#define ISPR_IDX2 10 +#define PMR_IDX2 11 +#define ICSR_IDX2 12 //interrupt control status register +#define INTCFG_IDX2 13 //interrupt function setting + + +// BANK ID 5, 6, 7 system MPU regs indices +#define MPM_IDX5 0 //memory protection operation mode + + +#endif /* TARGET_RH850_REGISTER_INDICES_H_ */ diff --git a/qemu/target/rh850/translate.c b/qemu/target/rh850/translate.c new file mode 100644 index 0000000000..7081656c95 --- /dev/null +++ b/qemu/target/rh850/translate.c @@ -0,0 +1,5190 @@ +/* + * RH850 emulation for qemu: main translation routines. + * + * Copyright (c) 2018 iSYSTEM Labs d.o.o. 
+ * Copyright (c) 2023 Quarkslab + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" +#include "qemu/log.h" +#include "qemu/host-utils.h" +#include "exec/cpu_ldst.h" +#include "exec/gen-icount.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" +#include "exec/translator.h" + +#include "instmap.h" + +#include "unicorn/platform.h" +#include "uc_priv.h" +#include "translate.h" +#include "fpu_translate.h" + +/* + * Unicorn: Special disas state for exiting in the middle of tb. + */ + +/* We are not using a goto_tb (for whatever reason), but have updated + the PC (for whatever reason), so there's no need to do it again on + exiting the TB. */ +#define DISAS_PC_UPDATED DISAS_TARGET_0 + +/* We have emitted one or more goto_tb. No fixup required. */ +#define DISAS_GOTO_TB DISAS_TARGET_1 + +/* We have updated the PC and CC values. */ +#define DISAS_PC_CC_UPDATED DISAS_TARGET_2 + +/* We are exiting the TB, but have neither emitted a goto_tb, nor + updated the PC for the next instruction to be executed. */ +#define DISAS_PC_STALE DISAS_TARGET_3 + +/* We are exiting the TB to the main loop. */ +#define DISAS_PC_STALE_NOCHAIN DISAS_TARGET_4 + +#define DISAS_UNICORN_HALT DISAS_TARGET_11 + +/* global register indices */ +static TCGv cpu_gpr[NUM_GP_REGS]; +static TCGv cpu_pc; +static TCGv cpu_sysRegs[NUM_SYS_REG_BANKS][MAX_SYS_REGS_IN_BANK]; +// static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ +static TCGv cpu_sysDatabuffRegs[1], cpu_LLbit, cpu_LLAddress; +static TCGv load_res; +static TCGv load_val; + +// PSW register flags. These are for temporary use only during +// calculations. Before usage they should be set from PSW and +// stored back to PSW after changes. +// TODO: since PSW as a register is rarely used - only when ld/str sys reg and +// on some branches (TRAP, ...) it makes sense to compose/decompose PSW +// on these occcasions and not have PSW stored in registers below. +TCGv_i32 cpu_ZF, cpu_SF, cpu_OVF, cpu_CYF, cpu_SATF, cpu_ID, cpu_EP, cpu_NP, + cpu_EBV, cpu_CU0, cpu_CU1, cpu_CU2, cpu_UM; + + +/** Const, RH850 does not have MMU. */ +const int MEM_IDX = 0; + +/* is_jmp field values */ +#define DISAS_INDIRECT_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */ +#define DISAS_EXIT_TB DISAS_TARGET_1 /* cpu state was modified dynamically */ +#define DISAS_TB_EXIT_ALREADY_GENERATED DISAS_TARGET_2 +#define CASE_OP_32_64(X) case X + +/* Possible conditions for tests. 
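+   These enumerators encode the 4-bit "cccc" condition field used by the
+   Bcond, CMOVcc, SETF/SASF and ADF/SBF translations below;
+   condition_satisfied() evaluates the selected condition from the cached
+   PSW flag variables (cpu_ZF, cpu_SF, cpu_OVF, cpu_CYF, cpu_SATF).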
*/ +enum { + V_COND = 0, /* OV = 1 */ + C_COND = 1, /* CY = 1 */ + Z_COND = 2, /* Z = 1 */ + NH_COND = 3, /* (CY or Z) = 1 */ + S_COND = 4, /* S = 1 */ + T_COND = 5, /* Always */ + LT_COND = 6, /* (S xor OV) = 1 */ + LE_COND = 7, /* ((S xor OV) or Z) = 1 */ + + NV_COND = 8, /* OV = 0 */ + NC_COND = 9, /* CY = 0 */ + NZ_COND = 10, /* Z = 0 */ + H_COND = 11, /* (CY or Z) = 0 */ + NS_COND = 12, /* S = 0 */ + SA_COND = 13, /* SAT = 1 */ + GE_COND = 14, /* (S xor OV) = 0 */ + GT_COND = 15, /* ((S xor OV) or Z) = 0 */ +}; + +// Enumeration for Cache operations. +enum { + CHBII = 0x0, + CIBII = 0x20, + CFALI = 0x40, + CISTI = 0x60, + CILDI = 0x61, + CLL = 0x7e, +}; + +enum { + OPC_RH850_BINS = 123456, +}; + + +static void gen_exception_debug(DisasContext *dc) +{ + TCGContext *tcg_ctx = dc->uc->tcg_ctx; + + TCGv_i32 helper_tmp = tcg_const_i32(tcg_ctx, EXCP_DEBUG); + gen_helper_raise_exception(tcg_ctx, tcg_ctx->cpu_env, helper_tmp); + tcg_temp_free_i32(tcg_ctx, helper_tmp); + + dc->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; +} + +static void gen_exception_halt(DisasContext *dc) +{ + TCGContext *tcg_ctx = dc->uc->tcg_ctx; + + TCGv_i32 helper_tmp = tcg_const_i32(tcg_ctx, EXCP_HLT); + gen_helper_raise_exception(tcg_ctx, tcg_ctx->cpu_env, helper_tmp); + tcg_temp_free_i32(tcg_ctx, helper_tmp); + + dc->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; +} + + +static void gen_goto_tb_imm(DisasContext *ctx, int n, target_ulong dest) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + if (unlikely(ctx->base.singlestep_enabled)) { + tcg_gen_movi_tl(tcg_ctx, cpu_pc, dest); + gen_exception_debug(ctx); + } else { + tcg_gen_goto_tb(tcg_ctx, n); + tcg_gen_movi_tl(tcg_ctx, cpu_pc, dest); + tcg_gen_exit_tb(tcg_ctx, ctx->base.tb, n); + } +} + + +/* Wrapper for getting reg values - need to check of reg is zero since + * cpu_gpr[0] is not actually allocated + */ +void gen_get_gpr(TCGContext *tcg_ctx, TCGv t, int reg_num) +{ + if (reg_num == 0) { + tcg_gen_movi_tl(tcg_ctx, t, 0); + } else { + tcg_gen_mov_tl(tcg_ctx, t, cpu_gpr[reg_num]); + } + +} + + +/* Wrapper for setting system register values. */ + +void gen_set_spr(TCGContext *tcg_ctx, int bank_id, int reg_id, TCGv t) +{ + tcg_gen_mov_tl(tcg_ctx, cpu_sysRegs[bank_id][reg_id], t); +} + +/* Wrapper for gettint sysreg values. */ +void gen_get_spr(TCGContext *tcg_ctx, int bank_id, int reg_id, TCGv t) +{ + tcg_gen_mov_tl(tcg_ctx, t, cpu_sysRegs[bank_id][reg_id]); +} + +/* Wrapper for setting reg values - need to check of reg is zero since + * cpu_gpr[0] is not actually allocated. this is more for safety purposes, + * since we usually avoid calling the OP_TYPE_gen function if we see a write to + * $zero + */ +void gen_set_gpr(TCGContext *tcg_ctx, int reg_num_dst, TCGv t) +{ + if (reg_num_dst != 0) { + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[reg_num_dst], t); + } +} + + +/** + * gen_goto_tb_rl() is a customized version of gen_goto_tb() that is able to + * move PC into a specified register before updating PC. V850e3 JARL/JR insts. + * work this way :). 
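+ * The emitted sequence is:
+ *     GR[reg] <- PC + insn_size     (link register update)
+ *     PC      <- dest               (branch target)
+ * followed by tcg_gen_exit_tb(), or a debug exception when single-stepping.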
+ **/ + +static void gen_goto_tb_rl(DisasContext *ctx, int n, int reg, int insn_size, uint32_t dest) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv reg_value = tcg_temp_new_i32(tcg_ctx); + + if (unlikely(ctx->base.singlestep_enabled)) + { + + /* GR[reg] <- PC + insn_size */ + tcg_gen_movi_i32(tcg_ctx, reg_value, ctx->pc); + tcg_gen_addi_i32(tcg_ctx, reg_value, reg_value, insn_size); + gen_set_gpr(tcg_ctx, reg, reg_value); + + /* PC <- dest */ + tcg_gen_movi_i32(tcg_ctx, cpu_pc, dest); + + tcg_temp_free_i32(tcg_ctx, reg_value); + + /* Generate exception. */ + gen_exception_debug(ctx); + } + else + { + tcg_gen_goto_tb(tcg_ctx, n); + + /* GR[reg] <- PC + insn_size */ + tcg_gen_movi_i32(tcg_ctx, reg_value, ctx->pc); + tcg_gen_addi_i32(tcg_ctx, reg_value, reg_value, insn_size); + gen_set_gpr(tcg_ctx, reg, reg_value); + + /* PC <- dest */ + tcg_gen_movi_i32(tcg_ctx, cpu_pc, dest); + + tcg_temp_free_i32(tcg_ctx, reg_value); + + tcg_gen_exit_tb(tcg_ctx, ctx->base.tb, n); + } +} + + +static inline void tcgv_to_flags(TCGContext *tcg_ctx, TCGv reg) +{ + TCGv temp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, temp, reg); + tcg_gen_andi_i32(tcg_ctx, cpu_ZF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_SF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_OVF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_SATF, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_ID, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_EP, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_NP, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x8); + tcg_gen_andi_i32(tcg_ctx, cpu_EBV, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CU0, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CU1, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CU2, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x12); + tcg_gen_andi_i32(tcg_ctx, cpu_UM, temp, 0x1); + + tcg_temp_free(tcg_ctx, temp); +} + + +static void tcgv_to_flags_z_cy_ov_s_sat(TCGContext *tcg_ctx, TCGv reg) +{ + TCGv temp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, temp, reg); + tcg_gen_andi_i32(tcg_ctx, cpu_ZF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_SF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_OVF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_SATF, temp, 0x1); + tcg_temp_free(tcg_ctx, temp); +} + + +static void flags_to_tcgv_id_ep_np_ebv_cu_um(TCGContext *tcg_ctx, TCGv reg) +{ + // Set flags in PSW to 0 so we can OR with new state + tcg_gen_andi_i32(tcg_ctx, reg, reg, 0xbff87f1f); + + TCGv temp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_ID, 0x5); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_EP, 0x6); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_NP, 0x7); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, 
cpu_EBV, 0xF); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_CU0, 0x10); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_CU1, 0x11); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_CU2, 0x12); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_UM, 0x1E); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_temp_free(tcg_ctx, temp); +} + + +static void flags_to_tcgv_z_cy_ov_s_sat(TCGContext *tcg_ctx, TCGv reg) +{ + // update psw register, first reset flags before ORing new values + tcg_gen_andi_i32(tcg_ctx, reg, reg, 0xffffffe0); + TCGv temp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_or_i32(tcg_ctx, reg, reg, cpu_ZF); + tcg_gen_shli_i32(tcg_ctx, temp, cpu_SF, 0x1); + tcg_gen_or_i32(tcg_ctx, reg,reg,temp); + tcg_gen_shli_i32(tcg_ctx, temp, cpu_OVF, 0x2); + tcg_gen_or_i32(tcg_ctx, reg,reg,temp); + tcg_gen_shli_i32(tcg_ctx, temp, cpu_CYF, 0x3); + tcg_gen_or_i32(tcg_ctx, reg,reg,temp); + tcg_gen_shli_i32(tcg_ctx, temp, cpu_SATF, 0x4); + tcg_gen_or_i32(tcg_ctx, reg,reg,temp); + tcg_temp_free(tcg_ctx, temp); +} + + +static void flags_to_tcgv(TCGContext *tcg_ctx, TCGv reg) +{ + flags_to_tcgv_z_cy_ov_s_sat(tcg_ctx, reg); + flags_to_tcgv_id_ep_np_ebv_cu_um(tcg_ctx, reg); +} + + +static TCGv condition_satisfied(TCGContext *tcg_ctx, int cond) +{ + TCGv condResult = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, condResult, 0x0); + + switch(cond) { + case GE_COND: + tcg_gen_xor_i32(tcg_ctx, condResult, cpu_SF, cpu_OVF); + tcg_gen_not_i32(tcg_ctx, condResult, condResult); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case GT_COND: + tcg_gen_xor_i32(tcg_ctx, condResult, cpu_SF, cpu_OVF); + tcg_gen_or_i32(tcg_ctx, condResult, condResult, cpu_ZF); + tcg_gen_not_i32(tcg_ctx, condResult, condResult); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case LE_COND: + tcg_gen_xor_i32(tcg_ctx, condResult, cpu_SF, cpu_OVF); + tcg_gen_or_i32(tcg_ctx, condResult, condResult, cpu_ZF); + break; + case LT_COND: + tcg_gen_xor_i32(tcg_ctx, condResult, cpu_SF, cpu_OVF); + break; + + case H_COND: + tcg_gen_or_i32(tcg_ctx, condResult, cpu_CYF, cpu_ZF); + tcg_gen_not_i32(tcg_ctx, condResult, condResult); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case NH_COND: + tcg_gen_or_i32(tcg_ctx, condResult, cpu_CYF, cpu_ZF); + break; + + case NS_COND: + tcg_gen_not_i32(tcg_ctx, condResult, cpu_SF); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + + case S_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_SF); + break; + + case C_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_CYF); + break; + + case NC_COND: + tcg_gen_not_i32(tcg_ctx, condResult, cpu_CYF); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case NV_COND: + tcg_gen_not_i32(tcg_ctx, condResult, cpu_OVF); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case NZ_COND: + tcg_gen_not_i32(tcg_ctx, condResult, cpu_ZF); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + + case SA_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_SATF); + break; + case T_COND: + tcg_gen_movi_i32(tcg_ctx, condResult, 0x1); + break; + case V_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_OVF); + break; + case Z_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_ZF); + break; + } + + return condResult; +} + +static void gen_flags_on_add(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGLabel *cont; + TCGLabel *end; + + 
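+    /* PSW flags for the addition "t0 + t1":
+     *   CY = unsigned carry out of bit 31, obtained from the widening
+     *        add2 below (high words forced to zero),
+     *   S  = bit 31 of the result,
+     *   OV = signed overflow (operands have the same sign, result differs),
+     *   Z  = 1 when the result is zero, else 0. */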
TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + // 'add2(rl, rh, al, ah, bl, bh) creates 64-bit values and adds them: + // [CYF : SF] = [tmp : t0] + [tmp : t1] + // While CYF is 0 or 1, SF bit 15 contains sign, so it + // must be shifted 31 bits to the right later. + tcg_gen_add2_i32(tcg_ctx, cpu_SF, cpu_CYF, t0, tmp, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, cpu_ZF, cpu_SF); + + tcg_gen_xor_i32(tcg_ctx, cpu_OVF, cpu_SF, t0); + tcg_gen_xor_i32(tcg_ctx, tmp, t0, t1); + tcg_gen_andc_i32(tcg_ctx, cpu_OVF, cpu_OVF, tmp); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, cpu_SF, 0x1f); + tcg_gen_shri_i32(tcg_ctx, cpu_OVF, cpu_OVF, 0x1f); + + tcg_temp_free_i32(tcg_ctx, tmp); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x0); + + gen_set_label(tcg_ctx, end); +} + + +static void gen_satadd_CC(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1, TCGv_i32 result) +{ + TCGLabel *cont; + TCGLabel *end; + + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + tcg_gen_add2_i32(tcg_ctx, cpu_SF, cpu_CYF, t0, tmp, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, cpu_ZF, cpu_SF); + tcg_gen_xor_i32(tcg_ctx, cpu_OVF, cpu_SF, t0); + tcg_gen_xor_i32(tcg_ctx, tmp, t0, t1); + tcg_gen_andc_i32(tcg_ctx, cpu_OVF, cpu_OVF, tmp); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, result, 0x1f); + tcg_gen_shri_i32(tcg_ctx, cpu_OVF, cpu_OVF, 0x1f); + tcg_temp_free_i32(tcg_ctx, tmp); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x0); + + gen_set_label(tcg_ctx, end); +} + +static void gen_flags_on_sub(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + tcg_gen_sub_tl(tcg_ctx, cpu_SF, t0, t1); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GTU, cpu_CYF, t1, t0); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, t0, t1); + tcg_gen_xor_i32(tcg_ctx, cpu_OVF, cpu_SF, t0); + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_xor_i32(tcg_ctx, tmp, t0, t1); + tcg_gen_and_i32(tcg_ctx, cpu_OVF, cpu_OVF, tmp); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, cpu_SF, 0x1f); + tcg_gen_shri_i32(tcg_ctx, cpu_OVF, cpu_OVF, 0x1f); + tcg_temp_free_i32(tcg_ctx, tmp); +} + +static void gen_satsub_CC(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1, TCGv_i32 result) +{ + TCGLabel *cont; + TCGLabel *end; + + TCGv_i32 tmp; + tcg_gen_sub_tl(tcg_ctx, cpu_SF, t0, t1); + + tcg_gen_mov_i32(tcg_ctx, cpu_ZF, cpu_SF); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GTU, cpu_CYF, t1, t0); + tcg_gen_xor_i32(tcg_ctx, cpu_OVF, cpu_SF, t0); + tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_xor_i32(tcg_ctx, tmp, t0, t1); + tcg_gen_and_i32(tcg_ctx, cpu_OVF, cpu_OVF, tmp); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, result, 0x1f); + tcg_gen_shri_i32(tcg_ctx, cpu_OVF, cpu_OVF, 0x1f); + tcg_temp_free_i32(tcg_ctx, tmp); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x0); + + gen_set_label(tcg_ctx, end); +} + +static void gen_logic_CC(TCGContext *tcg_ctx, TCGv_i32 result){ + + TCGLabel *cont; + TCGLabel *end; + + 
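+    /* Flags for logical/bit-wise results: OV is cleared, S is bit 31 of
+     * the result, and Z is set when the result is zero; CY and SAT are
+     * left untouched. */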
tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, result, 0x1f); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, result, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x0); + + gen_set_label(tcg_ctx, end); +} + + +static void gen_load(DisasContext *ctx, int memop, int rd, int rs1, + target_long imm, unsigned is_disp23) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv t0 = tcg_temp_new(tcg_ctx); + TCGv t1 = tcg_temp_new(tcg_ctx); + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv_i64 t1_64 = tcg_temp_new_i64(tcg_ctx); + TCGv t1_high = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, t0, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm); + + if (!is_disp23) + tcg_gen_ext16s_i32(tcg_ctx, tcg_imm, tcg_imm); + else { + tcg_gen_shli_i32(tcg_ctx, tcg_imm, tcg_imm, 9); + tcg_gen_sari_i32(tcg_ctx, tcg_imm, tcg_imm, 9); + } + + tcg_gen_add_tl(tcg_ctx, t0, t0, tcg_imm); + + if (memop == MO_TEQ) { + tcg_gen_qemu_ld_i64(tcg_ctx, t1_64, t0, MEM_IDX, memop); + tcg_gen_extrl_i64_i32(tcg_ctx, t1, t1_64); + tcg_gen_extrh_i64_i32(tcg_ctx, t1_high, t1_64); + gen_set_gpr(tcg_ctx, rd, t1); + gen_set_gpr(tcg_ctx, rd+1, t1_high); + } + else { + tcg_gen_qemu_ld_tl(tcg_ctx, t1, t0, MEM_IDX, memop); + gen_set_gpr(tcg_ctx, rd, t1); + } + + tcg_temp_free(tcg_ctx, t0); + tcg_temp_free(tcg_ctx, t1); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free_i64(tcg_ctx, t1_64); + tcg_temp_free(tcg_ctx, t1_high); +} + +static void gen_store(DisasContext *ctx, int memop, int rs1, int rs2, + target_long imm, unsigned is_disp23) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv t0 = tcg_temp_new(tcg_ctx); + TCGv dat = tcg_temp_new(tcg_ctx); + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv dat_high = tcg_temp_new(tcg_ctx); + TCGv_i64 dat64 = tcg_temp_new_i64(tcg_ctx); + + gen_get_gpr(tcg_ctx, t0, rs1); // loading rs1 to t0 + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm); + + if (!is_disp23) + tcg_gen_ext16s_i32(tcg_ctx, tcg_imm, tcg_imm); + else { + tcg_gen_shli_i32(tcg_ctx, tcg_imm, tcg_imm, 9); + tcg_gen_sari_i32(tcg_ctx, tcg_imm, tcg_imm, 9); + } + + tcg_gen_add_tl(tcg_ctx, t0, t0, tcg_imm); // adding displacement to t0 + + gen_get_gpr(tcg_ctx, dat, rs2); // getting data from rs2 + + if (memop == MO_TEQ) { + gen_get_gpr(tcg_ctx, dat_high, rs2+1); + tcg_gen_concat_i32_i64(tcg_ctx, dat64, dat, dat_high); + tcg_gen_qemu_st_i64(tcg_ctx, dat64, t0, MEM_IDX, memop); + } + else { + tcg_gen_qemu_st_tl(tcg_ctx, dat, t0, MEM_IDX, memop); + } + + // clear possible mutex + TCGLabel *l = gen_new_label(tcg_ctx); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_NE, t0, cpu_LLAddress, l); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_LLbit, 0x1, l); + tcg_gen_movi_i32(tcg_ctx, cpu_LLbit, 0); + gen_set_label(tcg_ctx, l); + + tcg_temp_free(tcg_ctx, t0); + tcg_temp_free(tcg_ctx, dat); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free_i64(tcg_ctx, dat64); + tcg_temp_free(tcg_ctx, dat_high); +} + +static void gen_mutual_exclusion(DisasContext *ctx, int rs3, int rs1, int operation) +{ + /* LDL.W, STC.W, CLL: Implement as described. + Add two additional global CPU registers called LLBit and LLAddress. + Set them with LDL.W, and reset them with STC.W. + If LLBit is not set or LLAddress does not match STC.W address, make STC.W fail. + CLL clears LLBit. + Since we do not implement multicore CPU emulation, this implementation should be OK. 
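+       A sketch of the resulting behaviour (plain C, for documentation only;
+       LLbit and LLAddress are the globals described above):
+           LDL.W : rs3 = *adr;  LLbit = 1;  LLAddress = adr;
+           STC.W : if (LLbit == 1 && adr == LLAddress) { *adr = rs3; rs3 = 1; }
+                   else { rs3 = 0; }
+                   LLbit = 0;
+           CLL   : LLbit = 0;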
*/ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + if (operation == operation_LDL_W) + { + TCGv adr = tcg_temp_new(tcg_ctx); + TCGv dat = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, adr, rs1); + tcg_gen_qemu_ld_tl(tcg_ctx, dat, adr, MEM_IDX, MO_TESL); + gen_set_gpr(tcg_ctx, rs3, dat); + + tcg_temp_free(tcg_ctx, adr); + tcg_temp_free(tcg_ctx, dat); + + tcg_gen_movi_i32(tcg_ctx, cpu_LLbit, 1); + tcg_gen_mov_i32(tcg_ctx, cpu_LLAddress, adr); + } + else if (operation == operation_STC_W) + { + TCGv adr = tcg_temp_local_new(tcg_ctx); + TCGv dat = tcg_temp_local_new(tcg_ctx); + TCGv token = tcg_temp_local_new(tcg_ctx); + TCGLabel *l_fail = gen_new_label(tcg_ctx); + TCGLabel *l_ok = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, token, cpu_LLbit); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, token, 0x1, l_fail); + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, dat, rs3); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_NE, adr, cpu_LLAddress, l_fail); + tcg_gen_qemu_st_tl(tcg_ctx, dat, adr, MEM_IDX, MO_TESL); + tcg_gen_movi_i32(tcg_ctx, dat, 1); + tcg_gen_br(tcg_ctx, l_ok); + + gen_set_label(tcg_ctx, l_fail); + tcg_gen_movi_i32(tcg_ctx, dat, 0); + gen_set_label(tcg_ctx, l_ok); + gen_set_gpr(tcg_ctx, rs3, dat); + + tcg_gen_movi_tl(tcg_ctx, cpu_LLbit, 0); + + tcg_temp_free(tcg_ctx, adr); + tcg_temp_free(tcg_ctx, dat); + tcg_temp_free(tcg_ctx, token); + } + else if (operation == operation_CLL) + { + tcg_gen_movi_i32(tcg_ctx, cpu_LLbit, 0); + } + else + printf("ERROR gen_mutual_exclusion \n"); +} + + +static void gen_multiply(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); //temp + TCGv r2 = tcg_temp_new(tcg_ctx); //temp + + gen_get_gpr(tcg_ctx, r1, rs1); //loading rs1 to t0 + gen_get_gpr(tcg_ctx, r2, rs2); //loading rs2 to t1 + int imm = rs1; + int imm_32; + int int_rs3; + + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv tcg_imm32 = tcg_temp_new(tcg_ctx); + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + TCGv tcg_temp = tcg_temp_new(tcg_ctx); + + switch(operation){ + case OPC_RH850_MUL_reg1_reg2_reg3: + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + + tcg_gen_muls2_i32(tcg_ctx, r2, tcg_r3, r1, r2); + if(rs2!=int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r2); + } + gen_set_gpr(tcg_ctx, int_rs3,tcg_r3); + break; + + case OPC_RH850_MUL_imm9_reg2_reg3: + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + + imm_32 = extract32(ctx->opcode, 18, 4); + imm_32 = imm | (imm_32 << 5); + + // sign extension + if((imm_32 & 0x100) == 0x100){ + imm_32 = imm_32 | (0x7f << 9); + } + tcg_gen_movi_tl(tcg_ctx, tcg_imm32, imm_32); + tcg_gen_ext16s_tl(tcg_ctx, tcg_imm32, tcg_imm32); + + tcg_gen_muls2_i32(tcg_ctx, r2, tcg_r3, tcg_imm32, r2); + + if(rs2!=int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r2); + } + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + break; + + case OPC_RH850_MULH_reg1_reg2: + + tcg_gen_andi_tl(tcg_ctx, r1, r1,0x0000FFFF); + tcg_gen_andi_tl(tcg_ctx, r2, r2,0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, r1, r1); + tcg_gen_ext16s_i32(tcg_ctx, r2, r2); + + tcg_gen_mul_tl(tcg_ctx, r2, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MULH_imm5_reg2: + + if ((imm & 0x10) == 0x10){ + imm = imm | (0x7 << 5); + } + tcg_gen_andi_tl(tcg_ctx, r2, r2,0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, r2, r2); + + tcg_gen_movi_tl(tcg_ctx, tcg_imm, imm); + tcg_gen_ext8s_i32(tcg_ctx, tcg_imm, tcg_imm); + tcg_gen_mul_tl(tcg_ctx, r2, r2, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, r2); + 
break; + + case OPC_RH850_MULHI_imm16_reg1_reg2: + + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_tl(tcg_ctx, tcg_imm32, imm_32); + tcg_gen_ext16s_i32(tcg_ctx, tcg_imm32, tcg_imm32); + + tcg_gen_andi_tl(tcg_ctx, r1, r1, 0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, r1, r1); + + tcg_gen_mul_tl(tcg_ctx, r2, r1, tcg_imm32); + + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MULU_reg1_reg2_reg3: + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + + tcg_gen_mulu2_i32(tcg_ctx, r2, tcg_r3, r2, r1); + + if(rs2!=int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r2); + } + gen_set_gpr(tcg_ctx, int_rs3,tcg_r3); + break; + + case OPC_RH850_MULU_imm9_reg2_reg3: + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + + imm_32 = extract32(ctx->opcode, 18, 4); + imm_32 = imm | (imm_32 << 5); + tcg_gen_movi_tl(tcg_ctx, tcg_imm32, imm_32); + + tcg_gen_ext16u_tl(tcg_ctx, tcg_imm32, tcg_imm32); + + tcg_gen_mulu2_i32(tcg_ctx, r2, tcg_r3, tcg_imm32, r2); + + if(rs2!=int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r2); + } + gen_set_gpr(tcg_ctx, int_rs3,tcg_r3); + break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, tcg_r3); + tcg_temp_free(tcg_ctx, tcg_temp); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free(tcg_ctx, tcg_imm32); +} + +static void gen_mul_accumulate(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + TCGv addLo = tcg_temp_new(tcg_ctx); + TCGv addHi = tcg_temp_new(tcg_ctx); + TCGv resLo = tcg_temp_new(tcg_ctx); + TCGv resHi = tcg_temp_new(tcg_ctx); + TCGv destLo = tcg_temp_new(tcg_ctx); + TCGv destHi = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int rs3; + int rs4; + + rs3 = extract32(ctx->opcode, 28, 4) << 1; + rs4 = extract32(ctx->opcode, 17, 4) << 1; + + gen_get_gpr(tcg_ctx, addLo, rs3); + gen_get_gpr(tcg_ctx, addHi, rs3+1); + + switch(operation){ + case OPC_RH850_MAC_reg1_reg2_reg3_reg4: + + tcg_gen_muls2_i32(tcg_ctx, resLo, resHi, r1, r2); + tcg_gen_add2_i32(tcg_ctx, destLo, destHi, resLo, resHi, addLo, addHi); + + gen_set_gpr(tcg_ctx, rs4, destLo); + gen_set_gpr(tcg_ctx, rs4+1, destHi); + break; + + case OPC_RH850_MACU_reg1_reg2_reg3_reg4: + tcg_gen_mulu2_i32(tcg_ctx, resLo, resHi, r1, r2); + tcg_gen_add2_i32(tcg_ctx, destLo, destHi, resLo, resHi, addLo, addHi); + + gen_set_gpr(tcg_ctx, rs4, destLo); + gen_set_gpr(tcg_ctx, (rs4+1), destHi); + break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, addLo); + tcg_temp_free(tcg_ctx, addHi); + tcg_temp_free(tcg_ctx, resLo); + tcg_temp_free(tcg_ctx, resHi); + tcg_temp_free(tcg_ctx, destLo); + tcg_temp_free(tcg_ctx, destHi); + +} + +static void gen_arithmetic(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int imm = rs1; + int imm_32; + uint64_t opcode48; + + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + TCGv tcg_result = tcg_temp_new(tcg_ctx); + + switch(operation) { + + case OPC_RH850_ADD_reg1_reg2: { + + tcg_gen_add_tl(tcg_ctx, tcg_result, r2, r1); + gen_set_gpr(tcg_ctx, rs2, tcg_result); + + gen_flags_on_add(tcg_ctx, r1, r2); + + } break; + + case OPC_RH850_ADD_imm5_reg2: + if((imm & 0x10) == 0x10){ + imm = 
imm | (0x7 << 5); + } + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm); + tcg_gen_ext8s_i32(tcg_ctx, tcg_imm, tcg_imm); + tcg_gen_add_tl(tcg_ctx, tcg_result, r2, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, tcg_result); + + gen_flags_on_add(tcg_ctx, r2, tcg_imm); + + break; + + case OPC_RH850_ADDI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_tl(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16s_tl(tcg_ctx, tcg_imm, tcg_imm); + tcg_gen_add_tl(tcg_ctx, r2,r1, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, r2); + + gen_flags_on_add(tcg_ctx, r1, tcg_imm); + + break; + + case OPC_RH850_CMP_reg1_reg2: { + gen_flags_on_sub(tcg_ctx, r2, r1); + } break; + + case OPC_RH850_CMP_imm5_reg2: { + + if ((imm & 0x10) == 0x10){ + imm = imm | (0x7 << 5); + } + tcg_gen_movi_tl(tcg_ctx, tcg_imm, imm); + tcg_gen_ext8s_i32(tcg_ctx, tcg_imm, tcg_imm); + + gen_flags_on_sub(tcg_ctx, r2, tcg_imm); + + } break; + + case OPC_RH850_MOV_reg1_reg2: + tcg_gen_mov_tl(tcg_ctx, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MOV_imm5_reg2: + if ((imm & 0x10) == 0x10){ + imm = imm | (0x7 << 5); + } + tcg_gen_movi_tl(tcg_ctx, r2, imm); + tcg_gen_ext8s_i32(tcg_ctx, r2, r2); + + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MOV_imm32_reg1: // 48bit instruction + opcode48 = (ctx->opcode1); + opcode48 = (ctx->opcode) | (opcode48 << 0x20); + imm_32 = extract64(opcode48, 16, 32) & 0xffffffff; + tcg_gen_movi_i32(tcg_ctx, r2, imm_32); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MOVEA_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16s_i32(tcg_ctx, tcg_imm, tcg_imm); + + tcg_gen_add_i32(tcg_ctx, r2, tcg_imm, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MOVHI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm_32); + tcg_gen_shli_i32(tcg_ctx, tcg_imm, tcg_imm, 0x10); + + tcg_gen_add_i32(tcg_ctx, r2, tcg_imm, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_SUB_reg1_reg2: + + tcg_gen_sub_tl(tcg_ctx, tcg_result, r2, r1); + gen_set_gpr(tcg_ctx, rs2, tcg_result); + gen_flags_on_sub(tcg_ctx, r2, r1); + break; + + case OPC_RH850_SUBR_reg1_reg2: + tcg_gen_sub_tl(tcg_ctx, tcg_result, r1, r2); + gen_set_gpr(tcg_ctx, rs2, tcg_result); + gen_flags_on_sub(tcg_ctx, r1, r2); + break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free(tcg_ctx, tcg_r3); + tcg_temp_free(tcg_ctx, tcg_result); +} + +static void gen_cond_arith(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + + TCGLabel *cont; + + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int int_rs3; + int int_cond; + + switch(operation){ + + case OPC_RH850_ADF_cccc_reg1_reg2_reg3:{ + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv addIfCond = tcg_temp_local_new_i32(tcg_ctx); + TCGv carry = tcg_temp_local_new_i32(tcg_ctx); + TCGv overflow = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_movi_tl(tcg_ctx, carry, 0); + tcg_gen_movi_tl(tcg_ctx, overflow, 0); + + int_rs3 = extract32(ctx->opcode, 27, 5); + int_cond = extract32(ctx->opcode, 17, 4); + if(int_cond == 0xd){ + //throw exception/warning for inappropriate condition (SA) + break; + } + + tcg_gen_mov_i32(tcg_ctx, 
r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + gen_get_gpr(tcg_ctx, r3_local,int_rs3); + tcg_gen_movi_i32(tcg_ctx, addIfCond, 0x1); + + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + // calc and store CY and OV flags to be used to obtain final values + gen_flags_on_add(tcg_ctx, r2_local, addIfCond); + tcg_gen_mov_tl(tcg_ctx, carry, cpu_CYF); + tcg_gen_mov_tl(tcg_ctx, overflow, cpu_OVF); + // on cond true, add 1 + tcg_gen_add_tl(tcg_ctx, r2_local, r2_local, addIfCond); + + gen_set_label(tcg_ctx, cont); + tcg_gen_add_tl(tcg_ctx, r3_local, r1_local, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + gen_flags_on_add(tcg_ctx, r1_local, r2_local); + tcg_gen_or_tl(tcg_ctx, cpu_CYF, cpu_CYF, carry); + tcg_gen_or_tl(tcg_ctx, cpu_OVF, cpu_OVF, overflow); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free_i32(tcg_ctx, r1_local); + tcg_temp_free_i32(tcg_ctx, r2_local); + tcg_temp_free_i32(tcg_ctx, r3_local); + tcg_temp_free_i32(tcg_ctx, addIfCond); + } + break; + + case OPC_RH850_SBF_cccc_reg1_reg2_reg3:{ + + int_rs3 = extract32(ctx->opcode, 27, 5); + int_cond = extract32(ctx->opcode, 17, 4); + if(int_cond == 0xd){ + //throw exception/warning for inappropriate condition (SA) + break; + } + + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv tmpReg = tcg_temp_local_new(tcg_ctx); + TCGv carry = tcg_temp_local_new(tcg_ctx); + TCGv overflow = tcg_temp_local_new(tcg_ctx); + cont = gen_new_label(tcg_ctx); + + tcg_gen_movi_tl(tcg_ctx, carry, 0); + tcg_gen_movi_tl(tcg_ctx, overflow, 0); + + tcg_gen_mov_i32(tcg_ctx, r3_local, r2); + + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + // store to local temp, because condResult is valid only until branch in gen_flags_on_sub + tcg_gen_mov_tl(tcg_ctx, tmpReg, condResult); + + gen_flags_on_sub(tcg_ctx, r3_local, r1); + tcg_gen_mov_tl(tcg_ctx, carry, cpu_CYF); + tcg_gen_mov_tl(tcg_ctx, overflow, cpu_OVF); + tcg_gen_sub_tl(tcg_ctx, r3_local, r3_local, r1); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, tmpReg, 0x1, cont); + tcg_gen_movi_i32(tcg_ctx, tmpReg, 0x1); + gen_flags_on_sub(tcg_ctx, r3_local, tmpReg); + tcg_gen_subi_tl(tcg_ctx, r3_local, r3_local, 1); + tcg_gen_or_tl(tcg_ctx, cpu_CYF, cpu_CYF, carry); + // overflow twice means no overflow + tcg_gen_xor_tl(tcg_ctx, cpu_OVF, cpu_OVF, overflow); + + gen_set_label(tcg_ctx, cont); + + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free_i32(tcg_ctx, r3_local); + tcg_temp_free_i32(tcg_ctx, tmpReg); + tcg_temp_free_i32(tcg_ctx, overflow); + tcg_temp_free_i32(tcg_ctx, carry); + } + break; + } + + tcg_temp_free_i32(tcg_ctx, r1); + tcg_temp_free_i32(tcg_ctx, r2); +} + +static void gen_sat_op(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int imm = rs1; + int int_rs3; + + TCGLabel *end; + TCGLabel *cont; + TCGLabel *cont2; + TCGLabel *setMax; + TCGLabel *dontChange; + + switch(operation){ + + case OPC_RH850_SATADD_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + 
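+        /*
+         * Sketch of the saturation rule implemented below (plain C, for
+         * documentation only; 'a' and 'b' stand for the two signed operands):
+         *
+         *     res = a + b;
+         *     if (a >= 0 && b > INT32_MAX - a) { res = INT32_MAX; SAT = 1; }
+         *     if (a <  0 && b < INT32_MIN - a) { res = INT32_MIN; SAT = 1; }
+         */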
tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + + + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + gen_satadd_CC(tcg_ctx, r1_local, r2_local, result); // moves also SET flag to psw + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATADD_imm5_reg2: { + + TCGv imm_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + + if ((imm & 0x10) == 0x10){ + imm = imm | (0x7 << 5); + } + + tcg_gen_movi_tl(tcg_ctx, imm_local, imm); + tcg_gen_ext8s_tl(tcg_ctx, imm_local, imm_local); + + tcg_gen_add_i32(tcg_ctx, result, imm_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, imm_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, imm_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, imm_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + gen_satadd_CC(tcg_ctx, r2_local, imm_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, imm_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATADD_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = 
tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + + int_rs3 = extract32(ctx->opcode, 27, 5); + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); //if (r1 > 0) + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); //if (r2 > MAX-r1) + tcg_gen_mov_i32(tcg_ctx, result, max); //return MAX; + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); //else + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); //if (r2 < MIN-r1) + tcg_gen_mov_i32(tcg_ctx, result, min); //return MIN; + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, int_rs3, result); + + gen_satadd_CC(tcg_ctx, r1_local, r2_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATSUB_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + setMax = gen_new_label(tcg_ctx); + dontChange = gen_new_label(tcg_ctx); + + /* + * Negating second operand and using satadd code. When negating an operand + * with value 0x80000000, the result overflows positive numbers and is not + * negated. If this happens, the operand is first incremented, and then negated. + * The second operand is as well incremented, if it's value is less than 0x7fffffff. + * Otherwise, the result is set to MAX and SATF is set. + * This was done in all following saturated subtraction functions. 
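+	 * Worked example: for r1 == INT32_MIN, -r1 cannot be represented, so
+	 * both operands are incremented first (r2 - r1 == (r2 + 1) - (r1 + 1));
+	 * if r2 is already INT32_MAX, the result is saturated to INT32_MAX
+	 * directly.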
+ */ + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x80000000, dontChange); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r2_local, 0x7fffffff, setMax); + + tcg_gen_addi_i32(tcg_ctx, r1_local, r1_local, 0x1); + tcg_gen_addi_i32(tcg_ctx, r2_local, r2_local, 0x1); + gen_set_label(tcg_ctx, dontChange); + + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + gen_set_label(tcg_ctx, setMax); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + // second negation is needed for appropriate flag calculation + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + gen_satsub_CC(tcg_ctx, r2_local, r1_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATSUB_reg1_reg2_reg3: { + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + setMax = gen_new_label(tcg_ctx); + dontChange = gen_new_label(tcg_ctx); + int_rs3 = extract32(ctx->opcode, 27, 5); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x80000000, dontChange); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r2_local, 0x7fffffff, setMax); + + tcg_gen_addi_i32(tcg_ctx, r1_local, r1_local, 0x1); + tcg_gen_addi_i32(tcg_ctx, r2_local, r2_local, 0x1); + gen_set_label(tcg_ctx, dontChange); + + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + gen_set_label(tcg_ctx, setMax); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, int_rs3, result); + + 
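+            // undo the earlier negation so gen_satsub_CC sees the original operand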
tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + gen_satsub_CC(tcg_ctx, r2_local, r1_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATSUBI_imm16_reg1_reg2: { + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv imm_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + imm = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, imm_local, imm); + tcg_gen_ext16s_i32(tcg_ctx, imm_local, imm_local); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + setMax = gen_new_label(tcg_ctx); + dontChange = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x80000000, dontChange); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, imm_local, 0x7fffffff, setMax); + + tcg_gen_addi_i32(tcg_ctx, r1_local, r1_local, 0x1); + tcg_gen_addi_i32(tcg_ctx, imm_local, imm_local, 0x1); + gen_set_label(tcg_ctx, dontChange); + + + tcg_gen_neg_i32(tcg_ctx, imm_local, imm_local); + + tcg_gen_add_i32(tcg_ctx, result, r1_local, imm_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, imm_local, check, end); + gen_set_label(tcg_ctx, setMax); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, imm_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + tcg_gen_neg_i32(tcg_ctx, imm_local, imm_local); + gen_satsub_CC(tcg_ctx, r1_local, imm_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, imm_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATSUBR_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r2); + tcg_gen_mov_i32(tcg_ctx, r2_local, r1); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + setMax = gen_new_label(tcg_ctx); + dontChange = gen_new_label(tcg_ctx); + + /* + * Negating second operand and using satadd code. 
When negating an operand + * with value 0x80000000, the result overflows positive numbers and is not + * negated. If this happens, the operand is first incremented, and then negated. + * The second operand is as well incremented, if it's value is less than 0x7fffffff. + * Otherwise, the result is set to MAX and SATF is set. + * This was done in all following saturated subtraction functions. + */ + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x80000000, dontChange); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r2_local, 0x7fffffff, setMax); + + tcg_gen_addi_i32(tcg_ctx, r1_local, r1_local, 0x1); + tcg_gen_addi_i32(tcg_ctx, r2_local, r2_local, 0x1); + gen_set_label(tcg_ctx, dontChange); + + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + gen_set_label(tcg_ctx, setMax); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + gen_satsub_CC(tcg_ctx, r2_local, r1_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); +} + +static void gen_logical(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + TCGv result = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int imm_32; + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + + switch(operation){ + + case OPC_RH850_AND_reg1_reg2: + tcg_gen_and_tl(tcg_ctx, r2, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_ANDI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_tl(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16u_i32(tcg_ctx, tcg_imm, tcg_imm); + tcg_gen_and_i32(tcg_ctx, r2, r1, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_NOT_reg1_reg2: + tcg_gen_not_i32(tcg_ctx, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_OR_reg1_reg2: + tcg_gen_or_tl(tcg_ctx, r2, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_ORI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16u_i32(tcg_ctx, tcg_imm,tcg_imm); + + tcg_gen_or_i32(tcg_ctx, r2, r1, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_TST_reg1_reg2: + tcg_gen_and_i32(tcg_ctx, result, r1, r2); + gen_logic_CC(tcg_ctx, result); + break; + + case OPC_RH850_XOR_reg1_reg2: + 
tcg_gen_xor_i32(tcg_ctx, result, r2, r1); + gen_set_gpr(tcg_ctx, rs2, result); + gen_logic_CC(tcg_ctx, result); + break; + + case OPC_RH850_XORI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16u_i32(tcg_ctx, tcg_imm,tcg_imm); + + tcg_gen_xor_i32(tcg_ctx, result, r1, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, result); + gen_logic_CC(tcg_ctx, result); + break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free(tcg_ctx, result); +} + +static void gen_data_manipulation(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv tcg_r1 = tcg_temp_new(tcg_ctx); + TCGv tcg_r2 = tcg_temp_new(tcg_ctx); + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv tcg_temp = tcg_temp_new(tcg_ctx); + TCGv tcg_temp2 = tcg_temp_new(tcg_ctx); + TCGv insert = tcg_temp_new(tcg_ctx); + + TCGLabel *cont; + TCGLabel *end; + TCGLabel *set; + + int int_imm = rs1; + int int_rs3; + int int_cond; + int pos; + int lsb; + int msb; + int width; + int mask; + int group; + + gen_get_gpr(tcg_ctx, tcg_r1, rs1); + gen_get_gpr(tcg_ctx, tcg_r2, rs2); + + switch(operation) { + + case OPC_RH850_BINS: + + group = extract32(ctx->opcode, 21, 2); + + mask = 0; + pos = extract32(ctx->opcode, 17, 3) | (extract32(ctx->opcode, 27, 1) << 3); + lsb = pos; + + msb = extract32(ctx->opcode, 28, 4); + width = extract32(ctx->opcode, 28, 4) - pos + 1; + + switch(group){ + case 0: //bins0 + pos += 16; + break; + case 1: //bins1 + width += 16; + msb+=16; + break; + case 2: //bins2 + break; + } + + if(msbopcode, 27, 5); + tcg_gen_mov_tl(tcg_ctx, tcg_temp2, tcg_r2); + tcg_gen_movi_i32(tcg_ctx, tcg_r3, 0x0); + + tcg_gen_andi_tl(tcg_ctx, tcg_temp, tcg_temp2, 0xff000000); + tcg_gen_shri_tl(tcg_ctx, tcg_temp, tcg_temp, 0x8); + tcg_gen_or_tl(tcg_ctx, tcg_r3, tcg_r3, tcg_temp); + + tcg_gen_andi_tl(tcg_ctx, tcg_temp, tcg_temp2, 0x00ff0000); + tcg_gen_shli_tl(tcg_ctx, tcg_temp, tcg_temp, 0x8); + tcg_gen_or_tl(tcg_ctx, tcg_r3, tcg_r3, tcg_temp); + + tcg_gen_andi_tl(tcg_ctx, tcg_temp, tcg_temp2, 0x0000ff00); + tcg_gen_shri_tl(tcg_ctx, tcg_temp, tcg_temp, 0x8); + tcg_gen_or_tl(tcg_ctx, tcg_r3, tcg_r3, tcg_temp); + + tcg_gen_andi_tl(tcg_ctx, tcg_temp, tcg_temp2, 0x000000ff); + tcg_gen_shli_tl(tcg_ctx, tcg_temp, tcg_temp, 0x8); + tcg_gen_or_tl(tcg_ctx, tcg_r3, tcg_r3, tcg_temp); + + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + set = gen_new_label(tcg_ctx); + tcg_gen_andi_i32(tcg_ctx, temp_local, r3_local, 0x0000ffff); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, temp_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + + tcg_gen_movi_i32(tcg_ctx, count_local, 0x0); + + tcg_gen_andi_i32(tcg_ctx, temp_local, r3_local, 0x000000ff); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp_local, 0x0, set); + tcg_gen_andi_i32(tcg_ctx, temp_local, r3_local, 0x0000ff00); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp_local, 0x0, set); + + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, set);//// + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x1); + + gen_set_label(tcg_ctx, end);//// + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, count_local); + 
tcg_temp_free(tcg_ctx, temp_local); + } break; + + case OPC_RH850_BSW_reg2_reg3: { + + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv count_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + set = gen_new_label(tcg_ctx); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + tcg_gen_bswap32_i32(tcg_ctx, tcg_r3, tcg_r2); + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + + tcg_gen_movi_i32(tcg_ctx, count_local, 0x0); + + gen_set_label(tcg_ctx, cont);//// + + tcg_gen_andi_i32(tcg_ctx, temp_local, r3_local, 0x000000ff); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp_local, 0x0, set);//// + tcg_gen_addi_i32(tcg_ctx, count_local, count_local, 0x1); + tcg_gen_shri_i32(tcg_ctx, r3_local, r3_local, 0x8); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, count_local, 0x4, cont);//// + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, set); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x1); + + gen_set_label(tcg_ctx, end); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, count_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_CMOV_cccc_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + + int_rs3 = extract32(ctx->opcode, 27, 5); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_cond = extract32(ctx->opcode, 17, 4); + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + + tcg_gen_mov_tl(tcg_ctx, r3_local, r2_local); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + tcg_gen_mov_tl(tcg_ctx, r3_local, r1_local); + gen_set_label(tcg_ctx, cont); + + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free_i32(tcg_ctx, r1_local); + tcg_temp_free_i32(tcg_ctx, r2_local); + tcg_temp_free_i32(tcg_ctx, r3_local); + } + break; + + case OPC_RH850_CMOV_cccc_imm5_reg2_reg3: { + + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + + if (int_imm & 0x10) { // if is sign bit in imm5 set + int_imm = int_imm | 0xffffffe0; + } + + int_cond = extract32(ctx->opcode, 17, 4); + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + + tcg_gen_mov_tl(tcg_ctx, r3_local, tcg_r2); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + tcg_gen_movi_tl(tcg_ctx, r3_local, int_imm); + + gen_set_label(tcg_ctx, cont); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free_i32(tcg_ctx, r3_local); + } + break; + + case OPC_RH850_HSH_reg2_reg3: + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_set_gpr(tcg_ctx, int_rs3, tcg_r2); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, tcg_r2, 0x1f); + tcg_gen_andi_i32(tcg_ctx, tcg_temp, tcg_r2, 0x0000ffff); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, tcg_temp, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, tcg_temp, 
0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + break; + + case OPC_RH850_HSW_reg2_reg3: { + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv count_local = tcg_temp_local_new_i32(tcg_ctx); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + set = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3_local,int_rs3); + + tcg_gen_andi_tl(tcg_ctx, temp_local, r2_local, 0xffff); + tcg_gen_shli_tl(tcg_ctx, temp_local, temp_local, 0x10); + tcg_gen_andi_tl(tcg_ctx, temp2_local, r2_local, 0xffff0000); + tcg_gen_shri_tl(tcg_ctx, temp2_local, temp2_local, 0x10); + + tcg_gen_or_tl(tcg_ctx, r3_local, temp2_local, temp_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + + tcg_gen_movi_i32(tcg_ctx, count_local, 0x0); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_andi_i32(tcg_ctx, temp3_local, r3_local, 0x0000ffff); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp3_local, 0x0, set); + tcg_gen_andi_i32(tcg_ctx, temp3_local, r3_local, 0xffff0000); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp3_local, 0x0, set); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, set);//// + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x1); + + gen_set_label(tcg_ctx, end);//// + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, count_local); + tcg_temp_free(tcg_ctx, temp_local); + tcg_temp_free(tcg_ctx, temp2_local); + tcg_temp_free(tcg_ctx, temp3_local); + } + break; + + case OPC_RH850_ROTL_imm5_reg2_reg3: + { + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv imm_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + + tcg_gen_movi_tl(tcg_ctx, tcg_imm, int_imm); + tcg_gen_ext8u_tl(tcg_ctx, tcg_imm, tcg_imm); + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + tcg_gen_rotl_tl(tcg_ctx, tcg_r3, tcg_r2, tcg_imm); + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, tcg_r3, 0x1); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, tcg_r3, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, tcg_r3, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_mov_i32(tcg_ctx, imm_local, tcg_imm); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, tcg_imm, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + gen_set_label(tcg_ctx, cont); + + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, imm_local); + } break; + + case OPC_RH850_ROTL_reg1_reg2_reg3: + { + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + tcg_gen_rotl_tl(tcg_ctx, tcg_r3, tcg_r2, tcg_r1); + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, tcg_r3, 0x1); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, tcg_r3, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, tcg_r3, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + 
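+            // CY was taken from bit 0 of the rotated result above; it is
+            // forced back to 0 below when the rotate amount in reg1 is 0.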
tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, tcg_r1, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + gen_set_label(tcg_ctx, cont); + + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, r1_local); + } break; + + case OPC_RH850_SAR_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); //shift by value of lower 5 bits of reg1 + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_subi_i32(tcg_ctx, r1_local, r1_local, 0x1); //shift by r1-1 + + tcg_gen_sar_i32(tcg_ctx, r2_local, r2_local, r1_local); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r2_local, 0x1); //LSB here is the last bit to be shifted + tcg_gen_sari_i32(tcg_ctx, r2_local, r2_local, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r1_local); + } + break; + + case OPC_RH850_SAR_imm5_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_movi_tl(tcg_ctx, r1_local, int_imm); + tcg_gen_ext8u_i32(tcg_ctx, r1_local, r1_local); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_subi_i32(tcg_ctx, r1_local, r1_local, 0x1); //shift by one less + tcg_gen_sar_i32(tcg_ctx, r2_local, r2_local, r1_local); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r2_local, 0x1); //LSB here is the last bit to be shifted + tcg_gen_sari_i32(tcg_ctx, r2_local, r2_local, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r1_local); + } + break; + + case OPC_RH850_SAR_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); //shift by only lower 5 bits of reg1 + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3_local, int_rs3); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x0, cont); //is non-shift? 
+ tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_mov_i32(tcg_ctx, r3_local, r2_local); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + + + tcg_gen_subi_i32(tcg_ctx, r1_local, r1_local, 0x1); //shift by one less + tcg_gen_sar_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r3_local, 0x1); //LSB here is the last bit to be shifted + tcg_gen_sari_i32(tcg_ctx, r3_local, r3_local, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r1_local); + } + break; + + case OPC_RH850_SASF_cccc_reg2: { + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv operand_local = tcg_temp_local_new_i32(tcg_ctx); + + int_cond = extract32(ctx->opcode,0,4); + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + + tcg_gen_shli_tl(tcg_ctx, r2_local, tcg_r2, 0x1); + + tcg_gen_movi_i32(tcg_ctx, operand_local, 0x00000000); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + tcg_gen_movi_i32(tcg_ctx, operand_local, 0x00000001); + + gen_set_label(tcg_ctx, cont); + tcg_gen_or_tl(tcg_ctx, r2_local, r2_local, operand_local); + + gen_set_gpr(tcg_ctx, rs2, r2_local); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, operand_local); + tcg_temp_free(tcg_ctx, condResult); + } + break; + + case OPC_RH850_SETF_cccc_reg2:{ + + TCGv operand_local = tcg_temp_local_new_i32(tcg_ctx); + int_cond = extract32(ctx->opcode,0,4); + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + tcg_gen_movi_i32(tcg_ctx, operand_local, 0x00000001); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, operand_local, 0x00000000); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, operand_local); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free(tcg_ctx, operand_local); + } + break; + + case OPC_RH850_SHL_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); //get only lower 5 bits + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); // shifting for [r1]-1 + tcg_gen_shl_tl(tcg_ctx, r2_local, r2_local, temp_local); + + tcg_gen_shri_i32(tcg_ctx, cpu_CYF, r2_local, 0x1f); // checking the last bit to shift + tcg_gen_shli_i32(tcg_ctx, r2_local, r2_local, 0x1); // shifting for that remaining 1 + + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + 
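+        /*
+         * The shift cases below use the same two-step trick as above: shift by
+         * (amount - 1), copy the bit that would be shifted out next into CY,
+         * then shift by the remaining 1. A shift amount of 0 leaves the value
+         * unchanged and clears CY.
+         */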
case OPC_RH850_SHL_imm5_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_movi_tl(tcg_ctx, r1_local, int_imm); + tcg_gen_ext8u_tl(tcg_ctx, r1_local, r1_local); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); + tcg_gen_shl_tl(tcg_ctx, r2_local, r2_local, temp_local); + tcg_gen_shri_i32(tcg_ctx, cpu_CYF, r2_local, 0x1f); + tcg_gen_shli_tl(tcg_ctx, r2_local, r2_local, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SHL_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3_local,int_rs3); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); // when reg1 = 0, do not shift + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); + tcg_gen_shl_tl(tcg_ctx, r3_local, r2_local, temp_local); + + tcg_gen_shri_i32(tcg_ctx, cpu_CYF, r3_local, 0x1f); + tcg_gen_shli_tl(tcg_ctx, r3_local, r3_local, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_mov_i32(tcg_ctx, r3_local, r2_local); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SHR_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); // + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); //checking for non-shift + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); // shifting for [r1]-1 + tcg_gen_shr_tl(tcg_ctx, r2_local, r2_local, temp_local); + + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r2_local, 0x1); // checking the last bit to shift (LSB) + tcg_gen_shri_i32(tcg_ctx, r2_local, r2_local, 0x1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, 
r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SHR_imm5_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_movi_tl(tcg_ctx, r1_local, int_imm); + tcg_gen_ext8u_tl(tcg_ctx, r1_local, r1_local); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); //checking for non-shift + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); // shifting for [r1]-1 + tcg_gen_shr_tl(tcg_ctx, r2_local, r2_local, temp_local); + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r2_local, 0x1); // checking the last bit to shift (LSB) + tcg_gen_shri_i32(tcg_ctx, r2_local, r2_local, 0x1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SHR_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3_local, int_rs3); + + + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); //checking for non-shift + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); // shifting for [r1]-1 + tcg_gen_shr_tl(tcg_ctx, r3_local, r2_local, temp_local); + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r3_local, 0x1); // checking the last bit to shift (LSB) + tcg_gen_shri_i32(tcg_ctx, r3_local, r3_local, 0x1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_mov_i32(tcg_ctx, r3_local, r2_local); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SXB_reg1: + tcg_gen_andi_tl(tcg_ctx, tcg_r1, tcg_r1,0xFF); + tcg_gen_ext8s_tl(tcg_ctx, tcg_r1, tcg_r1); + gen_set_gpr(tcg_ctx, rs1, tcg_r1); + break; + + case OPC_RH850_SXH_reg1: + tcg_gen_andi_tl(tcg_ctx, tcg_r1, tcg_r1,0xFFFF); + tcg_gen_ext16s_tl(tcg_ctx, tcg_r1, tcg_r1); + gen_set_gpr(tcg_ctx, rs1, tcg_r1); + break; + + case OPC_RH850_ZXH_reg1: + tcg_gen_andi_tl(tcg_ctx, tcg_r1, tcg_r1,0xFFFF); + tcg_gen_ext16u_tl(tcg_ctx, tcg_r1, tcg_r1); + gen_set_gpr(tcg_ctx, rs1, tcg_r1); 
+ break; + + case OPC_RH850_ZXB_reg1: + tcg_gen_andi_tl(tcg_ctx, tcg_r1, tcg_r1,0xFF); + tcg_gen_ext8u_tl(tcg_ctx, tcg_r1, tcg_r1); + gen_set_gpr(tcg_ctx, rs1, tcg_r1); + break; + } + + tcg_temp_free(tcg_ctx, tcg_r1); + tcg_temp_free(tcg_ctx, tcg_r2); + tcg_temp_free(tcg_ctx, tcg_r3); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free(tcg_ctx, tcg_temp); + tcg_temp_free(tcg_ctx, tcg_temp2); + tcg_temp_free(tcg_ctx, insert); +} + +static void gen_bit_search(DisasContext *ctx, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv tcg_r2 = tcg_temp_new(tcg_ctx); + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + int int_rs3; + int_rs3 = extract32(ctx->opcode, 27, 5); + + gen_get_gpr(tcg_ctx, tcg_r2, rs2); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + + TCGLabel *end; + TCGLabel *found; + TCGLabel *loop; + + switch(operation){ + case OPC_RH850_SCH0L_reg2_reg3: { + + TCGv foundFlag = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv count = tcg_temp_local_new(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_movi_i32(tcg_ctx, count, 0x0); + + end = gen_new_label(tcg_ctx); + found = gen_new_label(tcg_ctx); + loop = gen_new_label(tcg_ctx); + + gen_set_label(tcg_ctx, loop);//--------------------------------------------------- + + tcg_gen_shl_i32(tcg_ctx, check, r2_local, count); + tcg_gen_ori_i32(tcg_ctx, check, check, 0x7fffffff); // check MSB bit + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, check, 0x7fffffff, found); + + tcg_gen_addi_i32(tcg_ctx, count, count, 0x1); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_NE, count, 0x20, loop);//-------------------- + + tcg_gen_movi_i32(tcg_ctx, result, 0x0); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, found); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x1); + tcg_gen_addi_i32(tcg_ctx, result, count, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, result); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, foundFlag, 0x1); //setting Z if not found + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_SF, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, r2_local, 0xfffffffe); //setting CY if found at the end + + tcg_temp_free(tcg_ctx, foundFlag); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, count); + tcg_temp_free(tcg_ctx, result); + } break; + + case OPC_RH850_SCH0R_reg2_reg3: { + + TCGv foundFlag = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv count = tcg_temp_local_new(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_movi_i32(tcg_ctx, count, 0x0); + + end = gen_new_label(tcg_ctx); + found = gen_new_label(tcg_ctx); + loop = gen_new_label(tcg_ctx); + + gen_set_label(tcg_ctx, loop);//--------------------------------------------------- + + tcg_gen_shr_i32(tcg_ctx, check, r2_local, count); + tcg_gen_ori_i32(tcg_ctx, check, check, 0xfffffffe); // check MSB bit + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, check, 0xfffffffe, found); + + tcg_gen_addi_i32(tcg_ctx, count, count, 0x1); + 
tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_NE, count, 0x20, loop);//-------------------- + + tcg_gen_movi_i32(tcg_ctx, result, 0x0); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, found); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x1); + tcg_gen_addi_i32(tcg_ctx, result, count, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, result); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, foundFlag, 0x1); //setting Z if not found + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_SF, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, r2_local, 0x7fffffff); + + tcg_temp_free(tcg_ctx, foundFlag); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, count); + tcg_temp_free(tcg_ctx, result); + } break; + + case OPC_RH850_SCH1L_reg2_reg3: { + + TCGv foundFlag = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv count = tcg_temp_local_new(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_movi_i32(tcg_ctx, count, 0x0); + + end = gen_new_label(tcg_ctx); + found = gen_new_label(tcg_ctx); + loop = gen_new_label(tcg_ctx); + + gen_set_label(tcg_ctx, loop);//--------------------------------------------------- + + tcg_gen_shl_i32(tcg_ctx, check, r2_local, count); + tcg_gen_andi_i32(tcg_ctx, check, check, 0x80000000); // check MSB bit + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, check, 0x80000000, found); + + tcg_gen_addi_i32(tcg_ctx, count, count, 0x1); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_NE, count, 0x20, loop);//-------------------- + + tcg_gen_movi_i32(tcg_ctx, result, 0x0); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, found); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x1); + tcg_gen_addi_i32(tcg_ctx, result, count, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, result); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, foundFlag, 0x1); //setting Z if not found + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_SF, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, r2_local, 0x1); + + tcg_temp_free(tcg_ctx, foundFlag); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, count); + tcg_temp_free(tcg_ctx, result); + } break; + + case OPC_RH850_SCH1R_reg2_reg3: { + + TCGv foundFlag = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv count = tcg_temp_local_new(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_movi_i32(tcg_ctx, count, 0x0); + + end = gen_new_label(tcg_ctx); + found = gen_new_label(tcg_ctx); + loop = gen_new_label(tcg_ctx); + + gen_set_label(tcg_ctx, loop);//--------------------------------------------------- + + tcg_gen_shr_i32(tcg_ctx, check, r2_local, count); + tcg_gen_andi_i32(tcg_ctx, check, check, 0x1); // check MSB bit + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, check, 0x1, found); + + tcg_gen_addi_i32(tcg_ctx, count, count, 0x1); + tcg_gen_brcondi_tl(tcg_ctx, 
TCG_COND_NE, count, 0x20, loop);//-------------------- + + tcg_gen_movi_i32(tcg_ctx, result, 0x0); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, found); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x1); + tcg_gen_addi_i32(tcg_ctx, result, count, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, result); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, foundFlag, 0x1); //setting Z if not found + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_SF, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, r2_local, 0x80000000); + + tcg_temp_free(tcg_ctx, foundFlag); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, count); + tcg_temp_free(tcg_ctx, result); + } break; + } + + tcg_temp_free(tcg_ctx, tcg_r2); + tcg_temp_free(tcg_ctx, tcg_r3); +} + +static void gen_divide(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv tcg_r1 = tcg_temp_new(tcg_ctx); + TCGv tcg_r2 = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, tcg_r1, rs1); + gen_get_gpr(tcg_ctx, tcg_r2, rs2); + + int int_rs3; + + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + + switch(operation){ + + case OPC_RH850_DIV_reg1_reg2_reg3:{ + + TCGLabel *cont; + TCGLabel *end; + TCGLabel *fin; + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + TCGv overflowed = tcg_temp_local_new(tcg_ctx); + TCGv overflowed2 = tcg_temp_local_new(tcg_ctx); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); //if r1=0 jump to end + + tcg_gen_movi_i32(tcg_ctx, r2_local, 0x80000000); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed, r2_local, 0x80000000); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed2, r1_local, 0xffffffff); + tcg_gen_and_i32(tcg_ctx, overflowed, overflowed, overflowed2); //if both + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, overflowed, 0x1); //are 1 + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, end); + tcg_gen_movi_i32(tcg_ctx, r2_local, 0x80000000); //DO THIS + tcg_gen_movi_i32(tcg_ctx, r3_local, 0x0000); + gen_set_gpr(tcg_ctx, rs2, r2_local); //write zeros if undefined + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, end); + + tcg_gen_rem_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_div_i32(tcg_ctx, r2_local, r2_local, r1_local); + + if(rs2==int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r3_local); + } else { + gen_set_gpr(tcg_ctx, rs2, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + } + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + + gen_set_label(tcg_ctx, fin); + + tcg_temp_free(tcg_ctx, overflowed); + tcg_temp_free(tcg_ctx, 
overflowed2); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + } break; + + case OPC_RH850_DIVH_reg1_reg2:{ + + TCGLabel *cont; + TCGLabel *end; + TCGLabel *fin; + + tcg_gen_andi_i32(tcg_ctx, tcg_r1, tcg_r1, 0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, tcg_r1, tcg_r1); + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv overflowed = tcg_temp_local_new(tcg_ctx); + TCGv overflowed2 = tcg_temp_local_new(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); //if r1=0 jump to cont + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed, r2_local, 0x80000000); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed2, r1_local, 0xffffffff); + tcg_gen_and_i32(tcg_ctx, overflowed, overflowed, overflowed2); //if both + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, overflowed, 0x1); //are 1 + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, end); + tcg_gen_movi_i32(tcg_ctx, r2_local, 0x80000000); //DO THIS + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x1); + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, end); + + tcg_gen_div_i32(tcg_ctx, r2_local, r2_local, r1_local); + gen_set_gpr(tcg_ctx, rs2, r2_local); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + + gen_set_label(tcg_ctx, fin); + + tcg_temp_free(tcg_ctx, overflowed); + tcg_temp_free(tcg_ctx, overflowed2); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + } break; + + case OPC_RH850_DIVH_reg1_reg2_reg3: { + // 0x80000000/0xffffffff=0x80000000; cpu_OVF=1, cpu_Z=1? 
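+    // (This is the one signed-overflow case handled below: the true quotient
+    // +2^31 does not fit in 32 bits, so the code forces the quotient to
+    // 0x80000000, clears reg3 and sets OV.)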
+ // reg2/0x0000=undefined; cpu_OVF=1 + // if reg2==reg3; reg2=remainder + + TCGLabel *cont; + TCGLabel *end; + TCGLabel *fin; + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, tcg_r1, tcg_r1, 0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, tcg_r1, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + TCGv overflowed = tcg_temp_local_new(tcg_ctx); + TCGv overflowed2 = tcg_temp_local_new(tcg_ctx); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); ///// + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed, r2_local, 0x80000000); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed2, r1_local, 0xffffffff); + tcg_gen_and_i32(tcg_ctx, overflowed, overflowed, overflowed2); // if result is 1, cpu_OVF = 1 + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, overflowed, 0x1); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, end); + tcg_gen_movi_i32(tcg_ctx, r2_local, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, r3_local, 0x0000); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x1); + gen_set_gpr(tcg_ctx, rs2, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, end); ///// + + tcg_gen_rem_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_div_i32(tcg_ctx, r2_local, r2_local, r1_local); + + if(rs2==int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r3_local); + } else { + gen_set_gpr(tcg_ctx, rs2, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + } + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + + gen_set_label(tcg_ctx, fin); ///// + + tcg_temp_free(tcg_ctx, overflowed); + tcg_temp_free(tcg_ctx, overflowed2); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + } break; + + case OPC_RH850_DIVHU_reg1_reg2_reg3:{ + + TCGLabel *cont; + TCGLabel *fin; + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, tcg_r1, tcg_r1, 0x0000FFFF); + tcg_gen_ext16u_i32(tcg_ctx, tcg_r1, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + + cont = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); ///// + tcg_gen_remu_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_divu_i32(tcg_ctx, r2_local, r2_local, r1_local); + + if(rs2==int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r3_local); + } else { + gen_set_gpr(tcg_ctx, rs2, r2_local); + 
gen_set_gpr(tcg_ctx, int_rs3, r3_local); + } + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + + gen_set_label(tcg_ctx, fin); ///// + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + } + break; + + case OPC_RH850_DIVU_reg1_reg2_reg3:{ + + // reg2/0x0000=undefined; cpu_OVF=1 + // if reg2==reg3; reg2=remainder + + TCGLabel *cont; + TCGLabel *fin; + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + + cont = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); ///// + + tcg_gen_remu_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_divu_i32(tcg_ctx, r2_local, r2_local, r1_local); + + if(rs2==int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r3_local); + } else { + gen_set_gpr(tcg_ctx, rs2, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + } + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_andi_i32(tcg_ctx, check, r2_local, 0x80000000); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_SF, check, 0x80000000); + + gen_set_label(tcg_ctx, fin); ///// + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + } + break; + } + + tcg_temp_free_i32(tcg_ctx, tcg_r1); + tcg_temp_free_i32(tcg_ctx, tcg_r2); + tcg_temp_free_i32(tcg_ctx, tcg_r3); +} + +static void gen_branch(CPURH850State *env, DisasContext *ctx, uint32_t cond, + int rs1, int rs2, target_long bimm) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGLabel *l = gen_new_label(tcg_ctx); + TCGv condOK = tcg_temp_new(tcg_ctx); + TCGv condResult = condition_satisfied(tcg_ctx, cond); + tcg_gen_movi_i32(tcg_ctx, condOK, 0x1); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_EQ, condResult, condOK, l); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free(tcg_ctx, condOK); + + gen_goto_tb_imm(ctx, 1, ctx->base.pc_next); // no jump, continue with next instr. + gen_set_label(tcg_ctx, l); /* branch taken */ + gen_goto_tb_imm(ctx, 0, ctx->pc + bimm); // jump + ctx->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; +} + +static void gen_jmp(DisasContext *ctx, int rs1, uint32_t disp32, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + // disp32 is already generated when entering this function + int rs2, rs3; + TCGv link_addr = tcg_temp_new(tcg_ctx); + TCGv dest_addr = tcg_temp_new(tcg_ctx); + + switch (operation) + { + /** + * Jump with immediate displacement. + * PC and disp32 are fixed and won't change at + * execution time, we can call gen_goto_tb_imm() with + * the computed destination address. + */ + case OPC_RH850_JR_imm22: + case OPC_RH850_JR_imm32: + { + gen_goto_tb_imm(ctx, 0, ctx->pc + disp32); + ctx->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; + } + break; + + /** + * Jump with immediate displacement but store + * PC+4 first in reg2. We first call tcg_goto_tb(), + * update PC and reg2 and then issue an exit TB. 
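+     * (reg2 receives the return address PC+4, and the branch target is PC
+     * plus the sign-extended displacement.)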
+ **/ + case OPC_RH850_JARL_disp22_reg2: + { + rs2 = extract32(ctx->opcode, 11, 5); + tcg_gen_movi_i32(tcg_ctx, link_addr, ctx->pc); + tcg_gen_addi_i32(tcg_ctx, link_addr, link_addr, 0x4); + gen_set_gpr(tcg_ctx, rs2, link_addr); + + /* Update pc */ + tcg_gen_movi_i32(tcg_ctx, cpu_pc, ctx->pc + disp32); + + /* Goto corresponding TB (indirect jump). */ + ctx->base.is_jmp = DISAS_INDIRECT_JUMP; + } + break; + + /** + * Jump with immediate displacement but store PC+6 first in reg1. + * We first call tcg_gen_goto_tb(), update PC and reg1 and then + * issue an exit TB. + **/ + + case OPC_RH850_JARL_disp32_reg1: + { + gen_goto_tb_rl(ctx, 0, rs1, 6, ctx->pc + disp32); + ctx->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; + } + break; + + /** + * This is a pure indirect call that will move GR[reg1] into PC, + * so we need to process in a different way. + **/ + + case OPC_RH850_JARL_reg1_reg3: + { + /* Get reg1 content into dest_addr. */ + gen_get_gpr(tcg_ctx, dest_addr, rs1); + + /* Get reg3 index, and store PC+4 in it. */ + rs3 = extract32(ctx->opcode, 27, 5); + tcg_gen_movi_i32(tcg_ctx, link_addr, ctx->pc); + tcg_gen_addi_i32(tcg_ctx, link_addr, link_addr, 0x4); + gen_set_gpr(tcg_ctx, rs3, link_addr); + + /* Update pc */ + tcg_gen_andi_i32(tcg_ctx, dest_addr, dest_addr, 0xfffffffe); + tcg_gen_mov_i32(tcg_ctx, cpu_pc, dest_addr); + + /* Goto corresponding TB (indirect jump). */ + ctx->base.is_jmp = DISAS_INDIRECT_JUMP; + } + break; + + default: // JMP instruction + { + /* Get reg1 into dest_addr. */ + gen_get_gpr(tcg_ctx, dest_addr, rs1); + + /* Apply displacement if provided. */ + if (disp32 != 0) + { + tcg_gen_addi_i32(tcg_ctx, dest_addr, dest_addr, disp32); + } + + /* Align and update PC. */ + tcg_gen_andi_i32(tcg_ctx, dest_addr, dest_addr, 0xfffffffe); + tcg_gen_mov_i32(tcg_ctx, cpu_pc, dest_addr); + + /* Indirect jump. */ + ctx->base.is_jmp = DISAS_INDIRECT_JUMP; + } + break; + } + + /* Free temporary values. */ + tcg_temp_free_i32(tcg_ctx, link_addr); + tcg_temp_free_i32(tcg_ctx, dest_addr); +} + +static void gen_loop(DisasContext *ctx, int rs1, int32_t disp16) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGLabel *l = gen_new_label(tcg_ctx); + TCGv zero_local = tcg_temp_local_new(tcg_ctx); + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv minusone_local = tcg_temp_local_new(tcg_ctx); + + tcg_gen_movi_i32(tcg_ctx, zero_local, 0); + tcg_gen_movi_i32(tcg_ctx, minusone_local, 0xffffffff); + gen_get_gpr(tcg_ctx, r1_local, rs1); + gen_flags_on_add(tcg_ctx, r1_local, minusone_local); //set flags + tcg_gen_add_i32(tcg_ctx, r1_local, r1_local, minusone_local); + gen_set_gpr(tcg_ctx, rs1, r1_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_NE, r1_local, zero_local, l); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, zero_local); + tcg_temp_free(tcg_ctx, minusone_local); + + gen_goto_tb_imm(ctx, 0, ctx->base.pc_next); // no jump, continue with next instr. 
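+    // Label l is reached when the decremented reg1 is still non-zero: the
+    // LOOP branch is taken and control goes back by disp16 (ctx->pc - disp16).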
+ gen_set_label(tcg_ctx, l); // branch taken + gen_goto_tb_imm(ctx, 1, ctx->pc - disp16); + + ctx->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; +} + +static void gen_bit_manipulation(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new_i32(tcg_ctx); + TCGv r2 = tcg_temp_new_i32(tcg_ctx); + TCGv tcg_disp = tcg_temp_new_i32(tcg_ctx); + TCGv one = tcg_temp_new_i32(tcg_ctx); + + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv test = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + uint32_t disp16 = extract32(ctx->opcode, 16, 16); + + int bit; + + switch(operation){ + case OPC_RH850_SET1_reg2_reg1: + + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + tcg_gen_movi_i32(tcg_ctx, one, 0x1); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_shl_i32(tcg_ctx, r2, one, r2); + + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, r2); + + tcg_gen_or_i32(tcg_ctx, temp, temp, r2); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + break; + case OPC_RH850_SET1_bit3_disp16_reg1: + + gen_get_gpr(tcg_ctx, r1, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_disp, disp16); + tcg_gen_ext16s_i32(tcg_ctx, tcg_disp, tcg_disp); + tcg_gen_add_i32(tcg_ctx, adr, r1, tcg_disp); + + bit = extract32(ctx->opcode, 11, 3); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_andi_i32(tcg_ctx, test, temp, (0x1 << bit)); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, (0x1 << bit)); + + tcg_gen_ori_i32(tcg_ctx, temp, temp, (0x1 << bit)); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_NOT1_reg2_reg1: + + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + tcg_gen_movi_i32(tcg_ctx, one, 0x1); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_shl_i32(tcg_ctx, r2, one, r2); // r2 = mask + + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, r2); + + //test = temp & mask + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + //test = not (test) & mask + tcg_gen_not_i32(tcg_ctx, test, test); + tcg_gen_and_i32(tcg_ctx, test, test, r2); + //temp = temp & not(mask) + tcg_gen_not_i32(tcg_ctx, r2, r2); + tcg_gen_and_i32(tcg_ctx, temp, temp, r2); + //temp = temp or test + tcg_gen_or_i32(tcg_ctx, temp, temp, test); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_NOT1_bit3_disp16_reg1: + + gen_get_gpr(tcg_ctx, r1, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_disp, disp16); + tcg_gen_ext16s_i32(tcg_ctx, tcg_disp, tcg_disp); + tcg_gen_add_i32(tcg_ctx, adr, r1, tcg_disp); + + bit = extract32(ctx->opcode, 11, 3); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_andi_i32(tcg_ctx, test, temp, (0x1 << bit)); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, (0x1 << bit)); + + tcg_gen_movi_i32(tcg_ctx, r2, (0x1 << bit)); // r2 = mask + + //test = temp & mask + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + //test = not (test) & mask + tcg_gen_not_i32(tcg_ctx, test, test); + tcg_gen_and_i32(tcg_ctx, test, test, r2); + //temp = temp & not(mask) + tcg_gen_not_i32(tcg_ctx, r2, r2); + tcg_gen_and_i32(tcg_ctx, temp, temp, r2); + //temp = temp or test + tcg_gen_or_i32(tcg_ctx, temp, temp, test); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_CLR1_reg2_reg1: + + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, 
rs2); + tcg_gen_movi_i32(tcg_ctx, one, 0x1); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + tcg_gen_andi_i32(tcg_ctx, r2, r2, 0x7); + tcg_gen_shl_i32(tcg_ctx, r2, one, r2); + + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, r2); + + tcg_gen_not_i32(tcg_ctx, r2, r2); + tcg_gen_and_i32(tcg_ctx, temp, temp, r2); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_CLR1_bit3_disp16_reg1: + + gen_get_gpr(tcg_ctx, r1, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_disp, disp16); + tcg_gen_ext16s_i32(tcg_ctx, tcg_disp, tcg_disp); + tcg_gen_add_i32(tcg_ctx, adr, r1, tcg_disp); + + bit = extract32(ctx->opcode, 11, 3); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_movi_i32(tcg_ctx, test, (0x1 << bit)); + tcg_gen_andi_i32(tcg_ctx, test, temp, (0x1 << bit)); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, (0x1 << bit)); + + tcg_gen_movi_i32(tcg_ctx, test, (0x1 << bit)); + tcg_gen_not_i32(tcg_ctx, test, test); + tcg_gen_and_i32(tcg_ctx, temp, temp, test); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_TST1_reg2_reg1: + + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + tcg_gen_movi_i32(tcg_ctx, one, 0x1); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_shl_i32(tcg_ctx, r2, one, r2); + + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, r2); + break; + + case OPC_RH850_TST1_bit3_disp16_reg1: + + gen_get_gpr(tcg_ctx, r1, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_disp, disp16); + tcg_gen_ext16s_i32(tcg_ctx, tcg_disp, tcg_disp); + tcg_gen_add_i32(tcg_ctx, adr, r1, tcg_disp); + + bit = extract32(ctx->opcode, 11, 3); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_movi_i32(tcg_ctx, test, (0x1 << bit)); + tcg_gen_andi_i32(tcg_ctx, test, temp, (0x1 << bit)); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, (0x1 << bit)); + break; + } + + tcg_temp_free_i32(tcg_ctx, r1); + tcg_temp_free_i32(tcg_ctx, r2); + tcg_temp_free_i32(tcg_ctx, tcg_disp); + tcg_temp_free_i32(tcg_ctx, one); + tcg_temp_free_i32(tcg_ctx, temp); + tcg_temp_free_i32(tcg_ctx, test); + tcg_temp_free_i32(tcg_ctx, adr); + +} + +static void gen_update_ispr(DisasContext *ctx, CPURH850State *env) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGLabel *do_not_update = gen_new_label(tcg_ctx); + TCGLabel *clear_bit = gen_new_label(tcg_ctx); + TCGLabel *loop = gen_new_label(tcg_ctx); + TCGv temp = tcg_temp_local_new_i32(tcg_ctx); + TCGv idx = tcg_temp_local_new_i32(tcg_ctx); + TCGv ispr = tcg_temp_local_new_i32(tcg_ctx); + + /* Move ISPR value into intcfg. */ + tcg_gen_mov_i32(tcg_ctx, temp, cpu_sysRegs[BANK_ID_BASIC_2][INTCFG_IDX2]); + + /* And intcfg with 1. */ + tcg_gen_andi_i32(tcg_ctx, temp, temp, 1); + + /* Compare intcfg, jump to do_not_process if 1. */ + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, temp, 0, do_not_update); + + /* INTCFG.ICSP = 0, now check EP (EP == 1 -> do not update ISRP) */ + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_EP, 0, do_not_update); + + /** + * Okay, now update ISPR (clear the highest priority bit). + * We need to loop from bit 0 to bit 15, clear bit and exit loop if bit is + * set. + **/ + + /* Set mask to 1. */ + tcg_gen_movi_i32(tcg_ctx, temp, 1); + tcg_gen_movi_i32(tcg_ctx, idx, 0); + + gen_set_label(tcg_ctx, loop); + + /* Load ISPR. 
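+       Then AND it with the walking one-bit mask held in temp to test the bit selected by idx.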
*/ + tcg_gen_mov_i32(tcg_ctx, ispr, cpu_sysRegs[BANK_ID_BASIC_2][ISPR_IDX2]); + tcg_gen_and_i32(tcg_ctx, ispr, ispr, temp); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, ispr, 1, clear_bit); + + /* shift left our mask, exit if done. */ + tcg_gen_shli_i32(tcg_ctx, temp, temp, 1); + tcg_gen_addi_i32(tcg_ctx, idx, idx, 1); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_LT, idx, 2, loop); + + tcg_gen_br(tcg_ctx, do_not_update); + + /* Clear bit. */ + gen_set_label(tcg_ctx, clear_bit); + tcg_gen_xor_i32(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_2][ISPR_IDX2], cpu_sysRegs[BANK_ID_BASIC_2][ISPR_IDX2], temp); + + /* Set label do_not_update here. */ + gen_set_label(tcg_ctx, do_not_update); + + /* Free resources. */ + tcg_temp_free_i32(tcg_ctx, temp); + tcg_temp_free_i32(tcg_ctx, ispr); +} + +static void gen_special(DisasContext *ctx, CPURH850State *env, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGLabel *storeReg3; + TCGLabel *cont; + int regID; + int selID = 0; + int imm; + int vector; + + switch(operation){ + case OPC_RH850_CALLT_imm6: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + //setting CTPC to PC+2 + tcg_gen_addi_i32(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][CTPC_IDX], cpu_pc, 0x2); + //setting CPTSW bits 0:4 + flags_to_tcgv_z_cy_ov_s_sat(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][CTPSW_IDX]); + + imm = extract32(ctx->opcode, 0, 6); + tcg_gen_movi_i32(tcg_ctx, adr, imm); + tcg_gen_shli_i32(tcg_ctx, adr, adr, 0x1); + tcg_gen_ext8s_i32(tcg_ctx, adr, adr); + tcg_gen_add_i32(tcg_ctx, adr, cpu_sysRegs[BANK_ID_BASIC_0][CTBP_IDX], adr); + + tcg_gen_qemu_ld16u(tcg_ctx, temp, adr, 0); + + tcg_gen_add_i32(tcg_ctx, cpu_pc, temp, cpu_sysRegs[BANK_ID_BASIC_0][CTBP_IDX]); + ctx->base.is_jmp = DISAS_EXIT_TB; + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + } break; + + case OPC_RH850_CAXI_reg1_reg2_reg3: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + TCGv r3 = tcg_temp_new(tcg_ctx); + + storeReg3 = gen_new_label(tcg_ctx); + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + int rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3, rs3); + tcg_gen_qemu_ld32u(tcg_ctx, temp, adr, 0); + storeReg3 = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + + TCGv local_adr = tcg_temp_local_new_i32(tcg_ctx); + TCGv local_r2 = tcg_temp_local_new_i32(tcg_ctx); + TCGv local_r3 = tcg_temp_local_new_i32(tcg_ctx); + TCGv local_temp = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, local_adr, adr); + tcg_gen_mov_i32(tcg_ctx, local_r2, r2); + tcg_gen_mov_i32(tcg_ctx, local_r3, r3); + tcg_gen_mov_i32(tcg_ctx, local_temp, temp); + + gen_flags_on_sub(tcg_ctx, local_r2, local_temp); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, 0x1, storeReg3); + tcg_gen_qemu_st_tl(tcg_ctx, local_temp, local_adr, MEM_IDX, MO_TESL); + tcg_gen_br(tcg_ctx, cont); + + gen_set_label(tcg_ctx, storeReg3); + tcg_gen_qemu_st_tl(tcg_ctx, local_r3, local_adr, MEM_IDX, MO_TESL); + + gen_set_label(tcg_ctx, cont); + gen_set_gpr(tcg_ctx, rs3, local_temp); + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, r3); + break; + } + + case OPC_RH850_CTRET: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, cpu_pc, cpu_sysRegs[BANK_ID_BASIC_0][CTPC_IDX]); + tcgv_to_flags_z_cy_ov_s_sat(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][CTPSW_IDX]); + + ctx->base.is_jmp = DISAS_EXIT_TB; + + 
tcg_temp_free(tcg_ctx, temp); + } break; + + case OPC_RH850_DI: + tcg_gen_movi_i32(tcg_ctx, cpu_ID, 0x1); + break; + + case OPC_RH850_DISPOSE_imm5_list12: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + int list [12] = {31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20}; + int numOfListItems = sizeof(list) / sizeof(list[0]); + int list12 = extract32(ctx->opcode, 0, 1) | ( (extract32(ctx->opcode, 21, 11)) << 1); + + // reorganising bits that indicate the registers to load + // doing this for easier looping in correct order + int dispList = ((list12 & 0x80) << 4) | + ((list12 & 0x40) << 4) | + ((list12 & 0x20) << 4) | + ((list12 & 0x10) << 4) | + ((list12 & 0x800) >> 4) | + ((list12 & 0x400) >> 4) | + ((list12 & 0x200) >> 4) | + ((list12 & 0x100) >> 4) | + ((list12 & 0x8) << 0) | + ((list12 & 0x4) << 0) | + ((list12 & 0x2) >> 1) | + ((list12 & 0x1) << 1) ; + + int test = 0x1; + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer (sp) register is cpu_gpr[3] + tcg_gen_addi_i32(tcg_ctx, temp, temp, (extract32(ctx->opcode, 1, 5) << 2)); + + TCGv regToLoad = tcg_temp_new_i32(tcg_ctx); + + for(int i=0; iopcode, 0, 1) | ( (extract32(ctx->opcode, 21, 11)) << 1); + TCGv jmpAddr = tcg_temp_new_i32(tcg_ctx); + + // reorganising bits that indicate the registers to load + // doing this for easier looping in correct order + int dispList = ((list12 & 0x80) << 4) | + ((list12 & 0x40) << 4) | + ((list12 & 0x20) << 4) | + ((list12 & 0x10) << 4) | + ((list12 & 0x800) >> 4) | + ((list12 & 0x400) >> 4) | + ((list12 & 0x200) >> 4) | + ((list12 & 0x100) >> 4) | + ((list12 & 0x8) << 0) | + ((list12 & 0x4) << 0) | + ((list12 & 0x2) >> 1) | + ((list12 & 0x1) << 1) ; + + int test = 0x1; + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer (sp) register is cpu_gpr[3] + tcg_gen_addi_i32(tcg_ctx, temp, temp, (extract32(ctx->opcode, 1, 5) << 2)); + + TCGv regToLoad = tcg_temp_new_i32(tcg_ctx); + + for(int i=0; iopcode, 16, 5))); + tcg_gen_mov_i32(tcg_ctx, cpu_pc, jmpAddr); + + gen_set_gpr(tcg_ctx, 3, temp); + ctx->base.is_jmp = DISAS_EXIT_TB; + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + } + break; + + case OPC_RH850_EI: + tcg_gen_movi_i32(tcg_ctx, cpu_ID, 0x0); + break; + case OPC_RH850_EIRET: + /* Move EIPC to PC and EIPSW to PSW. */ + tcg_gen_mov_i32(tcg_ctx, cpu_pc, cpu_sysRegs[BANK_ID_BASIC_0][EIPC_IDX]); + tcgv_to_flags(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][EIPSW_IDX]); + + /* Update ISPR. */ + gen_update_ispr(ctx, env); + ctx->base.is_jmp = DISAS_EXIT_TB; + break; + case OPC_RH850_FERET: + tcg_gen_mov_i32(tcg_ctx, cpu_pc, cpu_sysRegs[BANK_ID_BASIC_0][FEPC_IDX]); + tcgv_to_flags(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][FEPSW_IDX]); + ctx->base.is_jmp = DISAS_EXIT_TB; + break; + + case OPC_RH850_FETRAP_vector4: { + + vector = extract32(ctx->opcode, 11, 4); + TCGv_i32 excp = tcg_const_i32(tcg_ctx, RH850_EXCP_FETRAP); + TCGv_i32 cause = tcg_const_i32(tcg_ctx, vector + 0x30); + gen_helper_raise_exception_with_cause(tcg_ctx, tcg_ctx->cpu_env, excp, cause); + tcg_temp_free_i32(tcg_ctx, excp); + tcg_temp_free_i32(tcg_ctx, cause); + ctx->base.is_jmp = DISAS_NORETURN; + } break; + + case OPC_RH850_HALT: + // nop, interupts are not implemented, so HALT would never continue + // tcg_abort(); + break; + + case OPC_RH850_LDSR_reg2_regID_selID: + selID = extract32(ctx->opcode, 27, 5); + regID = rs2; + + // Modify only sytem regs, which exist. Real device executes instruction, but + // value is not stored for system regs, which do not exist. No exception is + // thrown. 
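+            // PSW is held in separate flag variables rather than in cpu_sysRegs,
+            // so it is special-cased below; for other registers the read-only bits
+            // are masked off the written value so they keep their reset-time contents.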
+ if(cpu_sysRegs[selID][regID] != NULL || (selID == BANK_ID_BASIC_0 && regID == PSW_IDX)) { + + TCGv tmp = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, tmp, rs1); + + if(selID == BANK_ID_BASIC_0 && regID == PSW_IDX){ + tcgv_to_flags(tcg_ctx, tmp); + } else { + // clear read-only bits in value, all other bits in sys reg. This way + // read-only bits preserve their value given at reset + tcg_gen_andi_i32(tcg_ctx, tmp, tmp, rh850_sys_reg_read_only_masks[selID][regID]); + tcg_gen_andi_i32(tcg_ctx, cpu_sysRegs[selID][regID], cpu_sysRegs[selID][regID], ~rh850_sys_reg_read_only_masks[selID][regID]); + tcg_gen_or_i32(tcg_ctx, cpu_sysRegs[selID][regID], cpu_sysRegs[selID][regID], tmp); + } + tcg_temp_free(tcg_ctx, tmp); + } + break; + + //case OPC_RH850_LDLW: + //break; + + //case OPC_RH850_NOP: + //break; + + case OPC_RH850_POPSP_rh_rt: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + uint32_t rs3 = extract32(ctx->opcode, 27, 5); + + int numOfRegs = (rs3-rs1)+1; + + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer register is cpu_gpr[3] + TCGv regToLoad = tcg_temp_new_i32(tcg_ctx); + + if(rs1<=rs3){ + + for(int i=0; iopcode, 21, 11) << 1) | (extract32(ctx->opcode, 0, 1) ) ) ; + int numOfListItems = sizeof(list) / sizeof(list[0]); + int prepList = ((list12 & 0x80) >> 7) | + ((list12 & 0x40) >> 5) | + ((list12 & 0x20) >> 3) | + ((list12 & 0x10) >> 1) | + ((list12 & 0x800) >> 7) | + ((list12 & 0x400) >> 5) | + ((list12 & 0x200) >> 3) | + ((list12 & 0x100) >> 1) | + ((list12 & 0x8) << 5) | + ((list12 & 0x4) << 7) | + ((list12 & 0x2) << 10) | + ((list12 & 0x1) << 10) ; + + int test = 0x1; + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer register is cpu_gpr[3] + TCGv regToStore = tcg_temp_new_i32(tcg_ctx); + + for(int i=0; iopcode, 1, 5) << 2)); + gen_set_gpr(tcg_ctx, 3, temp); + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + } break; + + case OPC_RH850_PREPARE_list12_imm5_sp:{ + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + int list [12] = {20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + uint32_t list12 = extract32(ctx->opcode, 0, 1) | ( (extract32(ctx->opcode, 21, 11)) << 1); + int numOfListItems = sizeof(list) / sizeof(list[0]); + int prepList = ((list12 & 0x80) >> 7) | + ((list12 & 0x40) >> 5) | + ((list12 & 0x20) >> 3) | + ((list12 & 0x10) >> 1) | + ((list12 & 0x800) >> 7) | + ((list12 & 0x400) >> 5) | + ((list12 & 0x200) >> 3) | + ((list12 & 0x100) >> 1) | + ((list12 & 0x8) << 5) | + ((list12 & 0x4) << 7) | + ((list12 & 0x2) << 10) | + ((list12 & 0x1) << 10) ; + + uint32_t imm = 0x0; + + int test = 0x1; + int ff = extract32(ctx->opcode, 19, 2); + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer register is cpu_gpr[3] + TCGv regToStore = tcg_temp_new_i32(tcg_ctx); + + for(int i=0; iopcode, 1, 5) << 2)); + + gen_set_gpr(tcg_ctx, 3, temp); + + switch(ff){ + + case 0x0: + gen_set_gpr(tcg_ctx, 30, temp); //moving sp to ep (element pointer is at cpu_gpr[30]) + break; + + case 0x1: + imm = cpu_lduw_code(env, ctx->base.pc_next); // fetching additional 16bits from memory + tcg_gen_movi_i32(tcg_ctx, temp, imm); + tcg_gen_ext16s_i32(tcg_ctx, temp, temp); + gen_set_gpr(tcg_ctx, 30, temp); + ctx->base.pc_next+=2; // increasing PC due to additional fetch + break; + + case 0x2: + imm = cpu_lduw_code(env, ctx->base.pc_next); // fetching additional 16bits from memory + tcg_gen_movi_i32(tcg_ctx, temp, imm); + tcg_gen_shli_i32(tcg_ctx, temp, temp, 0x10); + gen_set_gpr(tcg_ctx, 30, temp); + ctx->base.pc_next+=2; + break; + 
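+        // ff == 3: ep (r30) is loaded with a full 32-bit immediate assembled
+        // from the two extra halfwords that follow the instruction.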
+ case 0x3: + imm = cpu_lduw_code(env, ctx->base.pc_next) | + (cpu_lduw_code(env, ctx->base.pc_next + 2) << 0x10); + // fetching additional 32bits from memory + + tcg_gen_movi_i32(tcg_ctx, temp, imm); + gen_set_gpr(tcg_ctx, 30, temp); + ctx->base.pc_next = ctx->base.pc_next + 4; + break; + } + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + } break; + + case OPC_RH850_PUSHSP_rh_rt: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + uint32_t rs3 = extract32(ctx->opcode, 27, 5); + + int numOfRegs = (rs3-rs1)+1; + + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer register is cpu_gpr[3] + TCGv regToStore = tcg_temp_new_i32(tcg_ctx); + if(rs1<=rs3){ + + for(int i=0; icpu_env, excp, cause); + tcg_temp_free_i32(tcg_ctx, excp); + tcg_temp_free_i32(tcg_ctx, cause); + ctx->base.is_jmp = DISAS_NORETURN; + + } break; + + case OPC_RH850_SNOOZE: + break; + + //case OPC_RH850_STCW: + // break; + + case OPC_RH850_STSR_regID_reg2_selID: + regID=rs1; + selID = extract32(ctx->opcode, 27, 5); + if(selID == BANK_ID_BASIC_0 && regID == PSW_IDX){ + TCGv tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_tl(tcg_ctx, tmp, 0); + flags_to_tcgv(tcg_ctx, tmp); + gen_set_gpr(tcg_ctx, rs2, tmp); + tcg_temp_free(tcg_ctx, tmp); + } else { + if (cpu_sysRegs[selID][regID] != NULL) { + gen_set_gpr(tcg_ctx, rs2, cpu_sysRegs[selID][regID]); + } else { + TCGv dat = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, dat, 0); + gen_set_gpr(tcg_ctx, rs2, 0); // if sys reg does not exist, write 0 + tcg_temp_free(tcg_ctx, dat); + } + } + break; + + case OPC_RH850_SWITCH_reg1: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + gen_get_gpr(tcg_ctx, adr, rs1); + tcg_gen_shli_i32(tcg_ctx, adr, adr, 0x1); + tcg_gen_add_i32(tcg_ctx, adr, adr, cpu_pc); + tcg_gen_addi_i32(tcg_ctx, adr, adr, 0x2); + + tcg_gen_addi_i32(tcg_ctx, cpu_pc, cpu_pc, 0x2); + tcg_gen_qemu_ld16s(tcg_ctx, temp, adr, MEM_IDX); + tcg_gen_ext16s_i32(tcg_ctx, temp, temp); + tcg_gen_shli_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_add_i32(tcg_ctx, cpu_pc, cpu_pc, temp); + ctx->base.is_jmp = DISAS_EXIT_TB; + } break; + + // SYNC instructions will not be implemented + case OPC_RH850_SYNCE: + case OPC_RH850_SYNCI: + case OPC_RH850_SYNCM: + case OPC_RH850_SYNCP: + break; + + case OPC_RH850_TRAP: + { + int vector5 = rs1; + TCGv_i32 excp = tcg_const_i32(tcg_ctx, RH850_EXCP_TRAP); + TCGv_i32 cause = tcg_const_i32(tcg_ctx, vector5 + 0x40); + gen_helper_raise_exception_with_cause(tcg_ctx, tcg_ctx->cpu_env, excp, cause); + tcg_temp_free_i32(tcg_ctx, excp); + tcg_temp_free_i32(tcg_ctx, cause); + ctx->base.is_jmp = DISAS_NORETURN; + } + break; + + case OPC_RH850_SYSCALL: + { + int vector = extract32(ctx->opcode, 0, 5) | ((extract32(ctx->opcode, 27, 3)) << 5); + // int vector=5; + TCGv_i32 excp = tcg_const_i32(tcg_ctx, RH850_EXCP_SYSCALL); + TCGv_i32 cause = tcg_const_i32(tcg_ctx, vector + 0x8000); + gen_helper_raise_exception_with_cause(tcg_ctx, tcg_ctx->cpu_env, excp, cause); + tcg_temp_free_i32(tcg_ctx, excp); + tcg_temp_free_i32(tcg_ctx, cause); + ctx->base.is_jmp = DISAS_NORETURN; + } + break; + } +} + +/* Cache operations are not supported on single core emulation. 
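+   The cache-op sub-opcode is still decoded below, so these encodings are
+   accepted and simply behave as no-ops.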
*/ +static void gen_cache(DisasContext *ctx, int rs1, int rs2, int operation){ + int cache_op = (extract32(ctx->opcode,11, 2) << 5 ) | (extract32(ctx->opcode, 27, 5)); + switch(cache_op){ + case CHBII: + // printf("CHBII\n"); + break; + case CIBII: + // printf("CIBII\n"); + break; + case CFALI: + // printf("CFALI\n"); + break; + case CISTI: + // printf("CISTI\n"); + break; + case CILDI: + // printf("CILDI\n"); + break; + case CLL: + // printf("CLL\n"); + // this operation is not implemented on single core + break; + } +} + +/* 48-bit RH850 instruction decoding */ +static void decode_RH850_48(CPURH850State *env, DisasContext *ctx) +{ + int rs1, rs3; + uint64_t opcode48; + + rs1 = GET_RS1(ctx->opcode); + rs3 = extract32(ctx->opcode, 27, 5); + + opcode48 = (ctx->opcode1); + opcode48 = (ctx->opcode) | (opcode48 << 0x20); + uint32_t opcode20 = extract32(opcode48,0,20) & 0xfffe0; + + uint32_t disp23 = (ctx->opcode1 << 7) | (extract32(ctx->opcode, 21, 6) << 1); + uint32_t disp32 = (opcode48 >> 16); + + switch(opcode20) { + + + case OPC_RH850_LDB2: + gen_load(ctx, MO_SB, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDH2: + gen_load(ctx, MO_TESW, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDW2: + gen_load(ctx, MO_TESL, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDDW: + gen_load(ctx, MO_TEQ, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDBU2: + gen_load(ctx, MO_UB, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDHU2: + gen_load(ctx, MO_TEUW, rs3, rs1, disp23, 1); + break; + + case OPC_RH850_STB2: + gen_store(ctx, MO_SB, rs1, rs3, disp23, 1); + break; + case OPC_RH850_STH2: + gen_store(ctx, MO_TESW, rs1, rs3, disp23, 1); + break; + case OPC_RH850_STW2: + gen_store(ctx, MO_TESL, rs1, rs3, disp23, 1); + break; + case OPC_RH850_STDW: + gen_store(ctx, MO_TEQ, rs1, rs3, disp23, 1); + break; + } + + if (extract32(ctx->opcode, 5, 11) == 0x31) { + gen_arithmetic(ctx, 0, rs1, OPC_RH850_MOV_imm32_reg1); + } else if (extract32(ctx->opcode, 5, 12) == 0x37) { + gen_jmp(ctx, rs1, disp32, OPC_RH850_JMP_disp32_reg1); + } else if (extract32(ctx->opcode, 5, 11) == 0x17) { + if (rs1 == 0x0){ + gen_jmp(ctx, 0, disp32, OPC_RH850_JR_imm32); + + } else { + gen_jmp(ctx, rs1, disp32, OPC_RH850_JARL_disp32_reg1); + } + } +} + +/* 32-bit RH850 instruction decoding */ +static void decode_RH850_32(CPURH850State *env, DisasContext *ctx) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + int rs1; + int rs2; + int cond; + uint32_t op; + uint32_t formXop; + uint32_t checkXII; + uint32_t check32bitZERO; + target_long imm_32; + target_long ld_imm; + + op = MASK_OP_MAJOR(ctx->opcode); + rs1 = GET_RS1(ctx->opcode); // rs1 is at b0-b4; + rs2 = GET_RS2(ctx->opcode); // rs2 is at b11-b15; + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + imm_32 = GET_IMM_32(ctx->opcode); + ld_imm = extract32(ctx->opcode, 16, 16); + + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + switch(op){ + + case OPC_RH850_LDB: + gen_load(ctx, MO_SB, rs2, rs1, ld_imm, 0); + break; + + case OPC_RH850_LDH_LDW: + if ( extract32(ctx->opcode, 16, 1) == 0 ){ + gen_load(ctx, MO_TESW, rs2, rs1, ld_imm, 0); // LD.H + } + else{ + gen_load(ctx, MO_TESL, rs2, rs1, ld_imm & 0xfffe, 0); // LD.W + } + break; + + case OPC_RH850_STB: + gen_store(ctx, MO_SB, rs1, rs2, (extract32(ctx->opcode, 16, 16)), 0); + break; + + case OPC_RH850_STH_STW: + if ( extract32(ctx->opcode, 16, 1)==1 ) { + gen_store(ctx, MO_TESL, rs1, rs2, ((extract32(ctx->opcode, 17, 15))) << 1, 0); + //this is STORE WORD + break; + } + gen_store(ctx, 
MO_TESW, rs1, rs2, ((extract32(ctx->opcode, 17, 15))) << 1, 0); + //this is STORE HALFWORD + break; + + case OPC_RH850_ADDI_imm16_reg1_reg2: + gen_arithmetic(ctx, rs1,rs2, OPC_RH850_ADDI_imm16_reg1_reg2); + break; + + case OPC_RH850_ANDI_imm16_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_ANDI_imm16_reg1_reg2); + break; + + case OPC_RH850_MOVEA: + if ( extract32(ctx->opcode, 11, 5) == 0 ){ + // This is 48bit MOV + // This instruction should be reached first in decode_RH850_48 + } else { + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_MOVEA_imm16_reg1_reg2); + } + break; + + case OPC_RH850_MOVHI_imm16_reg1_reg2: + if(extract32(ctx->opcode, 11, 5)!=0x0){ + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_MOVHI_imm16_reg1_reg2); + } else { + if(extract32(ctx->opcode, 16, 5)==0x0){ + gen_special(ctx, env, rs1, rs2, OPC_RH850_DISPOSE_imm5_list12); + } else { + gen_special(ctx, env, rs1, rs2, OPC_RH850_DISPOSE_imm5_list12_reg1); + } + } + break; + + case OPC_RH850_ORI_imm16_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_ORI_imm16_reg1_reg2); + break; + + case OPC_RH850_SATSUBI_imm16_reg1_reg2: + if(extract32(ctx->opcode, 11, 5)!=0x0){ + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATSUBI_imm16_reg1_reg2); + } else { + if(extract32(ctx->opcode, 16, 5)==0x0){ + gen_special(ctx, env, rs1, rs2, OPC_RH850_DISPOSE_imm5_list12); + } else { + gen_special(ctx, env, rs1, rs2, OPC_RH850_DISPOSE_imm5_list12_reg1); + } + } + + break; + case OPC_RH850_XORI_imm16_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_XORI_imm16_reg1_reg2); + break; + + case OPC_RH850_LOOP: + if (extract32(ctx->opcode, 11, 5) == 0x0) + gen_loop(ctx, rs1, ld_imm & 0xfffe); // LOOP + else + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULHI_imm16_reg1_reg2); + break; + case OPC_RH850_BIT_MANIPULATION_2: + + switch(extract32(ctx->opcode, 14, 2)){ + case 0: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_SET1_bit3_disp16_reg1); + break; + case 1: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_NOT1_bit3_disp16_reg1); + break; + case 2: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_CLR1_bit3_disp16_reg1); + break; + case 3: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_TST1_bit3_disp16_reg1); + break; + } + break; + case OPC_RH850_32bit_1: /* case for opcode = 111111 ; formats IX, X, XI, XII */ + if (extract32(ctx->opcode, 16, 1) == 0x1 ) { + /* BCOND disp17 */ + if (rs2 == 0x0) + { + /* Get condition. */ + cond = extract32(ctx->opcode, 0, 4); + + /* Extract immediate value (16 higher bits of 17 bits set by the instruction). */ + imm_32 = ((extract32(ctx->opcode, 4, 1)<<16) | (extract32(ctx->opcode, 17, 15) << 1)); + + /* Sign-extend value to 32 bits. 
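+               Bit 16 is the sign bit of the 17-bit displacement; when it is set,
+               bits 17-31 are filled with ones (e.g. 0x1FFFE becomes 0xFFFFFFFE, i.e. -2).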
*/ + if ((imm_32 & 0x10000) == 0x10000) + { + imm_32 |= (0x7fff << 17); + } + + gen_branch(env, ctx, cond, rs1, rs2, imm_32); + + break; + } + else + { + /* LD.HU */ + gen_load(ctx, MO_TEUW, rs2, rs1, ld_imm & 0xfffe, 0); + break; + } + } + formXop = MASK_OP_32BIT_SUB(ctx->opcode); //sub groups based on bits b23-b26 + switch(formXop){ + case OPC_RH850_LDSR_RIE_SETF_STSR: + check32bitZERO = extract32(ctx->opcode, 21, 2); + switch(check32bitZERO){ + case 0: + if(extract32(ctx->opcode, 4, 1)==1) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_RIE); + } + else + { + printf("gen SETF\r\n"); + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SETF_cccc_reg2); + } + break; + case OPC_RH850_LDSR_reg2_regID_selID: + gen_special(ctx, env, rs1, rs2, OPC_RH850_LDSR_reg2_regID_selID); + break; + case OPC_RH850_STSR_regID_reg2_selID: + gen_special(ctx, env, rs1, rs2, OPC_RH850_STSR_regID_reg2_selID); + break; + } + break; + case OPC_RH850_FORMAT_IX: //format IX instructions + formXop = MASK_OP_FORMAT_IX(ctx->opcode); //mask on bits 21, 22 + switch(formXop) + { + case OPC_RH850_BINS_0: + if (extract32(ctx->opcode, 20, 1) == 1) + { + //BINS0 + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BINS); + } + else + { + if (extract32(ctx->opcode, 17, 1) == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SHR_reg1_reg2); + } + else + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SHR_reg1_reg2_reg3); + } + } + break; + case OPC_RH850_BINS_1: + if (extract32(ctx->opcode, 20, 1) == 1) + { + //BINS1 + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BINS); + } + else + { + if (extract32(ctx->opcode, 17, 1) == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SAR_reg1_reg2); + } + else + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SAR_reg1_reg2_reg3); + } + } + break; + case OPC_RH850_BINS_2: + if (extract32(ctx->opcode, 20, 1) == 1) + { + //BINS2 + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BINS); + } + else + { + if (extract32(ctx->opcode, 17, 1) == 0) + { + if (extract32(ctx->opcode, 18, 1) == 1) + { + gen_data_manipulation(ctx, rs1, rs2, + OPC_RH850_ROTL_imm5_reg2_reg3); + } + else + { + gen_data_manipulation(ctx, rs1, rs2, + OPC_RH850_SHL_reg1_reg2); + } + } + else + { + if (extract32(ctx->opcode, 18, 1) == 1) + { + gen_data_manipulation(ctx, rs1, rs2, + OPC_RH850_ROTL_reg1_reg2_reg3); + } + else + { + gen_data_manipulation(ctx, rs1, rs2, + OPC_RH850_SHL_reg1_reg2_reg3); + } + } + } + break; + case OPC_RH850_BIT_MANIPULATION: // in format IX + check32bitZERO = extract32(ctx->opcode, 16, 3); + switch(check32bitZERO){ + case OPC_RH850_SET1_reg2_reg1: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_SET1_reg2_reg1); + break; + case OPC_RH850_NOT1_reg2_reg1: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_NOT1_reg2_reg1); + break; + case OPC_RH850_CLR1_reg2_reg1: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_CLR1_reg2_reg1); + break; + case OPC_RH850_TST1_reg2_reg1: + if (extract32(ctx->opcode, 19, 1) == 0){ + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_TST1_reg2_reg1); + } else { + gen_special(ctx, env, rs1, rs2, OPC_RH850_CAXI_reg1_reg2_reg3); + } + } + break; + } + break; + + + case OPC_RH850_FORMAT_X: //format X instructions + //(+JARL3 - added due to MASK_OP_FORMAT_X matching) + formXop = MASK_OP_FORMAT_X(ctx->opcode); + + switch(formXop){ + + case OPC_RH850_CTRET: + gen_special(ctx, env, rs1, rs2, OPC_RH850_CTRET); + break; + case OPC_RH850_DI: + gen_special(ctx, env, rs1, rs2, OPC_RH850_DI); + break; + case OPC_RH850_EI: + gen_special(ctx, env, rs1, rs2, OPC_RH850_EI); + break; + 
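+        // EIRET/FERET return from EI-level/FE-level exception handling:
+        // gen_special() restores PC and the PSW flags from EIPC/EIPSW or FEPC/FEPSW.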
case OPC_RH850_EIRET: + gen_special(ctx, env, rs1, rs2, OPC_RH850_EIRET); + break; + case OPC_RH850_FERET: + gen_special(ctx, env, rs1, rs2, OPC_RH850_FERET); + break; + case OPC_RH850_HALT: + gen_special(ctx, env, rs1, rs2, OPC_RH850_HALT); + break; + case OPC_RH850_JARL3: + gen_jmp(ctx, rs1, 0, OPC_RH850_JARL_reg1_reg3); + break; + case OPC_RH850_SNOOZE: + gen_special(ctx, env, rs1, rs2, OPC_RH850_SNOOZE); + break; + case OPC_RH850_SYSCALL: + gen_special(ctx, env, rs1, rs2, OPC_RH850_SYSCALL); + break; + case OPC_RH850_TRAP: + gen_special(ctx, env, rs1, rs2, OPC_RH850_TRAP); + break; + case OPC_RH850_PREF: + //printf("PREF \n"); + break; + case OPC_RH850_POPSP_rh_rt: + gen_special(ctx, env, rs1, rs2, OPC_RH850_POPSP_rh_rt); + break; + case OPC_RH850_PUSHSP_rh_rt: + gen_special(ctx, env, rs1, rs2, OPC_RH850_PUSHSP_rh_rt); + break; + default: + if ((extract32(ctx->opcode, 13, 12) == 0xB07)) + { + if ((extract32(ctx->opcode, 27, 5) == 0x1E) && + (extract32(ctx->opcode, 0, 5) == 0x1F)) + { + if ((extract32(ctx->opcode, 23, 4) == 0x2)) // CLL + gen_mutual_exclusion(ctx, extract32(ctx->opcode, 27, 5), rs1, operation_CLL); + } else { + //CACHE; if cacheop bits are 1111110, opcode matches CLL ins, + //then they are THE SAME instruction, so this should be correct + gen_cache(ctx,rs1,rs2, 1); + } + } else + printf("ERROR! \n"); + break; + } + break; + case OPC_RH850_MUL_INSTS: + if (extract32(ctx->opcode, 22, 1) == 0) + { + if (extract32(ctx->opcode, 21, 1) == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SASF_cccc_reg2); + } + else + { + if (extract32(ctx->opcode, 17, 1) == 1) + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULU_reg1_reg2_reg3); + } + else + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MUL_reg1_reg2_reg3); + } + } + break; + } else if (extract32(ctx->opcode, 22, 1) == 1) + { + if (extract32(ctx->opcode, 17, 1) == 1) + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULU_imm9_reg2_reg3); + } + else + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MUL_imm9_reg2_reg3); + } + break; + } + break; + + case OPC_RH850_FORMAT_XI: // DIV instructions in format XI + formXop = extract32(ctx->opcode, 16, 7); + switch(formXop){ + + case OPC_RH850_DIV_reg1_reg2_reg3: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIV_reg1_reg2_reg3); + //DIV + break; + case OPC_RH850_DIVH_reg1_reg2_reg3: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVH_reg1_reg2_reg3); + //DIVH 2 + break; + case OPC_RH850_DIVHU_reg1_reg2_reg3: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVHU_reg1_reg2_reg3); + //DIVHU + break; + + case OPC_RH850_DIVQ: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIV_reg1_reg2_reg3); + //DIVQ => using DIV implementation, will be changed if needed + break; + case OPC_RH850_DIVQU: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVU_reg1_reg2_reg3); + //DIVQU => using DIVU implementation, will be changed if needed + break; + case OPC_RH850_DIVU_reg1_reg2_reg3: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVU_reg1_reg2_reg3); + //DIVU + break; + } + break; + + case OPC_RH850_FORMAT_XII: // for opcode = 0110 ; format XII instructions + //excluding MUL and including CMOV + // also LDL.W and STC.W (Format VII) + checkXII = extract32(ctx->opcode, 21, 2); + + switch(checkXII) + { + case 0: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_CMOV_cccc_imm5_reg2_reg3); + break; + case 1: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_CMOV_cccc_reg1_reg2_reg3); + break; + case 2: + formXop = extract32(ctx->opcode, 17, 2); + + switch(formXop) + { + case OPC_RH850_BSW_reg2_reg3: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BSW_reg2_reg3); + break; + case 
OPC_RH850_BSH_reg2_reg3: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BSH_reg2_reg3); + break; + case OPC_RH850_HSW_reg2_reg3: + //HSW + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_HSW_reg2_reg3); + break; + case OPC_RH850_HSH_reg2_reg3: + //HSH + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_HSH_reg2_reg3); + break; + } + break; + case 3: //these are SCHOL, SCHOR, SCH1L, SCH1R. Also LDL.W + formXop = extract32(ctx->opcode, 17, 2); + switch(formXop) + { + case OPC_RH850_SCH0R_reg2_reg3: + if (extract32(ctx->opcode, 5, 11) == 0x3F && + extract32(ctx->opcode, 16, 5) == 0x18) + gen_mutual_exclusion(ctx, extract32(ctx->opcode, 27, 5), + rs1, operation_LDL_W); + else + gen_bit_search(ctx, rs2, OPC_RH850_SCH0R_reg2_reg3); + break; + case OPC_RH850_SCH1R_reg2_reg3: + if (extract32(ctx->opcode, 19, 2) == 0x0) + { + gen_bit_search(ctx, rs2, OPC_RH850_SCH1R_reg2_reg3); + } + else if (extract32(ctx->opcode, 5, 11) == 0x3F && + extract32(ctx->opcode, 16, 5) == 0x1a) + gen_mutual_exclusion(ctx, extract32(ctx->opcode, 27, 5), + rs1, operation_STC_W); + break; + case OPC_RH850_SCH0L_reg2_reg3: + gen_bit_search(ctx, rs2, OPC_RH850_SCH0L_reg2_reg3); + break; + case OPC_RH850_SCH1L_reg2_reg3: + gen_bit_search(ctx, rs2, OPC_RH850_SCH1L_reg2_reg3); + break; + } + + } + break; + + case OPC_RH850_ADDIT_ARITH: + formXop = extract32(ctx->opcode, 21, 2); + switch(formXop) + { + + case OPC_RH850_ADF_SATADD3: + if (extract32(ctx->opcode, 16, 5) == 0x1A) + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATADD_reg1_reg2_reg3); + } + else + { + gen_cond_arith(ctx, rs1, rs2, OPC_RH850_ADF_cccc_reg1_reg2_reg3); + } + break; + case OPC_RH850_SBF_SATSUB: + if (extract32(ctx->opcode, 16, 5) == 0x1A) + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATSUB_reg1_reg2_reg3); + } + else + { + gen_cond_arith(ctx, rs1, rs2, OPC_RH850_SBF_cccc_reg1_reg2_reg3); + } + break; + case OPC_RH850_MAC_reg1_reg2_reg3_reg4: + gen_mul_accumulate(ctx, rs1, rs2, OPC_RH850_MAC_reg1_reg2_reg3_reg4); + break; + case OPC_RH850_MACU_reg1_reg2_reg3_reg4: + gen_mul_accumulate(ctx, rs1, rs2, OPC_RH850_MACU_reg1_reg2_reg3_reg4); + break; + } + break; + + /* Floating-point instruction format F:I. */ + case OPC_RH850_FORMAT_FI_CAT0: + { + /* Dispatch to FPU generator (category 0). */ + fpu_decode_cat0_instn(env, ctx); + } + break; + + case OPC_RH850_FORMAT_FI_CAT1: + { + /* Dispatch to FPU generator (category 1). 
*/ + fpu_decode_cat1_instn(env, ctx); + } + break; + } + } + + if (MASK_OP_FORMAT_V_FORMAT_XIII(ctx->opcode) == OPC_RH850_FORMAT_V_XIII){ + if(extract32(ctx->opcode, 16, 1) == 0) + { + uint32_t disp22 = extract32(ctx->opcode, 16, 16) | + (extract32(ctx->opcode, 0, 6) << 16 ); + if( (disp22 & 0x200000) == 0x200000) + { + disp22 = disp22 | (0x3ff << 22); + } + + if (extract32(ctx->opcode, 11, 5) == 0) + { + gen_jmp(ctx, 0, disp22, OPC_RH850_JR_imm22); //JR disp22 + } + else + { + gen_jmp(ctx, 0, disp22, OPC_RH850_JARL_disp22_reg2); + } + } + else + { + if (extract32(ctx->opcode, 11, 5) != 0) + { + //LD.BU + gen_load(ctx, MO_UB, rs2, rs1, (ld_imm & 0xfffe) | extract32(ctx->opcode, 5, 1), 0); + + } + else + { + if (extract32(ctx->opcode, 16, 3) == 0x3){ + gen_special(ctx, env, rs1, rs2, OPC_RH850_PREPARE_list12_imm5_sp); + //PREPARE2 + } + else if (extract32(ctx->opcode, 16, 3) == 0x1){ + gen_special(ctx, env, rs1, rs2, OPC_RH850_PREPARE_list12_imm5); + //PREPARE1 + } + } + } + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); +} + +/* 16-bit RH850 instruction decoding */ +static void decode_RH850_16(CPURH850State *env, DisasContext *ctx) +{ + int rs1; + int rs2; + int cond; + uint32_t op; + uint32_t subOpCheck; + uint32_t imm; + uint32_t disp32 = 0; + + op = MASK_OP_MAJOR(ctx->opcode); + rs1 = GET_RS1(ctx->opcode); // rs1 at bits b0-b4; + rs2 = GET_RS2(ctx->opcode); // rs2 at bits b11-b15; + imm = rs1; + + if((op & 0xf << 7) == OPC_RH850_BCOND ) + { // checking for 4 bit opcode for BCOND + cond = extract32(ctx->opcode, 0, 4); + imm = ( extract32(ctx->opcode, 4, 3) | (extract32(ctx->opcode, 11, 5) << 3)) << 1 ; + + if ( (imm & 0x100) == 0x100){ + imm |= (0x7fffff << 9); + } + gen_branch(env, ctx, cond, rs1, rs2, imm); + + return; + } + + switch(op) + { + case OPC_RH850_16bit_0: + if (rs2 != 0) { + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_MOV_reg1_reg2); + break; + } else { + subOpCheck = MASK_OP_FORMAT_I_0(op); + switch(subOpCheck){ + case OPC_RH850_NOP: + break; + case OPC_RH850_SYNCI: + break; + case OPC_RH850_SYNCE: + break; + case OPC_RH850_SYNCM: + break; + case OPC_RH850_SYNCP: + break; + } + } + break; + + case OPC_RH850_16bit_2: + if (rs2 == 0) + { + if (rs1 == 0) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_RIE); + break; + } + else + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_SWITCH_reg1); + break; + } + } + else + { + if (rs1 == 0) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_FETRAP_vector4); + break; + } + else + { + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVH_reg1_reg2); + break; + } + } + break; + + case OPC_RH850_16bit_4: + if (rs2 == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_ZXB_reg1); + break; + } + else + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATSUBR_reg1_reg2); + break; + } + break; + case OPC_RH850_16bit_5: + if (rs2 == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SXB_reg1); + break; + } + else + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATSUB_reg1_reg2); + break; + } + break; + case OPC_RH850_16bit_6: + if (rs2 == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_ZXH_reg1); + break; + } + else + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATADD_reg1_reg2); + break; + } + break; + case OPC_RH850_16bit_7: + if (rs2 == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SXH_reg1); + break; + } + else + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULH_reg1_reg2); + break; + } + break; + case OPC_RH850_NOT_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_NOT_reg1_reg2); + break; + // decode properly (handle also case when rs2 
!= 0), then uncomment + // case OPC_RH850_JMP_DISP: + // JMP opcode: DDDD DDDD DDDD DDDD dddd dddd dddd ddd0 0000 0110 111R RRRR + // disp32 = ctx->opcode >> 16; + + + // this case is already handled in decode_RH850_48() + + case OPC_RH850_16bit_3: + if (rs2 == 0) + { // JMP + gen_jmp(ctx, rs1, disp32, OPC_RH850_JMP_reg1); + break; + } + else + { + if(extract32(rs1,4,1)==1){ + //SLD.HU + gen_load(ctx, MO_TEUW, rs2, 30, extract32(ctx->opcode, 0, 4) << 1, 0); + }else{ + //SLD.BU + gen_load(ctx, MO_UB, rs2, 30, extract32(ctx->opcode, 0, 4), 0); + } + break; + } + break; + case OPC_RH850_OR_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_OR_reg1_reg2); + break; + case OPC_RH850_XOR_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_XOR_reg1_reg2); + break; + case OPC_RH850_AND_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_AND_reg1_reg2); + break; + case OPC_RH850_TST_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_TST_reg1_reg2); + break; + case OPC_RH850_SUBR_reg1_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_SUBR_reg1_reg2); + break; + case OPC_RH850_SUB_reg1_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_SUB_reg1_reg2); + break; + case OPC_RH850_ADD_reg1_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_ADD_reg1_reg2); + break; + case OPC_RH850_CMP_reg1_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_CMP_reg1_reg2); + break; + case OPC_RH850_16bit_16: + if (rs2 == 0) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_CALLT_imm6); + break; + } + else + { + gen_arithmetic(ctx, imm, rs2, OPC_RH850_MOV_imm5_reg2); + break; + } + break; + case OPC_RH850_16bit_17: + if (rs2 == 0) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_CALLT_imm6); + break; + } + else + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATADD_imm5_reg2); + break; + } + break; + case OPC_RH850_ADD_imm5_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_ADD_imm5_reg2); + break; + case OPC_RH850_CMP_imm5_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_CMP_imm5_reg2); + break; + case OPC_RH850_SHR_imm5_reg2: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SHR_imm5_reg2); + break; + case OPC_RH850_SAR_imm5_reg2: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SAR_imm5_reg2); + break; + case OPC_RH850_SHL_imm5_reg2: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SHL_imm5_reg2); + break; + case OPC_RH850_MULH_imm5_reg2: + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULH_imm5_reg2); + break; + } + + //Format IV ; dividing on code bits b7-b10 + uint32_t opIV = (op >> 7); + opIV = opIV << 5; + + switch(opIV) + { + case OPC_RH850_16bit_SLDB: + gen_load(ctx, MO_SB, rs2, 30, extract32(ctx->opcode, 0, 7), 0); + break; + case OPC_RH850_16bit_SLDH: + gen_load(ctx, MO_TESW, rs2, 30, extract32(ctx->opcode, 0, 7) << 1, 0); + break; + case OPC_RH850_16bit_IV10: + if ( extract32(rs1,0,1) == 1 ) { + //SST.W + gen_store(ctx, MO_TEUL, 30, rs2, (extract32(ctx->opcode, 1, 6)) << 2, 0); + /// Note An MAE or MDP exception might occur + /// depending on the result of address calculation. + } + else{ + //SLD.W + gen_load(ctx, MO_TESL, rs2, 30, extract32(ctx->opcode, 1, 6) << 2, 0); + } + break; + case OPC_RH850_16bit_SSTB: + gen_store(ctx, MO_UB, 30, rs2, (extract32(ctx->opcode, 0, 7)), 0); + /// Note An MDP exception might occur depending on the result of address calculation. + break; + case OPC_RH850_16bit_SSTH: + gen_store(ctx, MO_TEUW, 30, rs2, (extract32(ctx->opcode, 0, 7)) << 1, 0); + /// Note An MAE or MDP exception might occur + ///depending on the result of address calculation. 
+ break; + } +} + + +// ################################################################################### +// ################################################################################### +// ################################################################################### + +static void rh850_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + struct uc_struct *uc = cpu->uc; + dc->uc = uc; + + CPURH850State *env = cpu->env_ptr; + dc->env = env; + dc->pc = dc->base.pc_first; +} + +static void rh850_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) +{ +} + +static void rh850_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + TCGContext *tcg_ctx = dc->uc->tcg_ctx; + + tcg_gen_insn_start(tcg_ctx, dc->base.pc_next); +} + +/* + * This function is called when a breakpoint is hit. It should implement + * breakpoint handling - for example, HW breakpoints may be + * handled differently from SW breakpoints (see arm/translate.c). + * However, in RH850 we currently implement only SW breakpoints. + * + * Comment from translator.c: + * The breakpoint_check hook may use DISAS_TOO_MANY to indicate + * that only one more instruction is to be executed. Otherwise + * it should use DISAS_NORETURN when generating an exception, + * but may use a DISAS_TARGET_* value for Something Else. + */ +static bool rh850_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu, + const CPUBreakpoint *bp) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + + gen_exception_debug(dc); + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc->base.pc_next += 2; + dc->base.is_jmp = DISAS_NORETURN; + return true; +} + +/* RH850 instruction translation callback.
*/ +static void rh850_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + struct uc_struct *uc = dc->uc; + TCGContext *tcg_ctx = uc->tcg_ctx; + TCGOp *tcg_op, *prev_op = NULL; + CPURH850State *env = dc->env; + bool insn_hook = false; + + if (uc_addr_is_exit(dc->uc, dc->base.pc_next)) { + // imitate PGM exception to halt emulation + dcbase->is_jmp = DISAS_UNICORN_HALT; + } + else + { + #if 0 + // Unicorn: trace this instruction on request + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_CODE, dc->pc)) { + + // Sync PC in advance + tcg_gen_movi_i32(tcg_ctx, cpu_pc, dc->pc); + + // save the last operand + prev_op = tcg_last_op(tcg_ctx); + insn_hook = true; + + gen_uc_tracecode(tcg_ctx, 0xF1F1F1F1, UC_HOOK_CODE_IDX, env->uc, dc->pc); + + // the callback might want to stop emulation immediately + check_exit_request(tcg_ctx); + } + #endif + + dc->opcode = cpu_lduw_code(env, dc->pc); // get opcode from memory + + if ((extract32(dc->opcode, 9, 2) != 0x3) && (extract32(dc->opcode, 5, 11) != 0x17)) { + dc->base.pc_next = dc->pc + 2; + decode_RH850_16(env, dc); //this function includes 32-bit JR and JARL + } else { + dc->opcode = (dc->opcode) | (cpu_lduw_code(env, dc->pc + 2) << 0x10); + if (((extract32(dc->opcode, 6, 11) == 0x41e) && ((extract32(dc->opcode, 17, 2) > 0x1) || + (extract32(dc->opcode, 17, 3) == 0x4))) || + (extract32(dc->opcode, 5, 11) == 0x31) || // 48-bit MOV + (extract32(dc->opcode, 5, 12) == 0x37) || // 48-bit JMP + (extract32(dc->opcode, 5, 11) == 0x17) || // 48-bit JARL & JR + ((extract32(dc->opcode, 5, 11) == 0x3D) && (extract32(dc->opcode, 16, 5) == 0x07)) // 48-bit LD.HU + ) + { + dc->opcode1 = cpu_lduw_code(env, dc->pc + 4); + dc->base.pc_next = dc->pc + 6; + decode_RH850_48(env, dc); + } + else + { + dc->base.pc_next = dc->pc + 4; + decode_RH850_32(env, dc); + } + } + + #if 0 + if (insn_hook) { + // Unicorn: patch the callback to have the proper instruction size. + if (prev_op) { + // As explained further up in the function where prev_op is + // assigned, we move forward in the tail queue, so we're modifying the + // move instruction generated by gen_uc_tracecode() that contains + // the instruction size to assign the proper size (replacing 0xF1F1F1F1). 
+ tcg_op = QTAILQ_NEXT(prev_op, link); + } else { + // this instruction is the first emulated code ever, + // so the instruction operand is the first operand + tcg_op = QTAILQ_FIRST(&tcg_ctx->ops); + } + + tcg_op->args[1] = dc->base.pc_next - dc->pc; + } + #endif + + dc->pc = dc->base.pc_next; + } +} + +static void update_pc_addr(DisasContext *s) +{ + /* Emit TCG code to store base.pc_next into the PC register. */ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_pc, s->base.pc_next); +} + +// Emit exit TB code according to base.is_jmp +static void rh850_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + TCGContext *tcg_ctx = dc->uc->tcg_ctx; + + if (dc->base.is_jmp == DISAS_NORETURN) + { + return; + } + if (dc->base.singlestep_enabled) { + if (dc->base.is_jmp == DISAS_NEXT || dc->base.is_jmp == DISAS_TOO_MANY) { + // PC is not loaded inside TB, so we have to do it here in case of + // single stepping + tcg_gen_movi_tl(tcg_ctx, cpu_pc, dc->pc); + } + gen_exception_debug(dc); + } + + switch (dc->base.is_jmp) + { + case DISAS_UNICORN_HALT: + tcg_gen_movi_tl(tcg_ctx, cpu_pc, dc->pc); + gen_exception_halt(dc); + break; + case DISAS_TOO_MANY: + case DISAS_PC_STALE: + case DISAS_PC_STALE_NOCHAIN: + update_pc_addr(dc); + gen_goto_tb_imm(dc, 0, dc->pc); + break; + case DISAS_INDIRECT_JUMP: + /* PC in CPURH850State must have been updated! */ + tcg_gen_lookup_and_goto_ptr(tcg_ctx); + break; + case DISAS_EXIT_TB: + tcg_gen_exit_tb(tcg_ctx, NULL, 0); + break; + case DISAS_NORETURN: + case DISAS_TB_EXIT_ALREADY_GENERATED: + break; + default: + g_assert_not_reached(); + } +} + +static const TranslatorOps rh850_tr_ops = { + .init_disas_context = rh850_tr_init_disas_context, + .tb_start = rh850_tr_tb_start, + .insn_start = rh850_tr_insn_start, + .breakpoint_check = rh850_tr_breakpoint_check, + .translate_insn = rh850_tr_translate_insn, + .tb_stop = rh850_tr_tb_stop, +}; + +/** + * Translates one translation block - a sequence of instructions without + * jumps, and the longest such sequence that can be translated at once. + * The sequence may be shorter, e.g. in single-step mode (one instruction) + * or when a breakpoint is detected - see the checks in translator_loop(). + */ + +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) +{ + DisasContext dc; + translator_loop(&rh850_tr_ops, &dc.base, cpu, tb, max_insns); +} + +void rh850_translate_init(struct uc_struct *uc) +{ + TCGContext *tcg_ctx = uc->tcg_ctx; + int i; + + /* cpu_gpr[0] is a placeholder for the zero register. Do not use it.
*/ + /* Use the gen_set_gpr and gen_get_gpr helper functions when accessing */ + /* registers, unless you specifically block writes to reg 0 */ + + for (i = 0; i < NUM_GP_REGS; i++) { + cpu_gpr[i] = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, + offsetof(CPURH850State, gpRegs[i]), rh850_gp_regnames[i]); + } + + for (int bankIdx = 0; bankIdx < NUM_SYS_REG_BANKS; bankIdx++) { + for (int regIdx = 0; regIdx < MAX_SYS_REGS_IN_BANK; regIdx++) { + const char *regName = rh850_sys_regnames[bankIdx][regIdx]; + if (regName != NULL) { + cpu_sysRegs[bankIdx][regIdx] = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, + offsetof(CPURH850State, systemRegs[bankIdx][regIdx]), + regName); + } else { + cpu_sysRegs[bankIdx][regIdx] = NULL; // mark register as not present + } + } + } + + for (i = 0; i < 1; i++) { + cpu_sysDatabuffRegs[i] = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, + offsetof(CPURH850State, sysDatabuffRegs[i]), rh850_sys_databuff_regnames[i]); + } + + // PSW register flags + cpu_ZF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, Z_flag), "ZF"); + cpu_SF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, S_flag), "SF"); + cpu_OVF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, OV_flag), "OVF"); + cpu_CYF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, CY_flag), "CYF"); + cpu_SATF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, SAT_flag), "SAT"); + cpu_ID = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, ID_flag), "ID"); + cpu_EP = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, EP_flag), "EP"); + cpu_NP = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, NP_flag), "NP"); + cpu_EBV = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, EBV_flag), "EBV"); + cpu_CU0 = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, CU0_flag), "CU0"); + cpu_CU1 = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, CU1_flag), "CU1"); + cpu_CU2 = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, CU2_flag), "CU2"); + cpu_UM = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, UM_flag), "UM"); + + cpu_pc = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, pc), "pc"); + load_res = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, load_res), "load_res"); + load_val = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, load_val), "load_val"); + + cpu_LLbit = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, cpu_LLbit), "cpu_LLbit"); + cpu_LLAddress = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, cpu_LLAddress), "cpu_LLAddress"); + +} diff --git a/qemu/target/rh850/translate.h b/qemu/target/rh850/translate.h new file mode 100644 index 0000000000..a622c4ce58 --- /dev/null +++ b/qemu/target/rh850/translate.h @@ -0,0 +1,35 @@ +#ifndef _RH850_TRANSLATE_H +#define _RH850_TRANSLATE_H + +#include "cpu.h" +#include "exec/translator.h" +#include "tcg/tcg-op.h" + +/** + * This structure contains data, which is needed to translate a + * sequence of instructions, usually inside one translation + * block. The most important member is therefore 'pc', which + * points to the instruction to be translated. This variable stores + * PC during compile time (guest instructions to TCG instructions). 
+ * We must increment this variable manually during translation + * according to instruction size. + * Note: Consider renaming to TranslationContext, instead of DisasContext, + * because it contains information for translation, not disassembler. + */ +typedef struct DisasContext { + DisasContextBase base; + CPURH850State *env; + target_ulong pc; // pointer to instruction being translated + uint32_t opcode; + uint32_t opcode1; // used for 48 bit instructions + + // Unicorn + struct uc_struct *uc; +} DisasContext; + +void gen_get_gpr(TCGContext *tcg_ctx, TCGv t, int reg_num); +void gen_set_gpr(TCGContext *tcg_ctx, int reg_num_dst, TCGv t); +void gen_set_spr(TCGContext *tcg_ctx, int bank_id, int reg_id, TCGv t); +void gen_get_spr(TCGContext *tcg_ctx, int bank_id, int reg_id, TCGv t); + +#endif /* _RH850_TRANSLATE_H */ \ No newline at end of file diff --git a/qemu/target/rh850/unicorn.c b/qemu/target/rh850/unicorn.c new file mode 100644 index 0000000000..362c92dc8c --- /dev/null +++ b/qemu/target/rh850/unicorn.c @@ -0,0 +1,140 @@ +/* Unicorn Emulator Engine */ +/* By Nguyen Anh Quynh , 2015-2021 */ +/* Modified for Unicorn Engine by Damien Cauquil, 2020 */ + +#include "sysemu/cpus.h" +#include "cpu.h" +#include "unicorn_common.h" +#include "uc_priv.h" +#include "unicorn.h" + +RH850CPU *cpu_rh850_init(struct uc_struct *uc, const char *cpu_model); + +static void rh850_set_pc(struct uc_struct *uc, uint64_t address) +{ + ((CPURH850State *)uc->cpu->env_ptr)->pc = address; +} + +static uint64_t rh850_get_pc(struct uc_struct *uc) +{ + return ((CPURH850State *)uc->cpu->env_ptr)->pc; +} + +static void rh850_release(void *ctx) +{ + + int i; + TCGContext *tcg_ctx = (TCGContext *)ctx; + RH850CPU *cpu = (RH850CPU *)tcg_ctx->uc->cpu; + CPUTLBDesc *d = cpu->neg.tlb.d; + CPUTLBDescFast *f = cpu->neg.tlb.f; + CPUTLBDesc *desc; + CPUTLBDescFast *fast; + + release_common(ctx); + for (i = 0; i < NB_MMU_MODES; i++) { + desc = &(d[i]); + fast = &(f[i]); + g_free(desc->iotlb); + g_free(fast->table); + } +} + +static void reg_reset(struct uc_struct *uc) +{ + CPUArchState *env = uc->cpu->env_ptr; + + memset(env->gpRegs, 0, sizeof(env->gpRegs)); + env->pc = 0; +} + +DEFAULT_VISIBILITY +uc_err reg_read(void *_env, int mode, unsigned int regid, void *value, size_t *size) +{ + int sel_id; + CPURH850State *env = _env; + uc_err ret = UC_ERR_ARG; + + /* PC */ + if (regid == UC_RH850_REG_PC) + { + CHECK_REG_TYPE(uint32_t); + *(uint32_t *)value = env->pc; + } + + /* General purpose register. */ + if ((regid >= UC_RH850_REG_R0) && (regid <= UC_RH850_REG_R31)) + { + CHECK_REG_TYPE(uint32_t); + *(uint32_t *)value = env->gpRegs[regid]; + } + + /* System registers. */ + if ((regid >= UC_RH850_SYSREG_SELID0) && (regid < (UC_RH850_SYSREG_SELID7 + 32))) + { + CHECK_REG_TYPE(uint32_t); + sel_id = (regid - 32)/32; + *(uint32_t *)value = env->systemRegs[sel_id][regid % 32]; + } + + return ret; +} + + +DEFAULT_VISIBILITY +uc_err reg_write(void *_env, int mode, unsigned int regid, const void *value, size_t *size, int *setpc) +{ + int sel_id; + CPURH850State *env = _env; + uc_err ret = UC_ERR_ARG; + + /* PC */ + if (regid == UC_RH850_REG_PC) + { + CHECK_REG_TYPE(uint32_t); + env->pc = *(uint32_t *)value; + *setpc = 1; + } + + /* General purpose register. */ + if ((regid >= UC_RH850_REG_R0) && (regid <= UC_RH850_REG_R31)) + { + CHECK_REG_TYPE(uint32_t); + env->gpRegs[regid] = *(uint32_t *)value; + } + + /* System registers. 
*/ + if ((regid >= UC_RH850_SYSREG_SELID0) && (regid <= (UC_RH850_SYSREG_SELID7 + 32))) + { + CHECK_REG_TYPE(uint32_t); + sel_id = (regid - 32)/32; + env->systemRegs[sel_id][regid % 32] = *(uint32_t *)value; + } + + return ret; +} + +static int rh850_cpus_init(struct uc_struct *uc, const char *cpu_model) +{ + RH850CPU *cpu; + + cpu = cpu_rh850_init(uc, cpu_model); + if (cpu == NULL) { + return -1; + } + return 0; +} + +DEFAULT_VISIBILITY +void rh850_uc_init(struct uc_struct *uc) +{ + uc->reg_read = reg_read; + uc->reg_write = reg_write; + uc->reg_reset = reg_reset; + uc->release = rh850_release; + uc->set_pc = rh850_set_pc; + uc->get_pc = rh850_get_pc; + uc->cpus_init = rh850_cpus_init; + uc->cpu_context_size = offsetof(CPURH850State, uc); + uc_common_init(uc); +} diff --git a/qemu/target/rh850/unicorn.h b/qemu/target/rh850/unicorn.h new file mode 100644 index 0000000000..7ce57301a4 --- /dev/null +++ b/qemu/target/rh850/unicorn.h @@ -0,0 +1,16 @@ +/* Unicorn Emulator Engine */ +/* By Damien Cauquil , 2023 */ + +#ifndef UC_QEMU_TARGET_RH850_H +#define UC_QEMU_TARGET_RH850_H + +// functions to read & write registers +uc_err reg_read_rh850(void *_env, int mode, unsigned int regid, void *value, + size_t *size); +uc_err reg_write_rh850(void *_env, int mode, unsigned int regid, const void *value, + size_t *size, int *setpc); + +void reg_reset_rh850(struct uc_struct *uc); + +void rh850_uc_init(struct uc_struct *uc); +#endif diff --git a/samples/Makefile b/samples/Makefile index cbb3d91fb7..b896aecb43 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -93,6 +93,9 @@ endif ifneq (,$(findstring tricore,$(UNICORN_ARCHS))) SOURCES += sample_tricore.c endif +ifneq (,$(findstring avr,$(UNICORN_ARCHS))) +SOURCES += sample_avr.c +endif BINS = $(SOURCES:.c=$(BIN_EXT)) OBJS = $(SOURCES:.c=.o) diff --git a/samples/sample_avr.c b/samples/sample_avr.c new file mode 100644 index 0000000000..7482bbc204 --- /dev/null +++ b/samples/sample_avr.c @@ -0,0 +1,131 @@ +/* + Created for Unicorn Engine by Glenn Baker , 2024 +*/ + +/* Sample code to demonstrate how to emulate AVR code */ + +#include +#include +#include + +// Code to be emulated +static const uint32_t CODE_BASE = 0x0000; +static const uint8_t CODE[] = + "\x86\x0f" // add r24, r22 + "\x97\x1f" // adc r25, r23 + "\x88\x0f" // add r24, r24 + "\x99\x1f" // adc r25, r25 + "\x01\x96" // adiw r24, 0x01 + "\x08\x95" // ret + ; +enum { + CODE_SIZE = sizeof(CODE) - 1, + CODE_SIZE_ALIGNED = (CODE_SIZE + 0xff) & -0x100, +}; + +static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, + void *user_data) +{ + printf(">>> Tracing basic block at 0x%" PRIx64 ", block size = 0x%x\n", + address, size); +} + +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, + void *user_data) +{ + printf(">>> Tracing instruction at 0x%" PRIx64 + ", instruction size = 0x%x\n", + address, size); +} + +static bool is_error(uc_err err, const char *what) +{ + if (err != UC_ERR_OK) { + fprintf(stderr, "error: failed on %s() with error %u: %s\n", + what, err, uc_strerror(err)); + return true; + } + return false; +} + +static bool test_avr(void) +{ + uc_engine *uc = NULL; + uc_hook trace1, trace2; + bool success = false; + + uint8_t regs[32]; + int reg_ids[32]; + void *reg_vals[32]; + int i; + + printf("Emulate AVR code\n"); + do { + // Initialize emulator in AVR mode + uc_err err = uc_open(UC_ARCH_AVR, UC_MODE_LITTLE_ENDIAN, &uc); + if (is_error(err, "uc_open")) + break; + + // Map program code + err = uc_mem_map(uc, CODE_BASE, CODE_SIZE_ALIGNED, 
UC_PROT_READ|UC_PROT_EXEC); + if (is_error(err, "uc_mem_map")) + break; + + // Write machine code to be emulated to memory + err = uc_mem_write(uc, CODE_BASE, CODE, CODE_SIZE); + if (is_error(err, "uc_mem_write")) + break; + + // Tracing all basic blocks with customized callback + err = uc_hook_add(uc, &trace1, UC_HOOK_BLOCK, hook_block, NULL, 1, 0); + if (is_error(err, "uc_hook_add[UC_HOOK_BLOCK]")) + break; + + // Tracing one instruction at CODE_BASE with customized callback + err = uc_hook_add(uc, &trace2, UC_HOOK_CODE, hook_code, NULL, CODE_BASE, + CODE_BASE + 1); + if (is_error(err, "uc_hook_add[UC_HOOK_CODE]")) + break; + + // Initialize registers + memset(regs, 0, sizeof(regs)); + regs[25] = 0; regs[24] = 1; + regs[23] = 0; regs[22] = 2; + + for (i = 0; i < 4; i++) { + reg_ids[i] = UC_AVR_REG_R0 + 22 + i; + reg_vals[i] = &regs[22 + i]; + } + err = uc_reg_write_batch(uc, reg_ids, reg_vals, 4); + if (is_error(err, "uc_reg_write_batch")) + break; + + // Emulate machine code in infinite time (last param = 0), or + // when finishing all the code. + err = uc_emu_start(uc, CODE_BASE, CODE_BASE + 4, 0, 0); + if (is_error(err, "uc_emu_start")) + break; + + // now print out some registers + printf(">>> Emulation done. Below is the CPU context\n"); + + uc_reg_read(uc, UC_AVR_REG_R25, &regs[25]); + uc_reg_read(uc, UC_AVR_REG_R24, &regs[24]); + uc_reg_read(uc, UC_AVR_REG_R23, &regs[23]); + uc_reg_read(uc, UC_AVR_REG_R22, &regs[22]); + printf(">>> r25,r24 = 0x%02x%02x\n", regs[25], regs[24]); + if (regs[25] == 0 && regs[24] == 3 && regs[23] == 0 && regs[22] == 2) + success = true; + } while (0); + + if (uc) + uc_close(uc); + return success; +} + +int main(int argc, char **argv, char **envp) +{ + if (!test_avr()) + abort(); + return 0; +} diff --git a/samples/sample_rh850.c b/samples/sample_rh850.c new file mode 100644 index 0000000000..8f74bf5e77 --- /dev/null +++ b/samples/sample_rh850.c @@ -0,0 +1,118 @@ +/* Unicorn Emulator Engine */ +/* By Nguyen Anh Quynh, 2021 */ + +/* Sample code to demonstrate how to emulate RH850 code */ + +#include +#include + +// code to be emulated +#define RH850_CODE "\x01\x0e\x06\x00\xc1\x11\x01\x1f\x00\x00\x41\x1f\x00\x00" + +// memory address where emulation starts +#define ADDRESS 0x10000 + +static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, + void *user_data) +{ + printf(">>> Tracing basic block at 0x%" PRIx64 ", block size = 0x%x\n", + address, size); +} + +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, + void *user_data) +{ + printf(">>> Tracing instruction at 0x%" PRIx64 + ", instruction size = 0x%x\n", + address, size); +} + +static void hook_mem64(uc_engine *uc, uc_mem_type type, uint64_t address, + int size, int64_t value, void *user_data) +{ + uint64_t pc; + switch (type) { + default: + break; + case UC_MEM_READ: + uc_reg_read(uc, UC_RH850_REG_PC, &pc); + printf(">>> Memory read operation at 0x%" PRIx64 "\n", pc); + printf(">>> Memory is being READ at 0x%" PRIx64 ", data size = %u\n", + address, size); + break; + case UC_MEM_WRITE: + uc_reg_read(uc, UC_RH850_REG_PC, &pc); + printf(">>> Memory write operation at 0x%" PRIx64 "\n", pc); + printf(">>> Memory is being WRITE at 0x%" PRIx64 + ", data size = %u, data value = 0x%" PRIx64 "\n", + address, size, value); + break; + } +} + + +static void test_rh850(void) +{ + uc_engine *uc; + uc_hook trace1, trace2, trace3; + uc_err err; + + uint64_t r1 = 0x10000, r2 = 3, r3; + + printf("Emulate RH850 code\n"); + + // Initialize emulator in RH850 mode + err = uc_open(UC_ARCH_RH850,
UC_MODE_LITTLE_ENDIAN, &uc); + if (err) { + printf("Failed on uc_open() with error returned: %u (%s)\n", err, + uc_strerror(err)); + return; + } + + // map 1MB memory for this emulation + uc_mem_map(uc, ADDRESS, 1024 * 1024, UC_PROT_ALL); + + // write machine code to be emulated to memory + uc_mem_write(uc, ADDRESS, RH850_CODE, sizeof(RH850_CODE) - 1); + + // initialize machine registers + uc_reg_write(uc, UC_RH850_REG_R1, &r1); + uc_reg_write(uc, UC_RH850_REG_R2, &r2); + + // tracing all basic blocks with customized callback + uc_hook_add(uc, &trace1, UC_HOOK_BLOCK, hook_block, NULL, 1, 0); + + // tracing all instruction + uc_hook_add(uc, &trace2, UC_HOOK_CODE, hook_code, NULL, 1, 0); + + // tracing mem read + uc_hook_add(uc, &trace3, UC_HOOK_MEM_READ, hook_mem64, NULL, 1, 0); + uc_hook_add(uc, &trace3, UC_HOOK_MEM_WRITE, hook_mem64, NULL, 1, 0); + + // emulate machine code in infinite time (last param = 0), or when + // finishing all the code. + err = uc_emu_start(uc, ADDRESS, ADDRESS + sizeof(RH850_CODE) - 1, 0, 0); + if (err) { + printf("Failed on uc_emu_start() with error returned: %u (%s)\n", err, + uc_strerror(err)); + } + + // now print out some registers + printf(">>> Emulation done. Below is the CPU context\n"); + + uc_reg_read(uc, UC_RH850_REG_R1, &r1); + uc_reg_read(uc, UC_RH850_REG_R2, &r2); + uc_reg_read(uc, UC_RH850_REG_R3, &r3); + + printf(">>> R1 = 0x%" PRIx64 "\t\t>>> R2 = 0x%" PRIx64 "\n", r1, r2); + printf(">>> R3 = 0x%" PRIx64 "\n", r3); + + uc_close(uc); +} + +int main(int argc, char **argv, char **envp) +{ + test_rh850(); + + return 0; +} diff --git a/symbols.sh b/symbols.sh index 4424fb4319..1fb5cdfc7e 100755 --- a/symbols.sh +++ b/symbols.sh @@ -7746,6 +7746,12 @@ tcg_s390_program_interrupt \ tcg_s390_data_exception \ " +rh850_SYMBOLS="restore_state_to_opc \ +helper_tlb_flush \ +helper_uc_rh850_exit \ +gen_intermediate_code \ +" + tricore_SYMBOLS=" helper_fadd \ helper_fsub \ @@ -7759,7 +7765,21 @@ restore_state_to_opc \ helper_uc_tricore_exit \ " -ARCHS="x86_64 arm aarch64 riscv32 riscv64 mips mipsel mips64 mips64el sparc sparc64 m68k ppc ppc64 s390x tricore" +avr_SYMBOLS=" +helper_sleep \ +helper_unsupported \ +helper_debug \ +helper_break \ +helper_inb \ +helper_outb \ +helper_fullrd \ +helper_fullwr \ +helper_wdr \ +gen_intermediate_code \ +restore_state_to_opc \ +" + +ARCHS="x86_64 arm aarch64 riscv32 riscv64 mips mipsel mips64 mips64el rh850 sparc sparc64 m68k ppc ppc64 s390x tricore avr" for arch in $ARCHS; do diff --git a/tests/unit/test_avr.c b/tests/unit/test_avr.c new file mode 100644 index 0000000000..e9e6aecbcc --- /dev/null +++ b/tests/unit/test_avr.c @@ -0,0 +1,268 @@ +#include +#include "unicorn_test.h" + +#define ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0])) + +#define PAGE_SIZE 256 +#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & -PAGE_SIZE) + +enum { + ADDR__init__ = 0x0000, // __init__ + ADDR_test_func = 0x001a, // test_func() + ADDR_test_1 = 0x0030, // test_1() + ADDR_main = 0x0058, // main() + ADDR_abort = 0x0062, // abort() + ADDR_exit = 0x006c, // _exit() + ADDR__stop_program = 0x006e, // __stop_program() + ADDR__data__ = 0x0070, // __data__ + ADDR__data__end = 0x0072, +}; + +enum { + SIZE__init__ = ADDR_test_func - ADDR__init__, + SIZE_test_func = ADDR_test_1 - ADDR_test_func, + SIZE_test_1 = ADDR_main - ADDR_test_1, + SIZE_main = ADDR_abort - ADDR_main, + SIZE_abort = ADDR_exit - ADDR_abort, + SIZE_exit = ADDR__stop_program - ADDR_exit, + SIZE__stop_program = ADDR__data__ - ADDR__stop_program, + SIZE__data__ = ADDR__data__end - ADDR__data__, 
+}; + +static const uint8_t FLASH[] = + // 00000000 <__ctors_end>: + "\x12\xe0" // ldi r17, 0x02 + "\xa0\xe0" // ldi r26, 0x00 + "\xb2\xe0" // ldi r27, 0x02 + "\xe0\xe7" // ldi r30, 0x70 + "\xf0\xe0" // ldi r31, 0x00 + "\x00\xe0" // ldi r16, 0x00 + "\x0b\xbf" // out 0x3b, r16 + "\x02\xc0" // rjmp .+4 + "\x07\x90" // elpm r0, Z+ + "\x0d\x92" // st X+, r0 + "\xa2\x30" // cpi r26, 0x02 + "\xb1\x07" // cpc r27, r17 + "\xd9\xf7" // brne .-10 + + // 0000001a : + "\x20\x91\x00\x02" // lds r18, 0x0200 + "\x30\x91\x01\x02" // lds r19, 0x0201 + "\x86\x0f" // add r24, r22 + "\x97\x1f" // adc r25, r23 + "\x88\x0f" // add r24, r24 + "\x99\x1f" // adc r25, r25 + "\x82\x0f" // add r24, r18 + "\x93\x1f" // adc r25, r19 + "\x08\x95" // ret + + // 00000030 : + "\x62\xe0" // ldi r22, 0x02 + "\x70\xe0" // ldi r23, 0x00 + "\x81\xe0" // ldi r24, 0x01 + "\x90\xe0" // ldi r25, 0x00 + "\x0e\x94\x0d\x00" // call 0x1a + "\x07\x97" // sbiw r24, 0x07 + "\x11\xf0" // breq .+4 + "\x0e\x94\x31\x00" // call 0x62 + "\x60\xe8" // ldi r22, 0x80 + "\x70\xe0" // ldi r23, 0x00 + "\x80\xe4" // ldi r24, 0x40 + "\x90\xe0" // ldi r25, 0x00 + "\x0e\x94\x0d\x00" // call 0x1a + "\x81\x38" // cpi r24, 0x81 + "\x91\x40" // sbci r25, 0x01 + "\xa9\xf7" // brne .-22 + "\x08\x95" // ret + + // 00000058
: + "\x0e\x94\x18\x00" // call 0x30 + "\x80\xe0" // ldi r24, 0x00 + "\x90\xe0" // ldi r25, 0x00 + "\x08\x95" // ret + + // 00000062 : + "\x81\xe0" // ldi r24, 0x01 + "\x90\xe0" // ldi r25, 0x00 + "\xf8\x94" // cli + "\x0c\x94\x36\x00" // jmp 0x6c + + // 0000006c <_exit>: + "\xf8\x94" // cli + + // 0000006e <__stop_program>: + "\xff\xcf" // rjmp .-2 + + // 0x000070 .data + "\x01\x00" + ; +const uint64_t FLASH_SIZE = sizeof(FLASH); + +const uint64_t MEM_BASE = 0x0200; +const uint64_t MEM_SIZE = 0x0100; + +static void uc_common_setup(uc_engine **uc, uc_cpu_avr cpu_model, + const uint8_t *code, uint64_t code_size) +{ + OK(uc_open(UC_ARCH_AVR, UC_MODE_LITTLE_ENDIAN, uc)); + if (cpu_model != 0) + OK(uc_ctl_set_cpu_model(*uc, cpu_model)); + + OK(uc_mem_map(*uc, UC_AVR_MEM_FLASH, PAGE_ALIGN(code_size), + UC_PROT_READ|UC_PROT_EXEC)); + OK(uc_mem_write(*uc, UC_AVR_MEM_FLASH, code, code_size)); + OK(uc_mem_map(*uc, MEM_BASE, MEM_SIZE, UC_PROT_READ|UC_PROT_WRITE)); +} + +static void test_avr_basic_alu(void) +{ + uc_engine *uc = NULL; + + uint8_t r[32] = {0,}; + uint32_t r_pc; + uint16_t r_func_arg0 = 1, r_func_arg1 = 2, r_func_ret; + r[24] = 1; + r[22] = 2; + + uc_common_setup(&uc, 0, FLASH, FLASH_SIZE); + OK(uc_reg_write(uc, UC_AVR_REG_R24W, &r_func_arg0)); + OK(uc_reg_write(uc, UC_AVR_REG_R22W, &r_func_arg1)); + + const uint64_t code_start = ADDR_test_func + 8; + OK(uc_emu_start(uc, code_start, code_start + 4, 0, 0)); + + OK(uc_reg_read(uc, UC_AVR_REG_PC, &r_pc)); + OK(uc_reg_read(uc, UC_AVR_REG_R25, &r[25])); + OK(uc_reg_read(uc, UC_AVR_REG_R24, &r[24])); + OK(uc_reg_read(uc, UC_AVR_REG_R23, &r[23])); + OK(uc_reg_read(uc, UC_AVR_REG_R22, &r[22])); + + TEST_CHECK(r_pc == code_start + 4); + TEST_CHECK(r[25] == 0 && r[24] == 3); + TEST_CHECK(r[23] == 0 && r[22] == 2); + + OK(uc_reg_read(uc, UC_AVR_REG_R24W, &r_func_ret)); + OK(uc_reg_read(uc, UC_AVR_REG_R22W, &r_func_arg1)); + + TEST_CHECK(r_func_ret == r[24]); + TEST_CHECK(r_func_arg1 == r[22]); + + OK(uc_close(uc)); +} + +typedef struct MEM_HOOK_RESULT_s { + uc_mem_type type; + uint64_t address; + int size; + uint64_t value; +} MEM_HOOK_RESULT; + +typedef struct MEM_HOOK_RESULTS_s { + uint64_t count; + MEM_HOOK_RESULT results[16]; +} MEM_HOOK_RESULTS; + +static bool test_avr_basic_mem_cb_eventmem(uc_engine *uc, uc_mem_type type, + uint64_t address, int size, int64_t value, void *user_data) +{ + MEM_HOOK_RESULTS *const r = user_data; + + uint64_t count = r->count; + if (count >= ARRAY_ELEMS(r->results)) { + TEST_ASSERT(false); + } + + r->results[count].type = type; + r->results[count].address = address; + r->results[count].size = size; + r->results[count].value = value; + r->count++; + return true; +} + +static void test_avr_basic_mem(void) +{ + uc_engine *uc = NULL; + uc_hook eventmem_hook; + MEM_HOOK_RESULTS eventmem_trace = {0}; + + const uint8_t *const DATA = &FLASH[ADDR__data__]; + uint8_t mem[SIZE__data__]; + + uint32_t r_pc; + int i; + + uc_common_setup(&uc, 0, FLASH, FLASH_SIZE); + OK(uc_hook_add(uc, &eventmem_hook, UC_HOOK_MEM_VALID, + test_avr_basic_mem_cb_eventmem, &eventmem_trace, 1, 0)); + + const uint64_t code_start = ADDR__init__; + OK(uc_emu_start(uc, code_start, ADDR__init__ + SIZE__init__, 0, 0)); + + OK(uc_reg_read(uc, UC_AVR_REG_PC, &r_pc)); + TEST_CHECK(r_pc == ADDR__init__ + SIZE__init__); + + // Check SRAM was correctly initialized with data from Flash program memory + OK(uc_mem_read(uc, MEM_BASE, mem, sizeof(mem))); + TEST_CHECK(memcmp(mem, DATA, SIZE__data__) == 0); + + TEST_CHECK(eventmem_trace.count == 2*SIZE__data__); + 
for (i = 0; i < SIZE__data__; i++) { + const MEM_HOOK_RESULT *const mr = &eventmem_trace.results[2*i]; + TEST_CHECK(mr->type == UC_MEM_READ); + TEST_CHECK(mr->address == (UC_AVR_MEM_FLASH|(ADDR__data__+i))); + TEST_CHECK(mr->size == 1); + TEST_CHECK(mr->value == 0); + + const MEM_HOOK_RESULT *const mw = &eventmem_trace.results[2*i+1]; + TEST_CHECK(mw->type == UC_MEM_WRITE); + TEST_CHECK(mw->address == MEM_BASE+i); + TEST_CHECK(mw->size == 1); + TEST_CHECK(mw->value == DATA[i]); + } + + OK(uc_close(uc)); +} + +static void test_avr_full_exec(void) +{ + uc_engine *uc = NULL; + + uint8_t r[32] = {0,}; + uint32_t r_pc; + uint32_t r_sp; + + uc_common_setup(&uc, 0, FLASH, FLASH_SIZE); + + const uint64_t code_start = ADDR__init__; + OK(uc_emu_start(uc, code_start, ADDR__init__ + SIZE__init__, 0, 0)); + + OK(uc_reg_read(uc, UC_AVR_REG_PC, &r_pc)); + TEST_CHECK(r_pc == ADDR__init__ + SIZE__init__); + + r_sp = MEM_BASE + MEM_SIZE - 1; + OK(uc_reg_write(uc, UC_AVR_REG_SP, &r_sp)); + + const uint64_t exits[] = { + ADDR_main, + ADDR__stop_program + }; + OK(uc_ctl_exits_enable(uc)); + OK(uc_ctl_set_exits(uc, exits, ARRAY_ELEMS(exits))); + + const uint64_t code_main = ADDR_main; + OK(uc_emu_start(uc, code_main, 0, 0, 0)); + + OK(uc_reg_read(uc, UC_AVR_REG_R25, &r[25])); + OK(uc_reg_read(uc, UC_AVR_REG_R24, &r[24])); + TEST_CHECK(r[25] == 0 && r[24] == 0); + + OK(uc_close(uc)); +} + +TEST_LIST = { + {"test_avr_basic_alu", test_avr_basic_alu}, + {"test_avr_basic_mem", test_avr_basic_mem}, + {"test_avr_full_exec", test_avr_full_exec}, + {NULL, NULL} +}; diff --git a/tests/unit/test_rh850.c b/tests/unit/test_rh850.c new file mode 100644 index 0000000000..e02e704167 --- /dev/null +++ b/tests/unit/test_rh850.c @@ -0,0 +1,40 @@ +#include "unicorn_test.h" + +const uint64_t code_start = 0x1000; +const uint64_t code_len = 0x4000; + +static void uc_common_setup(uc_engine **uc, uc_arch arch, uc_mode mode, + const char *code, uint64_t size) +{ + OK(uc_open(arch, mode, uc)); + OK(uc_mem_map(*uc, code_start, code_len, UC_PROT_ALL)); + OK(uc_mem_write(*uc, code_start, code, size)); +} + +static void test_rh850_add(void) +{ + char code[] = "\x01\x0e\x06\x00\xc1\x11"; + uint32_t r1 = 0x1234; + uint32_t r2 = 0x7777; + uint32_t pc; + uc_engine *uc; + + uc_common_setup(&uc, UC_ARCH_RH850, UC_MODE_LITTLE_ENDIAN, code, + sizeof(code) - 1); + OK(uc_reg_write(uc, UC_RH850_REG_R1, &r1)); + OK(uc_reg_write(uc, UC_RH850_REG_R2, &r2)); + + OK(uc_emu_start(uc, code_start, code_start + sizeof(code) - 1, 0, 0)); + + OK(uc_reg_read(uc, UC_RH850_REG_R1, &r1)); + OK(uc_reg_read(uc, UC_RH850_REG_R2, &r2)); + OK(uc_reg_read(uc, UC_RH850_REG_PC, &pc)); + + TEST_CHECK(r1 == 0x1234 + 6); + TEST_CHECK(r2 == 0x89b1); + TEST_CHECK(pc == code_start + sizeof(code) - 1); + + OK(uc_close(uc)); +} + +TEST_LIST = {{"test_rh850_add", test_rh850_add}, {NULL, NULL}}; \ No newline at end of file diff --git a/uc.c b/uc.c index fd29765080..ea415e2171 100644 --- a/uc.c +++ b/uc.c @@ -22,9 +22,11 @@ #include "qemu/target/mips/unicorn.h" #include "qemu/target/sparc/unicorn.h" #include "qemu/target/ppc/unicorn.h" +#include "qemu/target/rh850/unicorn.h" #include "qemu/target/riscv/unicorn.h" #include "qemu/target/s390x/unicorn.h" #include "qemu/target/tricore/unicorn.h" +#include "qemu/target/avr/unicorn.h" #include "qemu/include/tcg/tcg-apple-jit.h" #include "qemu/include/qemu/queue.h" @@ -225,6 +227,10 @@ bool uc_arch_supported(uc_arch arch) case UC_ARCH_X86: return true; #endif +#ifdef UNICORN_HAS_RH850 + case UC_ARCH_RH850: + return true; +#endif #ifdef 
UNICORN_HAS_RISCV case UC_ARCH_RISCV: return true; @@ -236,6 +242,10 @@ bool uc_arch_supported(uc_arch arch) #ifdef UNICORN_HAS_TRICORE case UC_ARCH_TRICORE: return true; +#endif +#ifdef UNICORN_HAS_AVR + case UC_ARCH_AVR: + return true; #endif /* Invalid or disabled arch */ default: @@ -439,6 +449,15 @@ uc_err uc_open(uc_arch arch, uc_mode mode, uc_engine **result) } break; #endif +#ifdef UNICORN_HAS_RH850 + case UC_ARCH_RH850: + if (mode != UC_MODE_LITTLE_ENDIAN) { + free(uc); + return UC_ERR_MODE; + } + uc->init_arch = rh850_uc_init; + break; +#endif #ifdef UNICORN_HAS_RISCV case UC_ARCH_RISCV: if ((mode & ~UC_MODE_RISCV_MASK) || @@ -473,6 +492,15 @@ uc_err uc_open(uc_arch arch, uc_mode mode, uc_engine **result) } uc->init_arch = uc_init_tricore; break; +#endif +#ifdef UNICORN_HAS_AVR + case UC_ARCH_AVR: + if ((mode & ~UC_MODE_AVR_MASK)) { + free(uc); + return UC_ERR_MODE; + } + uc->init_arch = uc_init_avr; + break; #endif } @@ -1041,6 +1069,11 @@ uc_err uc_emu_start(uc_engine *uc, uint64_t begin, uint64_t until, } break; #endif +#ifdef UNICORN_HAS_RH850 + case UC_ARCH_RH850: + uc_reg_write(uc, UC_RH850_REG_PC, &begin); + break; +#endif #ifdef UNICORN_HAS_RISCV case UC_ARCH_RISCV: if (uc->mode & UC_MODE_RISCV64) { @@ -1059,6 +1092,11 @@ uc_err uc_emu_start(uc_engine *uc, uint64_t begin, uint64_t until, case UC_ARCH_TRICORE: uc_reg_write(uc, UC_TRICORE_REG_PC, &begin_pc32); break; +#endif +#ifdef UNICORN_HAS_AVR + case UC_ARCH_AVR: + uc_reg_write(uc, UC_AVR_REG_PC, &begin_pc32); + break; #endif } @@ -1118,6 +1156,7 @@ uc_err uc_emu_start(uc_engine *uc, uint64_t begin, uint64_t until, if (timeout) { // wait for the timer to finish + printf("Wait VM to finish ...\n"); qemu_thread_join(&uc->timer); } @@ -2280,6 +2319,12 @@ static context_reg_rw_t find_context_reg_rw(uc_arch arch, uc_mode mode) } break; #endif +#ifdef UNICORN_HAS_RH850 + case UC_ARCH_RH850: + rw.read = reg_read_rh850; + rw.write = reg_write_rh850; + break; +#endif #ifdef UNICORN_HAS_RISCV case UC_ARCH_RISCV: if (mode & UC_MODE_RISCV32) { @@ -2302,6 +2347,12 @@ static context_reg_rw_t find_context_reg_rw(uc_arch arch, uc_mode mode) rw.read = reg_read_tricore; rw.write = reg_write_tricore; break; +#endif +#ifdef UNICORN_HAS_AVR + case UC_ARCH_AVR: + rw.read = reg_read_avr; + rw.write = reg_write_avr; + break; #endif } @@ -2731,6 +2782,11 @@ uc_err uc_ctl(uc_engine *uc, uc_control_type control, ...) 
err = UC_ERR_ARG; break; } + } else if (uc->arch == UC_ARCH_AVR) { + if (!avr_cpu_model_valid(model)) { + err = UC_ERR_ARG; + break; + } } else { err = UC_ERR_ARG; break; From 7f77dcec386abce57a9ca3b9e397813cef13ad84 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sun, 6 Apr 2025 02:59:31 -0400 Subject: [PATCH 4/4] regenerate symbols & add loongarch backend (#1903) (#2148) * regenerate symbols * Squash loongarch --------- Co-authored-by: WangLiangpu --- CMakeLists.txt | 5 + qemu/avr.h | 113 +- qemu/configure | 16 + qemu/include/elf.h | 1 + qemu/rh850.h | 101 +- qemu/tcg/loongarch64/tcg-insn-defs.c.inc | 7004 ++++++++++++++++++++++ qemu/tcg/loongarch64/tcg-target.h | 228 + qemu/tcg/loongarch64/tcg-target.inc.c | 2681 +++++++++ qemu/tcg/loongarch64/tcg-target.opc.h | 3 + 9 files changed, 10106 insertions(+), 46 deletions(-) create mode 100644 qemu/tcg/loongarch64/tcg-insn-defs.c.inc create mode 100644 qemu/tcg/loongarch64/tcg-target.h create mode 100644 qemu/tcg/loongarch64/tcg-target.inc.c create mode 100644 qemu/tcg/loongarch64/tcg-target.opc.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a266c4d76..e6b60aa890 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -279,6 +279,11 @@ else() set(UNICORN_TARGET_ARCH "avr") break() endif() + string(FIND ${UC_COMPILER_MACRO} "loongarch64" UC_RET) + if (${UC_RET} GREATER_EQUAL "0") + set(UNICORN_TARGET_ARCH "loongarch64") + break() + endif() message(FATAL_ERROR "Unknown host compiler: ${CMAKE_C_COMPILER}.") endwhile(TRUE) endif() diff --git a/qemu/avr.h b/qemu/avr.h index bb37176913..a20c033dc7 100644 --- a/qemu/avr.h +++ b/qemu/avr.h @@ -4,6 +4,10 @@ #ifndef UNICORN_ARCH_POSTFIX #define UNICORN_ARCH_POSTFIX _avr #endif +#define unicorn_fill_tlb unicorn_fill_tlb_avr +#define reg_read reg_read_avr +#define reg_write reg_write_avr +#define uc_init uc_init_avr #define uc_add_inline_hook uc_add_inline_hook_avr #define uc_del_inline_hook uc_del_inline_hook_avr #define tb_invalidate_phys_range tb_invalidate_phys_range_avr @@ -38,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_avr #define tcg_gen_shr_i64 tcg_gen_shr_i64_avr #define tcg_gen_st_i64 tcg_gen_st_i64_avr +#define tcg_gen_add_i64 tcg_gen_add_i64_avr +#define tcg_gen_sub_i64 tcg_gen_sub_i64_avr #define tcg_gen_xor_i64 tcg_gen_xor_i64_avr +#define tcg_gen_neg_i64 tcg_gen_neg_i64_avr #define cpu_icount_to_ns cpu_icount_to_ns_avr #define cpu_is_stopped cpu_is_stopped_avr #define cpu_get_ticks cpu_get_ticks_avr @@ -121,7 +128,10 @@ #define memory_map memory_map_avr #define memory_map_io memory_map_io_avr #define memory_map_ptr memory_map_ptr_avr +#define memory_cow memory_cow_avr #define memory_unmap memory_unmap_avr +#define memory_moveout memory_moveout_avr +#define memory_movein memory_movein_avr #define memory_free memory_free_avr #define flatview_unref flatview_unref_avr #define address_space_get_flatview address_space_get_flatview_avr @@ -140,7 +150,9 @@ #define memory_region_get_ram_addr memory_region_get_ram_addr_avr #define memory_region_add_subregion memory_region_add_subregion_avr #define memory_region_del_subregion memory_region_del_subregion_avr +#define memory_region_add_subregion_overlap memory_region_add_subregion_overlap_avr #define memory_region_find memory_region_find_avr +#define memory_region_filter_subregions memory_region_filter_subregions_avr #define memory_listener_register memory_listener_register_avr #define memory_listener_unregister memory_listener_unregister_avr #define address_space_remove_listeners address_space_remove_listeners_avr @@ -148,6 +160,7 @@ 
#define address_space_destroy address_space_destroy_avr #define memory_region_init_ram memory_region_init_ram_avr #define memory_mapping_list_add_merge_sorted memory_mapping_list_add_merge_sorted_avr +#define find_memory_mapping find_memory_mapping_avr #define exec_inline_op exec_inline_op_avr #define floatx80_default_nan floatx80_default_nan_avr #define float_raise float_raise_avr @@ -364,6 +377,8 @@ #define floatx80_sub floatx80_sub_avr #define floatx80_mul floatx80_mul_avr #define floatx80_div floatx80_div_avr +#define floatx80_modrem floatx80_modrem_avr +#define floatx80_mod floatx80_mod_avr #define floatx80_rem floatx80_rem_avr #define floatx80_sqrt floatx80_sqrt_avr #define floatx80_eq floatx80_eq_avr @@ -638,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_avr #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_avr #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_avr +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_avr #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_avr #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_avr #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_avr @@ -692,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_avr #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_avr #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_avr +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_avr +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_avr #define tcg_gen_gvec_sari tcg_gen_gvec_sari_avr +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_avr +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_avr #define tcg_gen_gvec_shls tcg_gen_gvec_shls_avr #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_avr #define tcg_gen_gvec_sars tcg_gen_gvec_sars_avr +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_avr #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_avr #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_avr #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_avr +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_avr +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_avr #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_avr #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_avr #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_avr @@ -735,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_avr #define tcg_gen_shri_vec tcg_gen_shri_vec_avr #define tcg_gen_sari_vec tcg_gen_sari_vec_avr +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_avr +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_avr #define tcg_gen_cmp_vec tcg_gen_cmp_vec_avr #define tcg_gen_add_vec tcg_gen_add_vec_avr #define tcg_gen_sub_vec tcg_gen_sub_vec_avr @@ -750,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_avr #define tcg_gen_shrv_vec tcg_gen_shrv_vec_avr #define tcg_gen_sarv_vec tcg_gen_sarv_vec_avr +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_avr +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_avr #define tcg_gen_shls_vec tcg_gen_shls_vec_avr #define tcg_gen_shrs_vec tcg_gen_shrs_vec_avr #define tcg_gen_sars_vec tcg_gen_sars_vec_avr +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_avr #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_avr #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_avr #define tb_htable_lookup tb_htable_lookup_avr @@ -764,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_avr #define cpu_loop_exit_atomic cpu_loop_exit_atomic_avr #define tlb_init tlb_init_avr +#define tlb_destroy tlb_destroy_avr #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_avr #define tlb_flush tlb_flush_avr #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_avr @@ -784,6 +813,7 @@ #define tlb_set_page tlb_set_page_avr 
#define get_page_addr_code_hostp get_page_addr_code_hostp_avr #define get_page_addr_code get_page_addr_code_avr +#define probe_access_flags probe_access_flags_avr #define probe_access probe_access_avr #define tlb_vaddr_to_host tlb_vaddr_to_host_avr #define helper_ret_ldub_mmu helper_ret_ldub_mmu_avr @@ -800,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_avr #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_avr #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_avr -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_avr -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_avr -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_avr -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_avr +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_avr +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_avr +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_avr +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_avr +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_avr +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_avr +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_avr +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_avr #define cpu_ldub_data_ra cpu_ldub_data_ra_avr #define cpu_ldsb_data_ra cpu_ldsb_data_ra_avr -#define cpu_lduw_data_ra cpu_lduw_data_ra_avr -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_avr -#define cpu_ldl_data_ra cpu_ldl_data_ra_avr -#define cpu_ldq_data_ra cpu_ldq_data_ra_avr +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_avr +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_avr +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_avr +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_avr +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_avr +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_avr +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_avr +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_avr #define cpu_ldub_data cpu_ldub_data_avr #define cpu_ldsb_data cpu_ldsb_data_avr -#define cpu_lduw_data cpu_lduw_data_avr -#define cpu_ldsw_data cpu_ldsw_data_avr -#define cpu_ldl_data cpu_ldl_data_avr -#define cpu_ldq_data cpu_ldq_data_avr +#define cpu_lduw_be_data cpu_lduw_be_data_avr +#define cpu_lduw_le_data cpu_lduw_le_data_avr +#define cpu_ldsw_be_data cpu_ldsw_be_data_avr +#define cpu_ldsw_le_data cpu_ldsw_le_data_avr +#define cpu_ldl_be_data cpu_ldl_be_data_avr +#define cpu_ldl_le_data cpu_ldl_le_data_avr +#define cpu_ldq_le_data cpu_ldq_le_data_avr +#define cpu_ldq_be_data cpu_ldq_be_data_avr #define helper_ret_stb_mmu helper_ret_stb_mmu_avr #define helper_le_stw_mmu helper_le_stw_mmu_avr #define helper_be_stw_mmu helper_be_stw_mmu_avr @@ -824,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_avr #define helper_be_stq_mmu helper_be_stq_mmu_avr #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_avr -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_avr -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_avr -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_avr +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_avr +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_avr +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_avr +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_avr +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_avr +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_avr #define cpu_stb_data_ra cpu_stb_data_ra_avr -#define cpu_stw_data_ra cpu_stw_data_ra_avr -#define cpu_stl_data_ra cpu_stl_data_ra_avr -#define cpu_stq_data_ra cpu_stq_data_ra_avr +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_avr +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_avr +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_avr +#define 
cpu_stl_le_data_ra cpu_stl_le_data_ra_avr +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_avr +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_avr #define cpu_stb_data cpu_stb_data_avr -#define cpu_stw_data cpu_stw_data_avr -#define cpu_stl_data cpu_stl_data_avr -#define cpu_stq_data cpu_stq_data_avr +#define cpu_stw_be_data cpu_stw_be_data_avr +#define cpu_stw_le_data cpu_stw_le_data_avr +#define cpu_stl_be_data cpu_stl_be_data_avr +#define cpu_stl_le_data cpu_stl_le_data_avr +#define cpu_stq_be_data cpu_stq_be_data_avr +#define cpu_stq_le_data cpu_stq_le_data_avr #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_avr #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_avr #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_avr @@ -1091,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_avr #define cpu_ldl_code cpu_ldl_code_avr #define cpu_ldq_code cpu_ldq_code_avr +#define cpu_interrupt_handler cpu_interrupt_handler_avr #define helper_div_i32 helper_div_i32_avr #define helper_rem_i32 helper_rem_i32_avr #define helper_divu_i32 helper_divu_i32_avr @@ -1175,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_avr #define helper_gvec_sar32i helper_gvec_sar32i_avr #define helper_gvec_sar64i helper_gvec_sar64i_avr +#define helper_gvec_rotl8i helper_gvec_rotl8i_avr +#define helper_gvec_rotl16i helper_gvec_rotl16i_avr +#define helper_gvec_rotl32i helper_gvec_rotl32i_avr +#define helper_gvec_rotl64i helper_gvec_rotl64i_avr #define helper_gvec_shl8v helper_gvec_shl8v_avr #define helper_gvec_shl16v helper_gvec_shl16v_avr #define helper_gvec_shl32v helper_gvec_shl32v_avr @@ -1187,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_avr #define helper_gvec_sar32v helper_gvec_sar32v_avr #define helper_gvec_sar64v helper_gvec_sar64v_avr +#define helper_gvec_rotl8v helper_gvec_rotl8v_avr +#define helper_gvec_rotl16v helper_gvec_rotl16v_avr +#define helper_gvec_rotl32v helper_gvec_rotl32v_avr +#define helper_gvec_rotl64v helper_gvec_rotl64v_avr +#define helper_gvec_rotr8v helper_gvec_rotr8v_avr +#define helper_gvec_rotr16v helper_gvec_rotr16v_avr +#define helper_gvec_rotr32v helper_gvec_rotr32v_avr +#define helper_gvec_rotr64v helper_gvec_rotr64v_avr #define helper_gvec_eq8 helper_gvec_eq8_avr #define helper_gvec_ne8 helper_gvec_ne8_avr #define helper_gvec_lt8 helper_gvec_lt8_avr @@ -1279,6 +1343,9 @@ #define gen_helper_vfp_set_fpscr gen_helper_vfp_set_fpscr_avr #define gen_helper_cpsr_read gen_helper_cpsr_read_avr #define gen_helper_cpsr_write gen_helper_cpsr_write_avr +#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_avr +#define helper_stqcx_le_parallel helper_stqcx_le_parallel_avr +#define helper_stqcx_be_parallel helper_stqcx_be_parallel_avr #define helper_sleep helper_sleep_avr #define helper_unsupported helper_unsupported_avr #define helper_debug helper_debug_avr @@ -1290,8 +1357,4 @@ #define helper_wdr helper_wdr_avr #define gen_intermediate_code gen_intermediate_code_avr #define restore_state_to_opc restore_state_to_opc_avr - -#define reg_read reg_read_avr -#define reg_write reg_write_avr -#define uc_init uc_init_avr #endif diff --git a/qemu/configure b/qemu/configure index cc5752292f..3085467646 100755 --- a/qemu/configure +++ b/qemu/configure @@ -498,6 +498,8 @@ elif check_define __tricore__ ; then cpu="tricore" elif check_define __AVR__ ; then cpu="avr" +elif check_define __loongarch64 ; then + cpu="loongarch64" else cpu=$(uname -m) fi @@ -545,6 +547,10 @@ case "$cpu" in cpu="avr" supported_cpu="yes" ;; + loongarch64) + cpu="loongarch64" + 
supported_cpu="yes" + ;; *) # This will result in either an error or falling back to TCI later ARCH=unknown @@ -859,6 +865,11 @@ case "$cpu" in CPU_CFLAGS="-m64 -mcx16" QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" ;; + loongarch*) + CPU_CFLAGS="" + QEMU_LDFLAGS=" $QEMU_LDFLAGS" + ;; + x32) CPU_CFLAGS="-mx32" QEMU_LDFLAGS="-mx32 $QEMU_LDFLAGS" @@ -2680,6 +2691,11 @@ case "$target_name" in mttcg="yes" TARGET_SYSTBL_ABI=i386 ;; + loongarch64) + mttcg="yes" + TARGET_ARCH=loongarch64 + TARGET_SYSTBL_ABI=common,64 + ;; x86_64) TARGET_BASE_ARCH=i386 TARGET_SYSTBL_ABI=common,64 diff --git a/qemu/include/elf.h b/qemu/include/elf.h index 5b06b55f28..a6bec3d674 100644 --- a/qemu/include/elf.h +++ b/qemu/include/elf.h @@ -176,6 +176,7 @@ typedef struct mips_elf_abiflags_v0 { #define EM_NANOMIPS 249 /* Wave Computing nanoMIPS */ +#define EM_LOONGARCH 258 /* LoongArch */ /* * This is an interim value that we will use until the committee comes * up with a final number. diff --git a/qemu/rh850.h b/qemu/rh850.h index 071393cb7c..f0ba0cabf3 100644 --- a/qemu/rh850.h +++ b/qemu/rh850.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_rh850 #define tcg_gen_shr_i64 tcg_gen_shr_i64_rh850 #define tcg_gen_st_i64 tcg_gen_st_i64_rh850 +#define tcg_gen_add_i64 tcg_gen_add_i64_rh850 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_rh850 #define tcg_gen_xor_i64 tcg_gen_xor_i64_rh850 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_rh850 #define cpu_icount_to_ns cpu_icount_to_ns_rh850 #define cpu_is_stopped cpu_is_stopped_rh850 #define cpu_get_ticks cpu_get_ticks_rh850 @@ -54,6 +57,7 @@ #define vm_start vm_start_rh850 #define address_space_dispatch_compact address_space_dispatch_compact_rh850 #define flatview_translate flatview_translate_rh850 +#define flatview_copy flatview_copy_rh850 #define address_space_translate_for_iotlb address_space_translate_for_iotlb_rh850 #define qemu_get_cpu qemu_get_cpu_rh850 #define cpu_address_space_init cpu_address_space_init_rh850 @@ -90,6 +94,7 @@ #define iotlb_to_section iotlb_to_section_rh850 #define address_space_dispatch_new address_space_dispatch_new_rh850 #define address_space_dispatch_free address_space_dispatch_free_rh850 +#define address_space_dispatch_clear address_space_dispatch_clear_rh850 #define flatview_read_continue flatview_read_continue_rh850 #define address_space_read_full address_space_read_full_rh850 #define address_space_write address_space_write_rh850 @@ -372,6 +377,8 @@ #define floatx80_sub floatx80_sub_rh850 #define floatx80_mul floatx80_mul_rh850 #define floatx80_div floatx80_div_rh850 +#define floatx80_modrem floatx80_modrem_rh850 +#define floatx80_mod floatx80_mod_rh850 #define floatx80_rem floatx80_rem_rh850 #define floatx80_sqrt floatx80_sqrt_rh850 #define floatx80_eq floatx80_eq_rh850 @@ -646,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_rh850 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_rh850 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_rh850 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_rh850 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_rh850 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_rh850 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_rh850 @@ -700,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_rh850 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_rh850 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_rh850 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_rh850 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_rh850 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_rh850 +#define tcg_gen_gvec_rotli 
tcg_gen_gvec_rotli_rh850 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_rh850 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_rh850 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_rh850 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_rh850 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_rh850 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_rh850 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_rh850 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_rh850 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_rh850 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_rh850 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_rh850 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_rh850 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_rh850 @@ -743,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_rh850 #define tcg_gen_shri_vec tcg_gen_shri_vec_rh850 #define tcg_gen_sari_vec tcg_gen_sari_vec_rh850 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_rh850 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_rh850 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_rh850 #define tcg_gen_add_vec tcg_gen_add_vec_rh850 #define tcg_gen_sub_vec tcg_gen_sub_vec_rh850 @@ -758,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_rh850 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_rh850 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_rh850 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_rh850 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_rh850 #define tcg_gen_shls_vec tcg_gen_shls_vec_rh850 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_rh850 #define tcg_gen_sars_vec tcg_gen_sars_vec_rh850 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_rh850 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_rh850 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_rh850 #define tb_htable_lookup tb_htable_lookup_rh850 @@ -772,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_rh850 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_rh850 #define tlb_init tlb_init_rh850 +#define tlb_destroy tlb_destroy_rh850 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_rh850 #define tlb_flush tlb_flush_rh850 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_rh850 @@ -792,6 +813,7 @@ #define tlb_set_page tlb_set_page_rh850 #define get_page_addr_code_hostp get_page_addr_code_hostp_rh850 #define get_page_addr_code get_page_addr_code_rh850 +#define probe_access_flags probe_access_flags_rh850 #define probe_access probe_access_rh850 #define tlb_vaddr_to_host tlb_vaddr_to_host_rh850 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_rh850 @@ -808,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_rh850 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_rh850 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_rh850 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_rh850 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_rh850 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_rh850 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_rh850 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_rh850 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_rh850 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_rh850 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_rh850 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_rh850 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_rh850 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_rh850 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_rh850 #define cpu_ldub_data_ra cpu_ldub_data_ra_rh850 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_rh850 -#define cpu_lduw_data_ra cpu_lduw_data_ra_rh850 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_rh850 -#define cpu_ldl_data_ra cpu_ldl_data_ra_rh850 
-#define cpu_ldq_data_ra cpu_ldq_data_ra_rh850 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_rh850 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_rh850 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_rh850 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_rh850 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_rh850 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_rh850 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_rh850 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_rh850 #define cpu_ldub_data cpu_ldub_data_rh850 #define cpu_ldsb_data cpu_ldsb_data_rh850 -#define cpu_lduw_data cpu_lduw_data_rh850 -#define cpu_ldsw_data cpu_ldsw_data_rh850 -#define cpu_ldl_data cpu_ldl_data_rh850 -#define cpu_ldq_data cpu_ldq_data_rh850 +#define cpu_lduw_be_data cpu_lduw_be_data_rh850 +#define cpu_lduw_le_data cpu_lduw_le_data_rh850 +#define cpu_ldsw_be_data cpu_ldsw_be_data_rh850 +#define cpu_ldsw_le_data cpu_ldsw_le_data_rh850 +#define cpu_ldl_be_data cpu_ldl_be_data_rh850 +#define cpu_ldl_le_data cpu_ldl_le_data_rh850 +#define cpu_ldq_le_data cpu_ldq_le_data_rh850 +#define cpu_ldq_be_data cpu_ldq_be_data_rh850 #define helper_ret_stb_mmu helper_ret_stb_mmu_rh850 #define helper_le_stw_mmu helper_le_stw_mmu_rh850 #define helper_be_stw_mmu helper_be_stw_mmu_rh850 @@ -832,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_rh850 #define helper_be_stq_mmu helper_be_stq_mmu_rh850 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_rh850 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_rh850 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_rh850 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_rh850 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_rh850 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_rh850 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_rh850 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_rh850 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_rh850 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_rh850 #define cpu_stb_data_ra cpu_stb_data_ra_rh850 -#define cpu_stw_data_ra cpu_stw_data_ra_rh850 -#define cpu_stl_data_ra cpu_stl_data_ra_rh850 -#define cpu_stq_data_ra cpu_stq_data_ra_rh850 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_rh850 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_rh850 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_rh850 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_rh850 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_rh850 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_rh850 #define cpu_stb_data cpu_stb_data_rh850 -#define cpu_stw_data cpu_stw_data_rh850 -#define cpu_stl_data cpu_stl_data_rh850 -#define cpu_stq_data cpu_stq_data_rh850 +#define cpu_stw_be_data cpu_stw_be_data_rh850 +#define cpu_stw_le_data cpu_stw_le_data_rh850 +#define cpu_stl_be_data cpu_stl_be_data_rh850 +#define cpu_stl_le_data cpu_stl_le_data_rh850 +#define cpu_stq_be_data cpu_stq_be_data_rh850 +#define cpu_stq_le_data cpu_stq_le_data_rh850 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_rh850 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_rh850 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_rh850 @@ -1099,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_rh850 #define cpu_ldl_code cpu_ldl_code_rh850 #define cpu_ldq_code cpu_ldq_code_rh850 +#define cpu_interrupt_handler cpu_interrupt_handler_rh850 #define helper_div_i32 helper_div_i32_rh850 #define helper_rem_i32 helper_rem_i32_rh850 #define helper_divu_i32 helper_divu_i32_rh850 @@ -1183,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_rh850 #define helper_gvec_sar32i helper_gvec_sar32i_rh850 
#define helper_gvec_sar64i helper_gvec_sar64i_rh850 +#define helper_gvec_rotl8i helper_gvec_rotl8i_rh850 +#define helper_gvec_rotl16i helper_gvec_rotl16i_rh850 +#define helper_gvec_rotl32i helper_gvec_rotl32i_rh850 +#define helper_gvec_rotl64i helper_gvec_rotl64i_rh850 #define helper_gvec_shl8v helper_gvec_shl8v_rh850 #define helper_gvec_shl16v helper_gvec_shl16v_rh850 #define helper_gvec_shl32v helper_gvec_shl32v_rh850 @@ -1195,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_rh850 #define helper_gvec_sar32v helper_gvec_sar32v_rh850 #define helper_gvec_sar64v helper_gvec_sar64v_rh850 +#define helper_gvec_rotl8v helper_gvec_rotl8v_rh850 +#define helper_gvec_rotl16v helper_gvec_rotl16v_rh850 +#define helper_gvec_rotl32v helper_gvec_rotl32v_rh850 +#define helper_gvec_rotl64v helper_gvec_rotl64v_rh850 +#define helper_gvec_rotr8v helper_gvec_rotr8v_rh850 +#define helper_gvec_rotr16v helper_gvec_rotr16v_rh850 +#define helper_gvec_rotr32v helper_gvec_rotr32v_rh850 +#define helper_gvec_rotr64v helper_gvec_rotr64v_rh850 #define helper_gvec_eq8 helper_gvec_eq8_rh850 #define helper_gvec_ne8 helper_gvec_ne8_rh850 #define helper_gvec_lt8 helper_gvec_lt8_rh850 @@ -1287,6 +1343,9 @@ #define gen_helper_vfp_set_fpscr gen_helper_vfp_set_fpscr_rh850 #define gen_helper_cpsr_read gen_helper_cpsr_read_rh850 #define gen_helper_cpsr_write gen_helper_cpsr_write_rh850 +#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_rh850 +#define helper_stqcx_le_parallel helper_stqcx_le_parallel_rh850 +#define helper_stqcx_be_parallel helper_stqcx_be_parallel_rh850 #define restore_state_to_opc restore_state_to_opc_rh850 #define helper_tlb_flush helper_tlb_flush_rh850 #define helper_uc_rh850_exit helper_uc_rh850_exit_rh850 diff --git a/qemu/tcg/loongarch64/tcg-insn-defs.c.inc b/qemu/tcg/loongarch64/tcg-insn-defs.c.inc new file mode 100644 index 0000000000..ee3b483b02 --- /dev/null +++ b/qemu/tcg/loongarch64/tcg-insn-defs.c.inc @@ -0,0 +1,7004 @@ +/* SPDX-License-Identifier: MIT */ +/* + * LoongArch instruction formats, opcodes, and encoders for TCG use. + * + * This file is auto-generated by genqemutcgdefs from + * https://github.com/loongson-community/loongarch-opcodes, + * from commit 8027da9a8157a8b47fc48ff1def292e09c5668bd. + * DO NOT EDIT. 
+ */ + +typedef enum { + OPC_CLZ_W = 0x00001400, + OPC_CTZ_W = 0x00001c00, + OPC_CLZ_D = 0x00002400, + OPC_CTZ_D = 0x00002c00, + OPC_REVB_2H = 0x00003000, + OPC_REVB_2W = 0x00003800, + OPC_REVB_D = 0x00003c00, + OPC_SEXT_H = 0x00005800, + OPC_SEXT_B = 0x00005c00, + OPC_ADD_W = 0x00100000, + OPC_ADD_D = 0x00108000, + OPC_SUB_W = 0x00110000, + OPC_SUB_D = 0x00118000, + OPC_SLT = 0x00120000, + OPC_SLTU = 0x00128000, + OPC_MASKEQZ = 0x00130000, + OPC_MASKNEZ = 0x00138000, + OPC_NOR = 0x00140000, + OPC_AND = 0x00148000, + OPC_OR = 0x00150000, + OPC_XOR = 0x00158000, + OPC_ORN = 0x00160000, + OPC_ANDN = 0x00168000, + OPC_SLL_W = 0x00170000, + OPC_SRL_W = 0x00178000, + OPC_SRA_W = 0x00180000, + OPC_SLL_D = 0x00188000, + OPC_SRL_D = 0x00190000, + OPC_SRA_D = 0x00198000, + OPC_ROTR_W = 0x001b0000, + OPC_ROTR_D = 0x001b8000, + OPC_MUL_W = 0x001c0000, + OPC_MULH_W = 0x001c8000, + OPC_MULH_WU = 0x001d0000, + OPC_MUL_D = 0x001d8000, + OPC_MULH_D = 0x001e0000, + OPC_MULH_DU = 0x001e8000, + OPC_DIV_W = 0x00200000, + OPC_MOD_W = 0x00208000, + OPC_DIV_WU = 0x00210000, + OPC_MOD_WU = 0x00218000, + OPC_DIV_D = 0x00220000, + OPC_MOD_D = 0x00228000, + OPC_DIV_DU = 0x00230000, + OPC_MOD_DU = 0x00238000, + OPC_SLLI_W = 0x00408000, + OPC_SLLI_D = 0x00410000, + OPC_SRLI_W = 0x00448000, + OPC_SRLI_D = 0x00450000, + OPC_SRAI_W = 0x00488000, + OPC_SRAI_D = 0x00490000, + OPC_ROTRI_W = 0x004c8000, + OPC_ROTRI_D = 0x004d0000, + OPC_BSTRINS_W = 0x00600000, + OPC_BSTRPICK_W = 0x00608000, + OPC_BSTRINS_D = 0x00800000, + OPC_BSTRPICK_D = 0x00c00000, + OPC_SLTI = 0x02000000, + OPC_SLTUI = 0x02400000, + OPC_ADDI_W = 0x02800000, + OPC_ADDI_D = 0x02c00000, + OPC_CU52I_D = 0x03000000, + OPC_ANDI = 0x03400000, + OPC_ORI = 0x03800000, + OPC_XORI = 0x03c00000, + OPC_VFMADD_S = 0x09100000, + OPC_VFMADD_D = 0x09200000, + OPC_VFMSUB_S = 0x09500000, + OPC_VFMSUB_D = 0x09600000, + OPC_VFNMADD_S = 0x09900000, + OPC_VFNMADD_D = 0x09a00000, + OPC_VFNMSUB_S = 0x09d00000, + OPC_VFNMSUB_D = 0x09e00000, + OPC_VFCMP_CAF_S = 0x0c500000, + OPC_VFCMP_SAF_S = 0x0c508000, + OPC_VFCMP_CLT_S = 0x0c510000, + OPC_VFCMP_SLT_S = 0x0c518000, + OPC_VFCMP_CEQ_S = 0x0c520000, + OPC_VFCMP_SEQ_S = 0x0c528000, + OPC_VFCMP_CLE_S = 0x0c530000, + OPC_VFCMP_SLE_S = 0x0c538000, + OPC_VFCMP_CUN_S = 0x0c540000, + OPC_VFCMP_SUN_S = 0x0c548000, + OPC_VFCMP_CULT_S = 0x0c550000, + OPC_VFCMP_SULT_S = 0x0c558000, + OPC_VFCMP_CUEQ_S = 0x0c560000, + OPC_VFCMP_SUEQ_S = 0x0c568000, + OPC_VFCMP_CULE_S = 0x0c570000, + OPC_VFCMP_SULE_S = 0x0c578000, + OPC_VFCMP_CNE_S = 0x0c580000, + OPC_VFCMP_SNE_S = 0x0c588000, + OPC_VFCMP_COR_S = 0x0c5a0000, + OPC_VFCMP_SOR_S = 0x0c5a8000, + OPC_VFCMP_CUNE_S = 0x0c5c0000, + OPC_VFCMP_SUNE_S = 0x0c5c8000, + OPC_VFCMP_CAF_D = 0x0c600000, + OPC_VFCMP_SAF_D = 0x0c608000, + OPC_VFCMP_CLT_D = 0x0c610000, + OPC_VFCMP_SLT_D = 0x0c618000, + OPC_VFCMP_CEQ_D = 0x0c620000, + OPC_VFCMP_SEQ_D = 0x0c628000, + OPC_VFCMP_CLE_D = 0x0c630000, + OPC_VFCMP_SLE_D = 0x0c638000, + OPC_VFCMP_CUN_D = 0x0c640000, + OPC_VFCMP_SUN_D = 0x0c648000, + OPC_VFCMP_CULT_D = 0x0c650000, + OPC_VFCMP_SULT_D = 0x0c658000, + OPC_VFCMP_CUEQ_D = 0x0c660000, + OPC_VFCMP_SUEQ_D = 0x0c668000, + OPC_VFCMP_CULE_D = 0x0c670000, + OPC_VFCMP_SULE_D = 0x0c678000, + OPC_VFCMP_CNE_D = 0x0c680000, + OPC_VFCMP_SNE_D = 0x0c688000, + OPC_VFCMP_COR_D = 0x0c6a0000, + OPC_VFCMP_SOR_D = 0x0c6a8000, + OPC_VFCMP_CUNE_D = 0x0c6c0000, + OPC_VFCMP_SUNE_D = 0x0c6c8000, + OPC_VBITSEL_V = 0x0d100000, + OPC_VSHUF_B = 0x0d500000, + OPC_ADDU16I_D = 0x10000000, + OPC_LU12I_W = 0x14000000, + OPC_CU32I_D = 0x16000000, + 
OPC_PCADDU2I = 0x18000000, + OPC_PCALAU12I = 0x1a000000, + OPC_PCADDU12I = 0x1c000000, + OPC_PCADDU18I = 0x1e000000, + OPC_LD_B = 0x28000000, + OPC_LD_H = 0x28400000, + OPC_LD_W = 0x28800000, + OPC_LD_D = 0x28c00000, + OPC_ST_B = 0x29000000, + OPC_ST_H = 0x29400000, + OPC_ST_W = 0x29800000, + OPC_ST_D = 0x29c00000, + OPC_LD_BU = 0x2a000000, + OPC_LD_HU = 0x2a400000, + OPC_LD_WU = 0x2a800000, + OPC_VLD = 0x2c000000, + OPC_VST = 0x2c400000, + OPC_VLDREPL_D = 0x30100000, + OPC_VLDREPL_W = 0x30200000, + OPC_VLDREPL_H = 0x30400000, + OPC_VLDREPL_B = 0x30800000, + OPC_VSTELM_D = 0x31100000, + OPC_VSTELM_W = 0x31200000, + OPC_VSTELM_H = 0x31400000, + OPC_VSTELM_B = 0x31800000, + OPC_LDX_B = 0x38000000, + OPC_LDX_H = 0x38040000, + OPC_LDX_W = 0x38080000, + OPC_LDX_D = 0x380c0000, + OPC_STX_B = 0x38100000, + OPC_STX_H = 0x38140000, + OPC_STX_W = 0x38180000, + OPC_STX_D = 0x381c0000, + OPC_LDX_BU = 0x38200000, + OPC_LDX_HU = 0x38240000, + OPC_LDX_WU = 0x38280000, + OPC_VLDX = 0x38400000, + OPC_VSTX = 0x38440000, + OPC_DBAR = 0x38720000, + OPC_JIRL = 0x4c000000, + OPC_B = 0x50000000, + OPC_BL = 0x54000000, + OPC_BEQ = 0x58000000, + OPC_BNE = 0x5c000000, + OPC_BGT = 0x60000000, + OPC_BLE = 0x64000000, + OPC_BGTU = 0x68000000, + OPC_BLEU = 0x6c000000, + OPC_VSEQ_B = 0x70000000, + OPC_VSEQ_H = 0x70008000, + OPC_VSEQ_W = 0x70010000, + OPC_VSEQ_D = 0x70018000, + OPC_VSLE_B = 0x70020000, + OPC_VSLE_H = 0x70028000, + OPC_VSLE_W = 0x70030000, + OPC_VSLE_D = 0x70038000, + OPC_VSLE_BU = 0x70040000, + OPC_VSLE_HU = 0x70048000, + OPC_VSLE_WU = 0x70050000, + OPC_VSLE_DU = 0x70058000, + OPC_VSLT_B = 0x70060000, + OPC_VSLT_H = 0x70068000, + OPC_VSLT_W = 0x70070000, + OPC_VSLT_D = 0x70078000, + OPC_VSLT_BU = 0x70080000, + OPC_VSLT_HU = 0x70088000, + OPC_VSLT_WU = 0x70090000, + OPC_VSLT_DU = 0x70098000, + OPC_VADD_B = 0x700a0000, + OPC_VADD_H = 0x700a8000, + OPC_VADD_W = 0x700b0000, + OPC_VADD_D = 0x700b8000, + OPC_VSUB_B = 0x700c0000, + OPC_VSUB_H = 0x700c8000, + OPC_VSUB_W = 0x700d0000, + OPC_VSUB_D = 0x700d8000, + OPC_VADDWEV_H_B = 0x701e0000, + OPC_VADDWEV_W_H = 0x701e8000, + OPC_VADDWEV_D_W = 0x701f0000, + OPC_VADDWEV_Q_D = 0x701f8000, + OPC_VSUBWEV_H_B = 0x70200000, + OPC_VSUBWEV_W_H = 0x70208000, + OPC_VSUBWEV_D_W = 0x70210000, + OPC_VSUBWEV_Q_D = 0x70218000, + OPC_VADDWOD_H_B = 0x70220000, + OPC_VADDWOD_W_H = 0x70228000, + OPC_VADDWOD_D_W = 0x70230000, + OPC_VADDWOD_Q_D = 0x70238000, + OPC_VSUBWOD_H_B = 0x70240000, + OPC_VSUBWOD_W_H = 0x70248000, + OPC_VSUBWOD_D_W = 0x70250000, + OPC_VSUBWOD_Q_D = 0x70258000, + OPC_VADDWEV_H_BU = 0x702e0000, + OPC_VADDWEV_W_HU = 0x702e8000, + OPC_VADDWEV_D_WU = 0x702f0000, + OPC_VADDWEV_Q_DU = 0x702f8000, + OPC_VSUBWEV_H_BU = 0x70300000, + OPC_VSUBWEV_W_HU = 0x70308000, + OPC_VSUBWEV_D_WU = 0x70310000, + OPC_VSUBWEV_Q_DU = 0x70318000, + OPC_VADDWOD_H_BU = 0x70320000, + OPC_VADDWOD_W_HU = 0x70328000, + OPC_VADDWOD_D_WU = 0x70330000, + OPC_VADDWOD_Q_DU = 0x70338000, + OPC_VSUBWOD_H_BU = 0x70340000, + OPC_VSUBWOD_W_HU = 0x70348000, + OPC_VSUBWOD_D_WU = 0x70350000, + OPC_VSUBWOD_Q_DU = 0x70358000, + OPC_VADDWEV_H_BU_B = 0x703e0000, + OPC_VADDWEV_W_HU_H = 0x703e8000, + OPC_VADDWEV_D_WU_W = 0x703f0000, + OPC_VADDWEV_Q_DU_D = 0x703f8000, + OPC_VADDWOD_H_BU_B = 0x70400000, + OPC_VADDWOD_W_HU_H = 0x70408000, + OPC_VADDWOD_D_WU_W = 0x70410000, + OPC_VADDWOD_Q_DU_D = 0x70418000, + OPC_VSADD_B = 0x70460000, + OPC_VSADD_H = 0x70468000, + OPC_VSADD_W = 0x70470000, + OPC_VSADD_D = 0x70478000, + OPC_VSSUB_B = 0x70480000, + OPC_VSSUB_H = 0x70488000, + OPC_VSSUB_W = 0x70490000, + OPC_VSSUB_D = 
0x70498000, + OPC_VSADD_BU = 0x704a0000, + OPC_VSADD_HU = 0x704a8000, + OPC_VSADD_WU = 0x704b0000, + OPC_VSADD_DU = 0x704b8000, + OPC_VSSUB_BU = 0x704c0000, + OPC_VSSUB_HU = 0x704c8000, + OPC_VSSUB_WU = 0x704d0000, + OPC_VSSUB_DU = 0x704d8000, + OPC_VHADDW_H_B = 0x70540000, + OPC_VHADDW_W_H = 0x70548000, + OPC_VHADDW_D_W = 0x70550000, + OPC_VHADDW_Q_D = 0x70558000, + OPC_VHSUBW_H_B = 0x70560000, + OPC_VHSUBW_W_H = 0x70568000, + OPC_VHSUBW_D_W = 0x70570000, + OPC_VHSUBW_Q_D = 0x70578000, + OPC_VHADDW_HU_BU = 0x70580000, + OPC_VHADDW_WU_HU = 0x70588000, + OPC_VHADDW_DU_WU = 0x70590000, + OPC_VHADDW_QU_DU = 0x70598000, + OPC_VHSUBW_HU_BU = 0x705a0000, + OPC_VHSUBW_WU_HU = 0x705a8000, + OPC_VHSUBW_DU_WU = 0x705b0000, + OPC_VHSUBW_QU_DU = 0x705b8000, + OPC_VADDA_B = 0x705c0000, + OPC_VADDA_H = 0x705c8000, + OPC_VADDA_W = 0x705d0000, + OPC_VADDA_D = 0x705d8000, + OPC_VABSD_B = 0x70600000, + OPC_VABSD_H = 0x70608000, + OPC_VABSD_W = 0x70610000, + OPC_VABSD_D = 0x70618000, + OPC_VABSD_BU = 0x70620000, + OPC_VABSD_HU = 0x70628000, + OPC_VABSD_WU = 0x70630000, + OPC_VABSD_DU = 0x70638000, + OPC_VAVG_B = 0x70640000, + OPC_VAVG_H = 0x70648000, + OPC_VAVG_W = 0x70650000, + OPC_VAVG_D = 0x70658000, + OPC_VAVG_BU = 0x70660000, + OPC_VAVG_HU = 0x70668000, + OPC_VAVG_WU = 0x70670000, + OPC_VAVG_DU = 0x70678000, + OPC_VAVGR_B = 0x70680000, + OPC_VAVGR_H = 0x70688000, + OPC_VAVGR_W = 0x70690000, + OPC_VAVGR_D = 0x70698000, + OPC_VAVGR_BU = 0x706a0000, + OPC_VAVGR_HU = 0x706a8000, + OPC_VAVGR_WU = 0x706b0000, + OPC_VAVGR_DU = 0x706b8000, + OPC_VMAX_B = 0x70700000, + OPC_VMAX_H = 0x70708000, + OPC_VMAX_W = 0x70710000, + OPC_VMAX_D = 0x70718000, + OPC_VMIN_B = 0x70720000, + OPC_VMIN_H = 0x70728000, + OPC_VMIN_W = 0x70730000, + OPC_VMIN_D = 0x70738000, + OPC_VMAX_BU = 0x70740000, + OPC_VMAX_HU = 0x70748000, + OPC_VMAX_WU = 0x70750000, + OPC_VMAX_DU = 0x70758000, + OPC_VMIN_BU = 0x70760000, + OPC_VMIN_HU = 0x70768000, + OPC_VMIN_WU = 0x70770000, + OPC_VMIN_DU = 0x70778000, + OPC_VMUL_B = 0x70840000, + OPC_VMUL_H = 0x70848000, + OPC_VMUL_W = 0x70850000, + OPC_VMUL_D = 0x70858000, + OPC_VMUH_B = 0x70860000, + OPC_VMUH_H = 0x70868000, + OPC_VMUH_W = 0x70870000, + OPC_VMUH_D = 0x70878000, + OPC_VMUH_BU = 0x70880000, + OPC_VMUH_HU = 0x70888000, + OPC_VMUH_WU = 0x70890000, + OPC_VMUH_DU = 0x70898000, + OPC_VMULWEV_H_B = 0x70900000, + OPC_VMULWEV_W_H = 0x70908000, + OPC_VMULWEV_D_W = 0x70910000, + OPC_VMULWEV_Q_D = 0x70918000, + OPC_VMULWOD_H_B = 0x70920000, + OPC_VMULWOD_W_H = 0x70928000, + OPC_VMULWOD_D_W = 0x70930000, + OPC_VMULWOD_Q_D = 0x70938000, + OPC_VMULWEV_H_BU = 0x70980000, + OPC_VMULWEV_W_HU = 0x70988000, + OPC_VMULWEV_D_WU = 0x70990000, + OPC_VMULWEV_Q_DU = 0x70998000, + OPC_VMULWOD_H_BU = 0x709a0000, + OPC_VMULWOD_W_HU = 0x709a8000, + OPC_VMULWOD_D_WU = 0x709b0000, + OPC_VMULWOD_Q_DU = 0x709b8000, + OPC_VMULWEV_H_BU_B = 0x70a00000, + OPC_VMULWEV_W_HU_H = 0x70a08000, + OPC_VMULWEV_D_WU_W = 0x70a10000, + OPC_VMULWEV_Q_DU_D = 0x70a18000, + OPC_VMULWOD_H_BU_B = 0x70a20000, + OPC_VMULWOD_W_HU_H = 0x70a28000, + OPC_VMULWOD_D_WU_W = 0x70a30000, + OPC_VMULWOD_Q_DU_D = 0x70a38000, + OPC_VMADD_B = 0x70a80000, + OPC_VMADD_H = 0x70a88000, + OPC_VMADD_W = 0x70a90000, + OPC_VMADD_D = 0x70a98000, + OPC_VMSUB_B = 0x70aa0000, + OPC_VMSUB_H = 0x70aa8000, + OPC_VMSUB_W = 0x70ab0000, + OPC_VMSUB_D = 0x70ab8000, + OPC_VMADDWEV_H_B = 0x70ac0000, + OPC_VMADDWEV_W_H = 0x70ac8000, + OPC_VMADDWEV_D_W = 0x70ad0000, + OPC_VMADDWEV_Q_D = 0x70ad8000, + OPC_VMADDWOD_H_B = 0x70ae0000, + OPC_VMADDWOD_W_H = 0x70ae8000, + OPC_VMADDWOD_D_W = 
0x70af0000, + OPC_VMADDWOD_Q_D = 0x70af8000, + OPC_VMADDWEV_H_BU = 0x70b40000, + OPC_VMADDWEV_W_HU = 0x70b48000, + OPC_VMADDWEV_D_WU = 0x70b50000, + OPC_VMADDWEV_Q_DU = 0x70b58000, + OPC_VMADDWOD_H_BU = 0x70b60000, + OPC_VMADDWOD_W_HU = 0x70b68000, + OPC_VMADDWOD_D_WU = 0x70b70000, + OPC_VMADDWOD_Q_DU = 0x70b78000, + OPC_VMADDWEV_H_BU_B = 0x70bc0000, + OPC_VMADDWEV_W_HU_H = 0x70bc8000, + OPC_VMADDWEV_D_WU_W = 0x70bd0000, + OPC_VMADDWEV_Q_DU_D = 0x70bd8000, + OPC_VMADDWOD_H_BU_B = 0x70be0000, + OPC_VMADDWOD_W_HU_H = 0x70be8000, + OPC_VMADDWOD_D_WU_W = 0x70bf0000, + OPC_VMADDWOD_Q_DU_D = 0x70bf8000, + OPC_VDIV_B = 0x70e00000, + OPC_VDIV_H = 0x70e08000, + OPC_VDIV_W = 0x70e10000, + OPC_VDIV_D = 0x70e18000, + OPC_VMOD_B = 0x70e20000, + OPC_VMOD_H = 0x70e28000, + OPC_VMOD_W = 0x70e30000, + OPC_VMOD_D = 0x70e38000, + OPC_VDIV_BU = 0x70e40000, + OPC_VDIV_HU = 0x70e48000, + OPC_VDIV_WU = 0x70e50000, + OPC_VDIV_DU = 0x70e58000, + OPC_VMOD_BU = 0x70e60000, + OPC_VMOD_HU = 0x70e68000, + OPC_VMOD_WU = 0x70e70000, + OPC_VMOD_DU = 0x70e78000, + OPC_VSLL_B = 0x70e80000, + OPC_VSLL_H = 0x70e88000, + OPC_VSLL_W = 0x70e90000, + OPC_VSLL_D = 0x70e98000, + OPC_VSRL_B = 0x70ea0000, + OPC_VSRL_H = 0x70ea8000, + OPC_VSRL_W = 0x70eb0000, + OPC_VSRL_D = 0x70eb8000, + OPC_VSRA_B = 0x70ec0000, + OPC_VSRA_H = 0x70ec8000, + OPC_VSRA_W = 0x70ed0000, + OPC_VSRA_D = 0x70ed8000, + OPC_VROTR_B = 0x70ee0000, + OPC_VROTR_H = 0x70ee8000, + OPC_VROTR_W = 0x70ef0000, + OPC_VROTR_D = 0x70ef8000, + OPC_VSRLR_B = 0x70f00000, + OPC_VSRLR_H = 0x70f08000, + OPC_VSRLR_W = 0x70f10000, + OPC_VSRLR_D = 0x70f18000, + OPC_VSRAR_B = 0x70f20000, + OPC_VSRAR_H = 0x70f28000, + OPC_VSRAR_W = 0x70f30000, + OPC_VSRAR_D = 0x70f38000, + OPC_VSRLN_B_H = 0x70f48000, + OPC_VSRLN_H_W = 0x70f50000, + OPC_VSRLN_W_D = 0x70f58000, + OPC_VSRAN_B_H = 0x70f68000, + OPC_VSRAN_H_W = 0x70f70000, + OPC_VSRAN_W_D = 0x70f78000, + OPC_VSRLRN_B_H = 0x70f88000, + OPC_VSRLRN_H_W = 0x70f90000, + OPC_VSRLRN_W_D = 0x70f98000, + OPC_VSRARN_B_H = 0x70fa8000, + OPC_VSRARN_H_W = 0x70fb0000, + OPC_VSRARN_W_D = 0x70fb8000, + OPC_VSSRLN_B_H = 0x70fc8000, + OPC_VSSRLN_H_W = 0x70fd0000, + OPC_VSSRLN_W_D = 0x70fd8000, + OPC_VSSRAN_B_H = 0x70fe8000, + OPC_VSSRAN_H_W = 0x70ff0000, + OPC_VSSRAN_W_D = 0x70ff8000, + OPC_VSSRLRN_B_H = 0x71008000, + OPC_VSSRLRN_H_W = 0x71010000, + OPC_VSSRLRN_W_D = 0x71018000, + OPC_VSSRARN_B_H = 0x71028000, + OPC_VSSRARN_H_W = 0x71030000, + OPC_VSSRARN_W_D = 0x71038000, + OPC_VSSRLN_BU_H = 0x71048000, + OPC_VSSRLN_HU_W = 0x71050000, + OPC_VSSRLN_WU_D = 0x71058000, + OPC_VSSRAN_BU_H = 0x71068000, + OPC_VSSRAN_HU_W = 0x71070000, + OPC_VSSRAN_WU_D = 0x71078000, + OPC_VSSRLRN_BU_H = 0x71088000, + OPC_VSSRLRN_HU_W = 0x71090000, + OPC_VSSRLRN_WU_D = 0x71098000, + OPC_VSSRARN_BU_H = 0x710a8000, + OPC_VSSRARN_HU_W = 0x710b0000, + OPC_VSSRARN_WU_D = 0x710b8000, + OPC_VBITCLR_B = 0x710c0000, + OPC_VBITCLR_H = 0x710c8000, + OPC_VBITCLR_W = 0x710d0000, + OPC_VBITCLR_D = 0x710d8000, + OPC_VBITSET_B = 0x710e0000, + OPC_VBITSET_H = 0x710e8000, + OPC_VBITSET_W = 0x710f0000, + OPC_VBITSET_D = 0x710f8000, + OPC_VBITREV_B = 0x71100000, + OPC_VBITREV_H = 0x71108000, + OPC_VBITREV_W = 0x71110000, + OPC_VBITREV_D = 0x71118000, + OPC_VPACKEV_B = 0x71160000, + OPC_VPACKEV_H = 0x71168000, + OPC_VPACKEV_W = 0x71170000, + OPC_VPACKEV_D = 0x71178000, + OPC_VPACKOD_B = 0x71180000, + OPC_VPACKOD_H = 0x71188000, + OPC_VPACKOD_W = 0x71190000, + OPC_VPACKOD_D = 0x71198000, + OPC_VILVL_B = 0x711a0000, + OPC_VILVL_H = 0x711a8000, + OPC_VILVL_W = 0x711b0000, + OPC_VILVL_D = 0x711b8000, + 
OPC_VILVH_B = 0x711c0000, + OPC_VILVH_H = 0x711c8000, + OPC_VILVH_W = 0x711d0000, + OPC_VILVH_D = 0x711d8000, + OPC_VPICKEV_B = 0x711e0000, + OPC_VPICKEV_H = 0x711e8000, + OPC_VPICKEV_W = 0x711f0000, + OPC_VPICKEV_D = 0x711f8000, + OPC_VPICKOD_B = 0x71200000, + OPC_VPICKOD_H = 0x71208000, + OPC_VPICKOD_W = 0x71210000, + OPC_VPICKOD_D = 0x71218000, + OPC_VREPLVE_B = 0x71220000, + OPC_VREPLVE_H = 0x71228000, + OPC_VREPLVE_W = 0x71230000, + OPC_VREPLVE_D = 0x71238000, + OPC_VAND_V = 0x71260000, + OPC_VOR_V = 0x71268000, + OPC_VXOR_V = 0x71270000, + OPC_VNOR_V = 0x71278000, + OPC_VANDN_V = 0x71280000, + OPC_VORN_V = 0x71288000, + OPC_VFRSTP_B = 0x712b0000, + OPC_VFRSTP_H = 0x712b8000, + OPC_VADD_Q = 0x712d0000, + OPC_VSUB_Q = 0x712d8000, + OPC_VSIGNCOV_B = 0x712e0000, + OPC_VSIGNCOV_H = 0x712e8000, + OPC_VSIGNCOV_W = 0x712f0000, + OPC_VSIGNCOV_D = 0x712f8000, + OPC_VFADD_S = 0x71308000, + OPC_VFADD_D = 0x71310000, + OPC_VFSUB_S = 0x71328000, + OPC_VFSUB_D = 0x71330000, + OPC_VFMUL_S = 0x71388000, + OPC_VFMUL_D = 0x71390000, + OPC_VFDIV_S = 0x713a8000, + OPC_VFDIV_D = 0x713b0000, + OPC_VFMAX_S = 0x713c8000, + OPC_VFMAX_D = 0x713d0000, + OPC_VFMIN_S = 0x713e8000, + OPC_VFMIN_D = 0x713f0000, + OPC_VFMAXA_S = 0x71408000, + OPC_VFMAXA_D = 0x71410000, + OPC_VFMINA_S = 0x71428000, + OPC_VFMINA_D = 0x71430000, + OPC_VFCVT_H_S = 0x71460000, + OPC_VFCVT_S_D = 0x71468000, + OPC_VFFINT_S_L = 0x71480000, + OPC_VFTINT_W_D = 0x71498000, + OPC_VFTINTRM_W_D = 0x714a0000, + OPC_VFTINTRP_W_D = 0x714a8000, + OPC_VFTINTRZ_W_D = 0x714b0000, + OPC_VFTINTRNE_W_D = 0x714b8000, + OPC_VSHUF_H = 0x717a8000, + OPC_VSHUF_W = 0x717b0000, + OPC_VSHUF_D = 0x717b8000, + OPC_VSEQI_B = 0x72800000, + OPC_VSEQI_H = 0x72808000, + OPC_VSEQI_W = 0x72810000, + OPC_VSEQI_D = 0x72818000, + OPC_VSLEI_B = 0x72820000, + OPC_VSLEI_H = 0x72828000, + OPC_VSLEI_W = 0x72830000, + OPC_VSLEI_D = 0x72838000, + OPC_VSLEI_BU = 0x72840000, + OPC_VSLEI_HU = 0x72848000, + OPC_VSLEI_WU = 0x72850000, + OPC_VSLEI_DU = 0x72858000, + OPC_VSLTI_B = 0x72860000, + OPC_VSLTI_H = 0x72868000, + OPC_VSLTI_W = 0x72870000, + OPC_VSLTI_D = 0x72878000, + OPC_VSLTI_BU = 0x72880000, + OPC_VSLTI_HU = 0x72888000, + OPC_VSLTI_WU = 0x72890000, + OPC_VSLTI_DU = 0x72898000, + OPC_VADDI_BU = 0x728a0000, + OPC_VADDI_HU = 0x728a8000, + OPC_VADDI_WU = 0x728b0000, + OPC_VADDI_DU = 0x728b8000, + OPC_VSUBI_BU = 0x728c0000, + OPC_VSUBI_HU = 0x728c8000, + OPC_VSUBI_WU = 0x728d0000, + OPC_VSUBI_DU = 0x728d8000, + OPC_VBSLL_V = 0x728e0000, + OPC_VBSRL_V = 0x728e8000, + OPC_VMAXI_B = 0x72900000, + OPC_VMAXI_H = 0x72908000, + OPC_VMAXI_W = 0x72910000, + OPC_VMAXI_D = 0x72918000, + OPC_VMINI_B = 0x72920000, + OPC_VMINI_H = 0x72928000, + OPC_VMINI_W = 0x72930000, + OPC_VMINI_D = 0x72938000, + OPC_VMAXI_BU = 0x72940000, + OPC_VMAXI_HU = 0x72948000, + OPC_VMAXI_WU = 0x72950000, + OPC_VMAXI_DU = 0x72958000, + OPC_VMINI_BU = 0x72960000, + OPC_VMINI_HU = 0x72968000, + OPC_VMINI_WU = 0x72970000, + OPC_VMINI_DU = 0x72978000, + OPC_VFRSTPI_B = 0x729a0000, + OPC_VFRSTPI_H = 0x729a8000, + OPC_VCLO_B = 0x729c0000, + OPC_VCLO_H = 0x729c0400, + OPC_VCLO_W = 0x729c0800, + OPC_VCLO_D = 0x729c0c00, + OPC_VCLZ_B = 0x729c1000, + OPC_VCLZ_H = 0x729c1400, + OPC_VCLZ_W = 0x729c1800, + OPC_VCLZ_D = 0x729c1c00, + OPC_VPCNT_B = 0x729c2000, + OPC_VPCNT_H = 0x729c2400, + OPC_VPCNT_W = 0x729c2800, + OPC_VPCNT_D = 0x729c2c00, + OPC_VNEG_B = 0x729c3000, + OPC_VNEG_H = 0x729c3400, + OPC_VNEG_W = 0x729c3800, + OPC_VNEG_D = 0x729c3c00, + OPC_VMSKLTZ_B = 0x729c4000, + OPC_VMSKLTZ_H = 0x729c4400, + OPC_VMSKLTZ_W = 
0x729c4800, + OPC_VMSKLTZ_D = 0x729c4c00, + OPC_VMSKGEZ_B = 0x729c5000, + OPC_VMSKNZ_B = 0x729c6000, + OPC_VSETEQZ_V = 0x729c9800, + OPC_VSETNEZ_V = 0x729c9c00, + OPC_VSETANYEQZ_B = 0x729ca000, + OPC_VSETANYEQZ_H = 0x729ca400, + OPC_VSETANYEQZ_W = 0x729ca800, + OPC_VSETANYEQZ_D = 0x729cac00, + OPC_VSETALLNEZ_B = 0x729cb000, + OPC_VSETALLNEZ_H = 0x729cb400, + OPC_VSETALLNEZ_W = 0x729cb800, + OPC_VSETALLNEZ_D = 0x729cbc00, + OPC_VFLOGB_S = 0x729cc400, + OPC_VFLOGB_D = 0x729cc800, + OPC_VFCLASS_S = 0x729cd400, + OPC_VFCLASS_D = 0x729cd800, + OPC_VFSQRT_S = 0x729ce400, + OPC_VFSQRT_D = 0x729ce800, + OPC_VFRECIP_S = 0x729cf400, + OPC_VFRECIP_D = 0x729cf800, + OPC_VFRSQRT_S = 0x729d0400, + OPC_VFRSQRT_D = 0x729d0800, + OPC_VFRINT_S = 0x729d3400, + OPC_VFRINT_D = 0x729d3800, + OPC_VFRINTRM_S = 0x729d4400, + OPC_VFRINTRM_D = 0x729d4800, + OPC_VFRINTRP_S = 0x729d5400, + OPC_VFRINTRP_D = 0x729d5800, + OPC_VFRINTRZ_S = 0x729d6400, + OPC_VFRINTRZ_D = 0x729d6800, + OPC_VFRINTRNE_S = 0x729d7400, + OPC_VFRINTRNE_D = 0x729d7800, + OPC_VFCVTL_S_H = 0x729de800, + OPC_VFCVTH_S_H = 0x729dec00, + OPC_VFCVTL_D_S = 0x729df000, + OPC_VFCVTH_D_S = 0x729df400, + OPC_VFFINT_S_W = 0x729e0000, + OPC_VFFINT_S_WU = 0x729e0400, + OPC_VFFINT_D_L = 0x729e0800, + OPC_VFFINT_D_LU = 0x729e0c00, + OPC_VFFINTL_D_W = 0x729e1000, + OPC_VFFINTH_D_W = 0x729e1400, + OPC_VFTINT_W_S = 0x729e3000, + OPC_VFTINT_L_D = 0x729e3400, + OPC_VFTINTRM_W_S = 0x729e3800, + OPC_VFTINTRM_L_D = 0x729e3c00, + OPC_VFTINTRP_W_S = 0x729e4000, + OPC_VFTINTRP_L_D = 0x729e4400, + OPC_VFTINTRZ_W_S = 0x729e4800, + OPC_VFTINTRZ_L_D = 0x729e4c00, + OPC_VFTINTRNE_W_S = 0x729e5000, + OPC_VFTINTRNE_L_D = 0x729e5400, + OPC_VFTINT_WU_S = 0x729e5800, + OPC_VFTINT_LU_D = 0x729e5c00, + OPC_VFTINTRZ_WU_S = 0x729e7000, + OPC_VFTINTRZ_LU_D = 0x729e7400, + OPC_VFTINTL_L_S = 0x729e8000, + OPC_VFTINTH_L_S = 0x729e8400, + OPC_VFTINTRML_L_S = 0x729e8800, + OPC_VFTINTRMH_L_S = 0x729e8c00, + OPC_VFTINTRPL_L_S = 0x729e9000, + OPC_VFTINTRPH_L_S = 0x729e9400, + OPC_VFTINTRZL_L_S = 0x729e9800, + OPC_VFTINTRZH_L_S = 0x729e9c00, + OPC_VFTINTRNEL_L_S = 0x729ea000, + OPC_VFTINTRNEH_L_S = 0x729ea400, + OPC_VEXTH_H_B = 0x729ee000, + OPC_VEXTH_W_H = 0x729ee400, + OPC_VEXTH_D_W = 0x729ee800, + OPC_VEXTH_Q_D = 0x729eec00, + OPC_VEXTH_HU_BU = 0x729ef000, + OPC_VEXTH_WU_HU = 0x729ef400, + OPC_VEXTH_DU_WU = 0x729ef800, + OPC_VEXTH_QU_DU = 0x729efc00, + OPC_VREPLGR2VR_B = 0x729f0000, + OPC_VREPLGR2VR_H = 0x729f0400, + OPC_VREPLGR2VR_W = 0x729f0800, + OPC_VREPLGR2VR_D = 0x729f0c00, + OPC_VROTRI_B = 0x72a02000, + OPC_VROTRI_H = 0x72a04000, + OPC_VROTRI_W = 0x72a08000, + OPC_VROTRI_D = 0x72a10000, + OPC_VSRLRI_B = 0x72a42000, + OPC_VSRLRI_H = 0x72a44000, + OPC_VSRLRI_W = 0x72a48000, + OPC_VSRLRI_D = 0x72a50000, + OPC_VSRARI_B = 0x72a82000, + OPC_VSRARI_H = 0x72a84000, + OPC_VSRARI_W = 0x72a88000, + OPC_VSRARI_D = 0x72a90000, + OPC_VINSGR2VR_B = 0x72eb8000, + OPC_VINSGR2VR_H = 0x72ebc000, + OPC_VINSGR2VR_W = 0x72ebe000, + OPC_VINSGR2VR_D = 0x72ebf000, + OPC_VPICKVE2GR_B = 0x72ef8000, + OPC_VPICKVE2GR_H = 0x72efc000, + OPC_VPICKVE2GR_W = 0x72efe000, + OPC_VPICKVE2GR_D = 0x72eff000, + OPC_VPICKVE2GR_BU = 0x72f38000, + OPC_VPICKVE2GR_HU = 0x72f3c000, + OPC_VPICKVE2GR_WU = 0x72f3e000, + OPC_VPICKVE2GR_DU = 0x72f3f000, + OPC_VREPLVEI_B = 0x72f78000, + OPC_VREPLVEI_H = 0x72f7c000, + OPC_VREPLVEI_W = 0x72f7e000, + OPC_VREPLVEI_D = 0x72f7f000, + OPC_VSLLWIL_H_B = 0x73082000, + OPC_VSLLWIL_W_H = 0x73084000, + OPC_VSLLWIL_D_W = 0x73088000, + OPC_VEXTL_Q_D = 0x73090000, + OPC_VSLLWIL_HU_BU = 0x730c2000, + 
OPC_VSLLWIL_WU_HU = 0x730c4000, + OPC_VSLLWIL_DU_WU = 0x730c8000, + OPC_VEXTL_QU_DU = 0x730d0000, + OPC_VBITCLRI_B = 0x73102000, + OPC_VBITCLRI_H = 0x73104000, + OPC_VBITCLRI_W = 0x73108000, + OPC_VBITCLRI_D = 0x73110000, + OPC_VBITSETI_B = 0x73142000, + OPC_VBITSETI_H = 0x73144000, + OPC_VBITSETI_W = 0x73148000, + OPC_VBITSETI_D = 0x73150000, + OPC_VBITREVI_B = 0x73182000, + OPC_VBITREVI_H = 0x73184000, + OPC_VBITREVI_W = 0x73188000, + OPC_VBITREVI_D = 0x73190000, + OPC_VSAT_B = 0x73242000, + OPC_VSAT_H = 0x73244000, + OPC_VSAT_W = 0x73248000, + OPC_VSAT_D = 0x73250000, + OPC_VSAT_BU = 0x73282000, + OPC_VSAT_HU = 0x73284000, + OPC_VSAT_WU = 0x73288000, + OPC_VSAT_DU = 0x73290000, + OPC_VSLLI_B = 0x732c2000, + OPC_VSLLI_H = 0x732c4000, + OPC_VSLLI_W = 0x732c8000, + OPC_VSLLI_D = 0x732d0000, + OPC_VSRLI_B = 0x73302000, + OPC_VSRLI_H = 0x73304000, + OPC_VSRLI_W = 0x73308000, + OPC_VSRLI_D = 0x73310000, + OPC_VSRAI_B = 0x73342000, + OPC_VSRAI_H = 0x73344000, + OPC_VSRAI_W = 0x73348000, + OPC_VSRAI_D = 0x73350000, + OPC_VSRLNI_B_H = 0x73404000, + OPC_VSRLNI_H_W = 0x73408000, + OPC_VSRLNI_W_D = 0x73410000, + OPC_VSRLNI_D_Q = 0x73420000, + OPC_VSRLRNI_B_H = 0x73444000, + OPC_VSRLRNI_H_W = 0x73448000, + OPC_VSRLRNI_W_D = 0x73450000, + OPC_VSRLRNI_D_Q = 0x73460000, + OPC_VSSRLNI_B_H = 0x73484000, + OPC_VSSRLNI_H_W = 0x73488000, + OPC_VSSRLNI_W_D = 0x73490000, + OPC_VSSRLNI_D_Q = 0x734a0000, + OPC_VSSRLNI_BU_H = 0x734c4000, + OPC_VSSRLNI_HU_W = 0x734c8000, + OPC_VSSRLNI_WU_D = 0x734d0000, + OPC_VSSRLNI_DU_Q = 0x734e0000, + OPC_VSSRLRNI_B_H = 0x73504000, + OPC_VSSRLRNI_H_W = 0x73508000, + OPC_VSSRLRNI_W_D = 0x73510000, + OPC_VSSRLRNI_D_Q = 0x73520000, + OPC_VSSRLRNI_BU_H = 0x73544000, + OPC_VSSRLRNI_HU_W = 0x73548000, + OPC_VSSRLRNI_WU_D = 0x73550000, + OPC_VSSRLRNI_DU_Q = 0x73560000, + OPC_VSRANI_B_H = 0x73584000, + OPC_VSRANI_H_W = 0x73588000, + OPC_VSRANI_W_D = 0x73590000, + OPC_VSRANI_D_Q = 0x735a0000, + OPC_VSRARNI_B_H = 0x735c4000, + OPC_VSRARNI_H_W = 0x735c8000, + OPC_VSRARNI_W_D = 0x735d0000, + OPC_VSRARNI_D_Q = 0x735e0000, + OPC_VSSRANI_B_H = 0x73604000, + OPC_VSSRANI_H_W = 0x73608000, + OPC_VSSRANI_W_D = 0x73610000, + OPC_VSSRANI_D_Q = 0x73620000, + OPC_VSSRANI_BU_H = 0x73644000, + OPC_VSSRANI_HU_W = 0x73648000, + OPC_VSSRANI_WU_D = 0x73650000, + OPC_VSSRANI_DU_Q = 0x73660000, + OPC_VSSRARNI_B_H = 0x73684000, + OPC_VSSRARNI_H_W = 0x73688000, + OPC_VSSRARNI_W_D = 0x73690000, + OPC_VSSRARNI_D_Q = 0x736a0000, + OPC_VSSRARNI_BU_H = 0x736c4000, + OPC_VSSRARNI_HU_W = 0x736c8000, + OPC_VSSRARNI_WU_D = 0x736d0000, + OPC_VSSRARNI_DU_Q = 0x736e0000, + OPC_VEXTRINS_D = 0x73800000, + OPC_VEXTRINS_W = 0x73840000, + OPC_VEXTRINS_H = 0x73880000, + OPC_VEXTRINS_B = 0x738c0000, + OPC_VSHUF4I_B = 0x73900000, + OPC_VSHUF4I_H = 0x73940000, + OPC_VSHUF4I_W = 0x73980000, + OPC_VSHUF4I_D = 0x739c0000, + OPC_VBITSELI_B = 0x73c40000, + OPC_VANDI_B = 0x73d00000, + OPC_VORI_B = 0x73d40000, + OPC_VXORI_B = 0x73d80000, + OPC_VNORI_B = 0x73dc0000, + OPC_VLDI = 0x73e00000, + OPC_VPERMI_W = 0x73e40000, +} LoongArchInsn; + +static int32_t __attribute__((unused)) +encode_d_slot(LoongArchInsn opc, uint32_t d) +{ + return opc | d; +} + +static int32_t __attribute__((unused)) +encode_dj_slots(LoongArchInsn opc, uint32_t d, uint32_t j) +{ + return opc | d | j << 5; +} + +static int32_t __attribute__((unused)) +encode_djk_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k) +{ + return opc | d | j << 5 | k << 10; +} + +static int32_t __attribute__((unused)) +encode_djka_slots(LoongArchInsn opc, uint32_t d, uint32_t j, 
uint32_t k, + uint32_t a) +{ + return opc | d | j << 5 | k << 10 | a << 15; +} + +static int32_t __attribute__((unused)) +encode_djkm_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k, + uint32_t m) +{ + return opc | d | j << 5 | k << 10 | m << 16; +} + +static int32_t __attribute__((unused)) +encode_djkn_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k, + uint32_t n) +{ + return opc | d | j << 5 | k << 10 | n << 18; +} + +static int32_t __attribute__((unused)) +encode_dk_slots(LoongArchInsn opc, uint32_t d, uint32_t k) +{ + return opc | d | k << 10; +} + +static int32_t __attribute__((unused)) +encode_cdvj_insn(LoongArchInsn opc, TCGReg cd, TCGReg vj) +{ + tcg_debug_assert(cd >= 0 && cd <= 0x7); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + return encode_dj_slots(opc, cd, vj & 0x1f); +} + +static int32_t __attribute__((unused)) +encode_dj_insn(LoongArchInsn opc, TCGReg d, TCGReg j) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + return encode_dj_slots(opc, d, j); +} + +static int32_t __attribute__((unused)) +encode_djk_insn(LoongArchInsn opc, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(k >= 0 && k <= 0x1f); + return encode_djk_slots(opc, d, j, k); +} + +static int32_t __attribute__((unused)) +encode_djsk12_insn(LoongArchInsn opc, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk12 >= -0x800 && sk12 <= 0x7ff); + return encode_djk_slots(opc, d, j, sk12 & 0xfff); +} + +static int32_t __attribute__((unused)) +encode_djsk16_insn(LoongArchInsn opc, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk16 >= -0x8000 && sk16 <= 0x7fff); + return encode_djk_slots(opc, d, j, sk16 & 0xffff); +} + +static int32_t __attribute__((unused)) +encode_djuk12_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk12) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk12 <= 0xfff); + return encode_djk_slots(opc, d, j, uk12); +} + +static int32_t __attribute__((unused)) +encode_djuk5_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk5 <= 0x1f); + return encode_djk_slots(opc, d, j, uk5); +} + +static int32_t __attribute__((unused)) +encode_djuk5um5_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk5, + uint32_t um5) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk5 <= 0x1f); + tcg_debug_assert(um5 <= 0x1f); + return encode_djkm_slots(opc, d, j, uk5, um5); +} + +static int32_t __attribute__((unused)) +encode_djuk6_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk6 <= 0x3f); + return encode_djk_slots(opc, d, j, uk6); +} + +static int32_t __attribute__((unused)) +encode_djuk6um6_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk6, + uint32_t um6) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk6 <= 0x3f); + tcg_debug_assert(um6 <= 0x3f); + return encode_djkm_slots(opc, d, j, uk6, um6); +} + +static int32_t __attribute__((unused)) +encode_dsj20_insn(LoongArchInsn opc, TCGReg d, int32_t 
sj20) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(sj20 >= -0x80000 && sj20 <= 0x7ffff); + return encode_dj_slots(opc, d, sj20 & 0xfffff); +} + +static int32_t __attribute__((unused)) +encode_dvjuk1_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk1) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk1 <= 0x1); + return encode_djk_slots(opc, d, vj & 0x1f, uk1); +} + +static int32_t __attribute__((unused)) +encode_dvjuk2_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk2) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk2 <= 0x3); + return encode_djk_slots(opc, d, vj & 0x1f, uk2); +} + +static int32_t __attribute__((unused)) +encode_dvjuk3_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk3) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk3 <= 0x7); + return encode_djk_slots(opc, d, vj & 0x1f, uk3); +} + +static int32_t __attribute__((unused)) +encode_dvjuk4_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk4) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk4 <= 0xf); + return encode_djk_slots(opc, d, vj & 0x1f, uk4); +} + +static int32_t __attribute__((unused)) +encode_sd10k16_insn(LoongArchInsn opc, int32_t sd10k16) +{ + tcg_debug_assert(sd10k16 >= -0x2000000 && sd10k16 <= 0x1ffffff); + return encode_dk_slots(opc, (sd10k16 >> 16) & 0x3ff, sd10k16 & 0xffff); +} + +static int32_t __attribute__((unused)) +encode_ud15_insn(LoongArchInsn opc, uint32_t ud15) +{ + tcg_debug_assert(ud15 <= 0x7fff); + return encode_d_slot(opc, ud15); +} + +static int32_t __attribute__((unused)) +encode_vdj_insn(LoongArchInsn opc, TCGReg vd, TCGReg j) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + return encode_dj_slots(opc, vd & 0x1f, j); +} + +static int32_t __attribute__((unused)) +encode_vdjk_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, TCGReg k) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(k >= 0 && k <= 0x1f); + return encode_djk_slots(opc, vd & 0x1f, j, k); +} + +static int32_t __attribute__((unused)) +encode_vdjsk10_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk10) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk10 >= -0x200 && sk10 <= 0x1ff); + return encode_djk_slots(opc, vd & 0x1f, j, sk10 & 0x3ff); +} + +static int32_t __attribute__((unused)) +encode_vdjsk11_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk11) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk11 >= -0x400 && sk11 <= 0x3ff); + return encode_djk_slots(opc, vd & 0x1f, j, sk11 & 0x7ff); +} + +static int32_t __attribute__((unused)) +encode_vdjsk12_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk12) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk12 >= -0x800 && sk12 <= 0x7ff); + return encode_djk_slots(opc, vd & 0x1f, j, sk12 & 0xfff); +} + +static int32_t __attribute__((unused)) +encode_vdjsk8un1_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un1) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f); + tcg_debug_assert(un1 <= 0x1); + 
return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un1); +} + +static int32_t __attribute__((unused)) +encode_vdjsk8un2_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un2) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f); + tcg_debug_assert(un2 <= 0x3); + return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un2); +} + +static int32_t __attribute__((unused)) +encode_vdjsk8un3_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un3) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f); + tcg_debug_assert(un3 <= 0x7); + return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un3); +} + +static int32_t __attribute__((unused)) +encode_vdjsk8un4_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un4) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f); + tcg_debug_assert(un4 <= 0xf); + return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un4); +} + +static int32_t __attribute__((unused)) +encode_vdjsk9_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk9) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk9 >= -0x100 && sk9 <= 0xff); + return encode_djk_slots(opc, vd & 0x1f, j, sk9 & 0x1ff); +} + +static int32_t __attribute__((unused)) +encode_vdjuk1_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk1) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk1 <= 0x1); + return encode_djk_slots(opc, vd & 0x1f, j, uk1); +} + +static int32_t __attribute__((unused)) +encode_vdjuk2_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk2) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk2 <= 0x3); + return encode_djk_slots(opc, vd & 0x1f, j, uk2); +} + +static int32_t __attribute__((unused)) +encode_vdjuk3_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk3) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk3 <= 0x7); + return encode_djk_slots(opc, vd & 0x1f, j, uk3); +} + +static int32_t __attribute__((unused)) +encode_vdjuk4_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk4) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk4 <= 0xf); + return encode_djk_slots(opc, vd & 0x1f, j, uk4); +} + +static int32_t __attribute__((unused)) +encode_vdsj13_insn(LoongArchInsn opc, TCGReg vd, int32_t sj13) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(sj13 >= -0x1000 && sj13 <= 0xfff); + return encode_dj_slots(opc, vd & 0x1f, sj13 & 0x1fff); +} + +static int32_t __attribute__((unused)) +encode_vdvj_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + return encode_dj_slots(opc, vd & 0x1f, vj & 0x1f); +} + +static int32_t __attribute__((unused)) +encode_vdvjk_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(k >= 0 && k <= 0x1f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, k); +} + +static int32_t __attribute__((unused)) 
+encode_vdvjsk5_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(sk5 >= -0x10 && sk5 <= 0xf); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, sk5 & 0x1f); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk1_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk1) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk1 <= 0x1); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk1); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk2_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk2) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk2 <= 0x3); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk2); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk3_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk3 <= 0x7); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk3); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk4_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk4 <= 0xf); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk4); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk5_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk5 <= 0x1f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk5); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk6_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk6 <= 0x3f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk6); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk7_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk7 <= 0x7f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk7); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk8_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk8 <= 0xff); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk8); +} + +static int32_t __attribute__((unused)) +encode_vdvjvk_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(vk >= 0x20 && vk <= 0x3f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, vk & 0x1f); +} + +static int32_t __attribute__((unused)) +encode_vdvjvkva_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg vk, + TCGReg va) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(vk >= 0x20 && vk <= 0x3f); + tcg_debug_assert(va >= 0x20 && va <= 0x3f); + return encode_djka_slots(opc, vd & 0x1f, vj & 0x1f, vk & 0x1f, va & 0x1f); +} + +/* Emits the `clz.w d, j` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_clz_w(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_CLZ_W, d, j)); +} + +/* Emits the `ctz.w d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ctz_w(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_CTZ_W, d, j)); +} + +/* Emits the `clz.d d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_clz_d(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_CLZ_D, d, j)); +} + +/* Emits the `ctz.d d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ctz_d(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_CTZ_D, d, j)); +} + +/* Emits the `revb.2h d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_revb_2h(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_REVB_2H, d, j)); +} + +/* Emits the `revb.2w d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_revb_2w(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_REVB_2W, d, j)); +} + +/* Emits the `revb.d d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_revb_d(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_REVB_D, d, j)); +} + +/* Emits the `sext.h d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sext_h(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_SEXT_H, d, j)); +} + +/* Emits the `sext.b d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sext_b(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_SEXT_B, d, j)); +} + +/* Emits the `add.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_add_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ADD_W, d, j, k)); +} + +/* Emits the `add.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_add_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ADD_D, d, j, k)); +} + +/* Emits the `sub.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sub_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SUB_W, d, j, k)); +} + +/* Emits the `sub.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sub_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SUB_D, d, j, k)); +} + +/* Emits the `slt d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_slt(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SLT, d, j, k)); +} + +/* Emits the `sltu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sltu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SLTU, d, j, k)); +} + +/* Emits the `maskeqz d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_maskeqz(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MASKEQZ, d, j, k)); +} + +/* Emits the `masknez d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_masknez(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MASKNEZ, d, j, k)); +} + +/* Emits the `nor d, j, k` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_nor(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_NOR, d, j, k)); +} + +/* Emits the `and d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_and(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_AND, d, j, k)); +} + +/* Emits the `or d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_or(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_OR, d, j, k)); +} + +/* Emits the `xor d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_xor(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_XOR, d, j, k)); +} + +/* Emits the `orn d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_orn(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ORN, d, j, k)); +} + +/* Emits the `andn d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_andn(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ANDN, d, j, k)); +} + +/* Emits the `sll.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sll_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SLL_W, d, j, k)); +} + +/* Emits the `srl.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srl_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SRL_W, d, j, k)); +} + +/* Emits the `sra.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sra_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SRA_W, d, j, k)); +} + +/* Emits the `sll.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sll_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SLL_D, d, j, k)); +} + +/* Emits the `srl.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srl_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SRL_D, d, j, k)); +} + +/* Emits the `sra.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sra_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SRA_D, d, j, k)); +} + +/* Emits the `rotr.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_rotr_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ROTR_W, d, j, k)); +} + +/* Emits the `rotr.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_rotr_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ROTR_D, d, j, k)); +} + +/* Emits the `mul.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mul_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MUL_W, d, j, k)); +} + +/* Emits the `mulh.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mulh_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MULH_W, d, j, k)); +} + +/* Emits the `mulh.wu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mulh_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MULH_WU, d, j, k)); +} + +/* Emits the `mul.d d, j, k` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_mul_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MUL_D, d, j, k)); +} + +/* Emits the `mulh.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mulh_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MULH_D, d, j, k)); +} + +/* Emits the `mulh.du d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mulh_du(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MULH_DU, d, j, k)); +} + +/* Emits the `div.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_div_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_DIV_W, d, j, k)); +} + +/* Emits the `mod.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mod_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MOD_W, d, j, k)); +} + +/* Emits the `div.wu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_div_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_DIV_WU, d, j, k)); +} + +/* Emits the `mod.wu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mod_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MOD_WU, d, j, k)); +} + +/* Emits the `div.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_div_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_DIV_D, d, j, k)); +} + +/* Emits the `mod.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mod_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MOD_D, d, j, k)); +} + +/* Emits the `div.du d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_div_du(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_DIV_DU, d, j, k)); +} + +/* Emits the `mod.du d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mod_du(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MOD_DU, d, j, k)); +} + +/* Emits the `slli.w d, j, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_slli_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_out32(s, encode_djuk5_insn(OPC_SLLI_W, d, j, uk5)); +} + +/* Emits the `slli.d d, j, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_slli_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_out32(s, encode_djuk6_insn(OPC_SLLI_D, d, j, uk6)); +} + +/* Emits the `srli.w d, j, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srli_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_out32(s, encode_djuk5_insn(OPC_SRLI_W, d, j, uk5)); +} + +/* Emits the `srli.d d, j, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srli_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_out32(s, encode_djuk6_insn(OPC_SRLI_D, d, j, uk6)); +} + +/* Emits the `srai.w d, j, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srai_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_out32(s, encode_djuk5_insn(OPC_SRAI_W, d, j, uk5)); +} + +/* Emits the `srai.d d, j, uk6` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_srai_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_out32(s, encode_djuk6_insn(OPC_SRAI_D, d, j, uk6)); +} + +/* Emits the `rotri.w d, j, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_rotri_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_out32(s, encode_djuk5_insn(OPC_ROTRI_W, d, j, uk5)); +} + +/* Emits the `rotri.d d, j, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_rotri_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_out32(s, encode_djuk6_insn(OPC_ROTRI_D, d, j, uk6)); +} + +/* Emits the `bstrins.w d, j, uk5, um5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bstrins_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5, + uint32_t um5) +{ + tcg_out32(s, encode_djuk5um5_insn(OPC_BSTRINS_W, d, j, uk5, um5)); +} + +/* Emits the `bstrpick.w d, j, uk5, um5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bstrpick_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5, + uint32_t um5) +{ + tcg_out32(s, encode_djuk5um5_insn(OPC_BSTRPICK_W, d, j, uk5, um5)); +} + +/* Emits the `bstrins.d d, j, uk6, um6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bstrins_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6, + uint32_t um6) +{ + tcg_out32(s, encode_djuk6um6_insn(OPC_BSTRINS_D, d, j, uk6, um6)); +} + +/* Emits the `bstrpick.d d, j, uk6, um6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bstrpick_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6, + uint32_t um6) +{ + tcg_out32(s, encode_djuk6um6_insn(OPC_BSTRPICK_D, d, j, uk6, um6)); +} + +/* Emits the `slti d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_slti(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_SLTI, d, j, sk12)); +} + +/* Emits the `sltui d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sltui(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_SLTUI, d, j, sk12)); +} + +/* Emits the `addi.w d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_addi_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ADDI_W, d, j, sk12)); +} + +/* Emits the `addi.d d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_addi_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ADDI_D, d, j, sk12)); +} + +/* Emits the `cu52i.d d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_cu52i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_CU52I_D, d, j, sk12)); +} + +/* Emits the `andi d, j, uk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_andi(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12) +{ + tcg_out32(s, encode_djuk12_insn(OPC_ANDI, d, j, uk12)); +} + +/* Emits the `ori d, j, uk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12) +{ + tcg_out32(s, encode_djuk12_insn(OPC_ORI, d, j, uk12)); +} + +/* Emits the `xori d, j, uk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12) +{ + tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12)); +} + +/* Emits the `vfmadd.s vd, vj, vk, va` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfmadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMADD_S, vd, vj, vk, va)); +} + +/* Emits the `vfmadd.d vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMADD_D, vd, vj, vk, va)); +} + +/* Emits the `vfmsub.s vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMSUB_S, vd, vj, vk, va)); +} + +/* Emits the `vfmsub.d vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMSUB_D, vd, vj, vk, va)); +} + +/* Emits the `vfnmadd.s vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfnmadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMADD_S, vd, vj, vk, va)); +} + +/* Emits the `vfnmadd.d vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfnmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMADD_D, vd, vj, vk, va)); +} + +/* Emits the `vfnmsub.s vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfnmsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMSUB_S, vd, vj, vk, va)); +} + +/* Emits the `vfnmsub.d vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfnmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMSUB_D, vd, vj, vk, va)); +} + +/* Emits the `vfcmp.caf.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_caf_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CAF_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.saf.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_saf_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SAF_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.clt.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_clt_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLT_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.slt.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_slt_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLT_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.ceq.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_ceq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CEQ_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.seq.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_seq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SEQ_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cle.s vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cle_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sle.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sle_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cun.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cun_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUN_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sun.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sun_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUN_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cult.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cult_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULT_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sult.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sult_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULT_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cueq.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cueq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUEQ_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sueq.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sueq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUEQ_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cule.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cule_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sule.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sule_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cne.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cne_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CNE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sne.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sne_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SNE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cor.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cor_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_COR_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sor.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sor_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SOR_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cune.s vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cune_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUNE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sune.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sune_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUNE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.caf.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_caf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CAF_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.saf.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_saf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SAF_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.clt.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_clt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLT_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.slt.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_slt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLT_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.ceq.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_ceq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CEQ_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.seq.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_seq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SEQ_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cle.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sle.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cun.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cun_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUN_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sun.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sun_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUN_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cult.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cult_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULT_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sult.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sult_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULT_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cueq.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cueq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUEQ_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sueq.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sueq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUEQ_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cule.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cule_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sule.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sule_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cne.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cne_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CNE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sne.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sne_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SNE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cor.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cor_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_COR_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sor.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sor_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SOR_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cune.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cune_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUNE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sune.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sune_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUNE_D, vd, vj, vk)); +} + +/* Emits the `vbitsel.v vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitsel_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VBITSEL_V, vd, vj, vk, va)); +} + +/* Emits the `vshuf.b vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VSHUF_B, vd, vj, vk, va)); +} + +/* Emits the `addu16i.d d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16)); +} + +/* Emits the `lu12i.w d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_LU12I_W, d, sj20)); +} + +/* Emits the `cu32i.d d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_cu32i_d(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_CU32I_D, d, sj20)); +} + +/* Emits the `pcaddu2i d, sj20` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_pcaddu2i(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_PCADDU2I, d, sj20)); +} + +/* Emits the `pcalau12i d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_pcalau12i(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_PCALAU12I, d, sj20)); +} + +/* Emits the `pcaddu12i d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_pcaddu12i(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_PCADDU12I, d, sj20)); +} + +/* Emits the `pcaddu18i d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_pcaddu18i(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_PCADDU18I, d, sj20)); +} + +/* Emits the `ld.b d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_b(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_B, d, j, sk12)); +} + +/* Emits the `ld.h d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_h(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_H, d, j, sk12)); +} + +/* Emits the `ld.w d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_W, d, j, sk12)); +} + +/* Emits the `ld.d d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_D, d, j, sk12)); +} + +/* Emits the `st.b d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_st_b(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ST_B, d, j, sk12)); +} + +/* Emits the `st.h d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_st_h(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ST_H, d, j, sk12)); +} + +/* Emits the `st.w d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_st_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ST_W, d, j, sk12)); +} + +/* Emits the `st.d d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_st_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ST_D, d, j, sk12)); +} + +/* Emits the `ld.bu d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_bu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_BU, d, j, sk12)); +} + +/* Emits the `ld.hu d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_hu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_HU, d, j, sk12)); +} + +/* Emits the `ld.wu d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_wu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_WU, d, j, sk12)); +} + +/* Emits the `vld vd, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vld(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_vdjsk12_insn(OPC_VLD, vd, j, sk12)); +} + +/* Emits the `vst vd, j, sk12` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vst(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_vdjsk12_insn(OPC_VST, vd, j, sk12)); +} + +/* Emits the `vldrepl.d vd, j, sk9` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldrepl_d(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk9) +{ + tcg_out32(s, encode_vdjsk9_insn(OPC_VLDREPL_D, vd, j, sk9)); +} + +/* Emits the `vldrepl.w vd, j, sk10` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldrepl_w(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk10) +{ + tcg_out32(s, encode_vdjsk10_insn(OPC_VLDREPL_W, vd, j, sk10)); +} + +/* Emits the `vldrepl.h vd, j, sk11` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldrepl_h(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk11) +{ + tcg_out32(s, encode_vdjsk11_insn(OPC_VLDREPL_H, vd, j, sk11)); +} + +/* Emits the `vldrepl.b vd, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldrepl_b(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_vdjsk12_insn(OPC_VLDREPL_B, vd, j, sk12)); +} + +/* Emits the `vstelm.d vd, j, sk8, un1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstelm_d(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un1) +{ + tcg_out32(s, encode_vdjsk8un1_insn(OPC_VSTELM_D, vd, j, sk8, un1)); +} + +/* Emits the `vstelm.w vd, j, sk8, un2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstelm_w(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un2) +{ + tcg_out32(s, encode_vdjsk8un2_insn(OPC_VSTELM_W, vd, j, sk8, un2)); +} + +/* Emits the `vstelm.h vd, j, sk8, un3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstelm_h(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un3) +{ + tcg_out32(s, encode_vdjsk8un3_insn(OPC_VSTELM_H, vd, j, sk8, un3)); +} + +/* Emits the `vstelm.b vd, j, sk8, un4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstelm_b(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un4) +{ + tcg_out32(s, encode_vdjsk8un4_insn(OPC_VSTELM_B, vd, j, sk8, un4)); +} + +/* Emits the `ldx.b d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_b(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_B, d, j, k)); +} + +/* Emits the `ldx.h d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_h(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_H, d, j, k)); +} + +/* Emits the `ldx.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_W, d, j, k)); +} + +/* Emits the `ldx.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_D, d, j, k)); +} + +/* Emits the `stx.b d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_stx_b(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_STX_B, d, j, k)); +} + +/* Emits the `stx.h d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_stx_h(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_STX_H, d, j, k)); +} + +/* Emits the `stx.w d, j, k` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_stx_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_STX_W, d, j, k)); +} + +/* Emits the `stx.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_stx_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_STX_D, d, j, k)); +} + +/* Emits the `ldx.bu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_bu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_BU, d, j, k)); +} + +/* Emits the `ldx.hu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_hu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_HU, d, j, k)); +} + +/* Emits the `ldx.wu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_WU, d, j, k)); +} + +/* Emits the `vldx vd, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldx(TCGContext *s, TCGReg vd, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_vdjk_insn(OPC_VLDX, vd, j, k)); +} + +/* Emits the `vstx vd, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstx(TCGContext *s, TCGReg vd, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_vdjk_insn(OPC_VSTX, vd, j, k)); +} + +/* Emits the `dbar ud15` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_dbar(TCGContext *s, uint32_t ud15) +{ + tcg_out32(s, encode_ud15_insn(OPC_DBAR, ud15)); +} + +/* Emits the `jirl d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_jirl(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_JIRL, d, j, sk16)); +} + +/* Emits the `b sd10k16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_b(TCGContext *s, int32_t sd10k16) +{ + tcg_out32(s, encode_sd10k16_insn(OPC_B, sd10k16)); +} + +/* Emits the `bl sd10k16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bl(TCGContext *s, int32_t sd10k16) +{ + tcg_out32(s, encode_sd10k16_insn(OPC_BL, sd10k16)); +} + +/* Emits the `beq d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_beq(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BEQ, d, j, sk16)); +} + +/* Emits the `bne d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bne(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BNE, d, j, sk16)); +} + +/* Emits the `bgt d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bgt(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BGT, d, j, sk16)); +} + +/* Emits the `ble d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ble(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BLE, d, j, sk16)); +} + +/* Emits the `bgtu d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bgtu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BGTU, d, j, sk16)); +} + +/* Emits the `bleu d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bleu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BLEU, d, j, sk16)); +} + +/* Emits the `vseq.b vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vseq_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_B, vd, vj, vk)); +} + +/* Emits the `vseq.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseq_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_H, vd, vj, vk)); +} + +/* Emits the `vseq.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseq_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_W, vd, vj, vk)); +} + +/* Emits the `vseq.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_D, vd, vj, vk)); +} + +/* Emits the `vsle.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_B, vd, vj, vk)); +} + +/* Emits the `vsle.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_H, vd, vj, vk)); +} + +/* Emits the `vsle.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_W, vd, vj, vk)); +} + +/* Emits the `vsle.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_D, vd, vj, vk)); +} + +/* Emits the `vsle.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_BU, vd, vj, vk)); +} + +/* Emits the `vsle.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_HU, vd, vj, vk)); +} + +/* Emits the `vsle.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_WU, vd, vj, vk)); +} + +/* Emits the `vsle.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_DU, vd, vj, vk)); +} + +/* Emits the `vslt.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_B, vd, vj, vk)); +} + +/* Emits the `vslt.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_H, vd, vj, vk)); +} + +/* Emits the `vslt.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_W, vd, vj, vk)); +} + +/* Emits the `vslt.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vslt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_D, vd, vj, vk)); +} + +/* Emits the `vslt.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_BU, vd, vj, vk)); +} + +/* Emits the `vslt.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_HU, vd, vj, vk)); +} + +/* Emits the `vslt.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_WU, vd, vj, vk)); +} + +/* Emits the `vslt.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_DU, vd, vj, vk)); +} + +/* Emits the `vadd.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_B, vd, vj, vk)); +} + +/* Emits the `vadd.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_H, vd, vj, vk)); +} + +/* Emits the `vadd.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_W, vd, vj, vk)); +} + +/* Emits the `vadd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_D, vd, vj, vk)); +} + +/* Emits the `vsub.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_B, vd, vj, vk)); +} + +/* Emits the `vsub.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_H, vd, vj, vk)); +} + +/* Emits the `vsub.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_W, vd, vj, vk)); +} + +/* Emits the `vsub.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_D, vd, vj, vk)); +} + +/* Emits the `vaddwev.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_B, vd, vj, vk)); +} + +/* Emits the `vaddwev.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_H, vd, vj, vk)); +} + +/* Emits the `vaddwev.d.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_W, vd, vj, vk)); +} + +/* Emits the `vaddwev.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_D, vd, vj, vk)); +} + +/* Emits the `vsubwev.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_H_B, vd, vj, vk)); +} + +/* Emits the `vsubwev.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_W_H, vd, vj, vk)); +} + +/* Emits the `vsubwev.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_D_W, vd, vj, vk)); +} + +/* Emits the `vsubwev.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_Q_D, vd, vj, vk)); +} + +/* Emits the `vaddwod.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_B, vd, vj, vk)); +} + +/* Emits the `vaddwod.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_H, vd, vj, vk)); +} + +/* Emits the `vaddwod.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_W, vd, vj, vk)); +} + +/* Emits the `vaddwod.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_D, vd, vj, vk)); +} + +/* Emits the `vsubwod.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_H_B, vd, vj, vk)); +} + +/* Emits the `vsubwod.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_W_H, vd, vj, vk)); +} + +/* Emits the `vsubwod.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_D_W, vd, vj, vk)); +} + +/* Emits the `vsubwod.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_Q_D, vd, vj, vk)); +} + +/* Emits the `vaddwev.h.bu vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_BU, vd, vj, vk)); +} + +/* Emits the `vaddwev.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_HU, vd, vj, vk)); +} + +/* Emits the `vaddwev.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_WU, vd, vj, vk)); +} + +/* Emits the `vaddwev.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_DU, vd, vj, vk)); +} + +/* Emits the `vsubwev.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_H_BU, vd, vj, vk)); +} + +/* Emits the `vsubwev.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_W_HU, vd, vj, vk)); +} + +/* Emits the `vsubwev.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_D_WU, vd, vj, vk)); +} + +/* Emits the `vsubwev.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_Q_DU, vd, vj, vk)); +} + +/* Emits the `vaddwod.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_BU, vd, vj, vk)); +} + +/* Emits the `vaddwod.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_HU, vd, vj, vk)); +} + +/* Emits the `vaddwod.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_WU, vd, vj, vk)); +} + +/* Emits the `vaddwod.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_DU, vd, vj, vk)); +} + +/* Emits the `vsubwod.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_H_BU, vd, vj, vk)); +} + +/* Emits the `vsubwod.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_W_HU, vd, vj, vk)); +} + +/* Emits the `vsubwod.d.wu vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_D_WU, vd, vj, vk)); +} + +/* Emits the `vsubwod.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_Q_DU, vd, vj, vk)); +} + +/* Emits the `vaddwev.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vaddwev.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vaddwev.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vaddwev.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vaddwod.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vaddwod.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vaddwod.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vaddwod.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vsadd.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_B, vd, vj, vk)); +} + +/* Emits the `vsadd.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_H, vd, vj, vk)); +} + +/* Emits the `vsadd.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_W, vd, vj, vk)); +} + +/* Emits the `vsadd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_D, vd, vj, vk)); +} + +/* Emits the `vssub.b vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_B, vd, vj, vk)); +} + +/* Emits the `vssub.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_H, vd, vj, vk)); +} + +/* Emits the `vssub.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_W, vd, vj, vk)); +} + +/* Emits the `vssub.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_D, vd, vj, vk)); +} + +/* Emits the `vsadd.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_BU, vd, vj, vk)); +} + +/* Emits the `vsadd.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_HU, vd, vj, vk)); +} + +/* Emits the `vsadd.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_WU, vd, vj, vk)); +} + +/* Emits the `vsadd.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_DU, vd, vj, vk)); +} + +/* Emits the `vssub.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_BU, vd, vj, vk)); +} + +/* Emits the `vssub.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_HU, vd, vj, vk)); +} + +/* Emits the `vssub.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_WU, vd, vj, vk)); +} + +/* Emits the `vssub.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_DU, vd, vj, vk)); +} + +/* Emits the `vhaddw.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_H_B, vd, vj, vk)); +} + +/* Emits the `vhaddw.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_W_H, vd, vj, vk)); +} + +/* Emits the `vhaddw.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_D_W, vd, vj, vk)); +} + +/* Emits the `vhaddw.q.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_Q_D, vd, vj, vk)); +} + +/* Emits the `vhsubw.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_H_B, vd, vj, vk)); +} + +/* Emits the `vhsubw.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_W_H, vd, vj, vk)); +} + +/* Emits the `vhsubw.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_D_W, vd, vj, vk)); +} + +/* Emits the `vhsubw.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_Q_D, vd, vj, vk)); +} + +/* Emits the `vhaddw.hu.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_HU_BU, vd, vj, vk)); +} + +/* Emits the `vhaddw.wu.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_WU_HU, vd, vj, vk)); +} + +/* Emits the `vhaddw.du.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_DU_WU, vd, vj, vk)); +} + +/* Emits the `vhaddw.qu.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_qu_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_QU_DU, vd, vj, vk)); +} + +/* Emits the `vhsubw.hu.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_HU_BU, vd, vj, vk)); +} + +/* Emits the `vhsubw.wu.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_WU_HU, vd, vj, vk)); +} + +/* Emits the `vhsubw.du.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_DU_WU, vd, vj, vk)); +} + +/* Emits the `vhsubw.qu.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_qu_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_QU_DU, vd, vj, vk)); +} + +/* Emits the `vadda.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadda_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_B, vd, vj, vk)); +} + +/* Emits the `vadda.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadda_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_H, vd, vj, vk)); +} + +/* Emits the `vadda.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vadda_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_W, vd, vj, vk)); +} + +/* Emits the `vadda.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadda_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_D, vd, vj, vk)); +} + +/* Emits the `vabsd.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_B, vd, vj, vk)); +} + +/* Emits the `vabsd.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_H, vd, vj, vk)); +} + +/* Emits the `vabsd.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_W, vd, vj, vk)); +} + +/* Emits the `vabsd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_D, vd, vj, vk)); +} + +/* Emits the `vabsd.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_BU, vd, vj, vk)); +} + +/* Emits the `vabsd.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_HU, vd, vj, vk)); +} + +/* Emits the `vabsd.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_WU, vd, vj, vk)); +} + +/* Emits the `vabsd.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_DU, vd, vj, vk)); +} + +/* Emits the `vavg.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_B, vd, vj, vk)); +} + +/* Emits the `vavg.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_H, vd, vj, vk)); +} + +/* Emits the `vavg.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_W, vd, vj, vk)); +} + +/* Emits the `vavg.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_D, vd, vj, vk)); +} + +/* Emits the `vavg.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_BU, vd, vj, vk)); +} + +/* Emits the `vavg.hu vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vavg_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_HU, vd, vj, vk)); +} + +/* Emits the `vavg.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_WU, vd, vj, vk)); +} + +/* Emits the `vavg.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_DU, vd, vj, vk)); +} + +/* Emits the `vavgr.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_B, vd, vj, vk)); +} + +/* Emits the `vavgr.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_H, vd, vj, vk)); +} + +/* Emits the `vavgr.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_W, vd, vj, vk)); +} + +/* Emits the `vavgr.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_D, vd, vj, vk)); +} + +/* Emits the `vavgr.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_BU, vd, vj, vk)); +} + +/* Emits the `vavgr.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_HU, vd, vj, vk)); +} + +/* Emits the `vavgr.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_WU, vd, vj, vk)); +} + +/* Emits the `vavgr.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_DU, vd, vj, vk)); +} + +/* Emits the `vmax.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_B, vd, vj, vk)); +} + +/* Emits the `vmax.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_H, vd, vj, vk)); +} + +/* Emits the `vmax.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_W, vd, vj, vk)); +} + +/* Emits the `vmax.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_D, vd, vj, vk)); +} + +/* Emits the `vmin.b vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmin_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_B, vd, vj, vk)); +} + +/* Emits the `vmin.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_H, vd, vj, vk)); +} + +/* Emits the `vmin.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_W, vd, vj, vk)); +} + +/* Emits the `vmin.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_D, vd, vj, vk)); +} + +/* Emits the `vmax.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_BU, vd, vj, vk)); +} + +/* Emits the `vmax.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_HU, vd, vj, vk)); +} + +/* Emits the `vmax.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_WU, vd, vj, vk)); +} + +/* Emits the `vmax.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_DU, vd, vj, vk)); +} + +/* Emits the `vmin.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_BU, vd, vj, vk)); +} + +/* Emits the `vmin.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_HU, vd, vj, vk)); +} + +/* Emits the `vmin.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_WU, vd, vj, vk)); +} + +/* Emits the `vmin.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_DU, vd, vj, vk)); +} + +/* Emits the `vmul.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmul_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_B, vd, vj, vk)); +} + +/* Emits the `vmul.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmul_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_H, vd, vj, vk)); +} + +/* Emits the `vmul.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmul_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_W, vd, vj, vk)); +} + +/* Emits the `vmul.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmul_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_D, vd, vj, vk)); +} + +/* Emits the `vmuh.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_B, vd, vj, vk)); +} + +/* Emits the `vmuh.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_H, vd, vj, vk)); +} + +/* Emits the `vmuh.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_W, vd, vj, vk)); +} + +/* Emits the `vmuh.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_D, vd, vj, vk)); +} + +/* Emits the `vmuh.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_BU, vd, vj, vk)); +} + +/* Emits the `vmuh.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_HU, vd, vj, vk)); +} + +/* Emits the `vmuh.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_WU, vd, vj, vk)); +} + +/* Emits the `vmuh.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_DU, vd, vj, vk)); +} + +/* Emits the `vmulwev.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_B, vd, vj, vk)); +} + +/* Emits the `vmulwev.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_H, vd, vj, vk)); +} + +/* Emits the `vmulwev.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_W, vd, vj, vk)); +} + +/* Emits the `vmulwev.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_D, vd, vj, vk)); +} + +/* Emits the `vmulwod.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_B, vd, vj, vk)); +} + +/* Emits the `vmulwod.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_H, vd, vj, vk)); +} + +/* Emits the `vmulwod.d.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_W, vd, vj, vk)); +} + +/* Emits the `vmulwod.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_D, vd, vj, vk)); +} + +/* Emits the `vmulwev.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_BU, vd, vj, vk)); +} + +/* Emits the `vmulwev.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_HU, vd, vj, vk)); +} + +/* Emits the `vmulwev.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_WU, vd, vj, vk)); +} + +/* Emits the `vmulwev.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_DU, vd, vj, vk)); +} + +/* Emits the `vmulwod.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_BU, vd, vj, vk)); +} + +/* Emits the `vmulwod.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_HU, vd, vj, vk)); +} + +/* Emits the `vmulwod.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_WU, vd, vj, vk)); +} + +/* Emits the `vmulwod.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_DU, vd, vj, vk)); +} + +/* Emits the `vmulwev.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vmulwev.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vmulwev.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vmulwev.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vmulwod.h.bu.b vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vmulwod.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vmulwod.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vmulwod.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vmadd.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_B, vd, vj, vk)); +} + +/* Emits the `vmadd.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_H, vd, vj, vk)); +} + +/* Emits the `vmadd.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_W, vd, vj, vk)); +} + +/* Emits the `vmadd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_D, vd, vj, vk)); +} + +/* Emits the `vmsub.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_B, vd, vj, vk)); +} + +/* Emits the `vmsub.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_H, vd, vj, vk)); +} + +/* Emits the `vmsub.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_W, vd, vj, vk)); +} + +/* Emits the `vmsub.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_D, vd, vj, vk)); +} + +/* Emits the `vmaddwev.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_B, vd, vj, vk)); +} + +/* Emits the `vmaddwev.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_H, vd, vj, vk)); +} + +/* Emits the `vmaddwev.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_W, vd, vj, vk)); +} + +/* Emits the `vmaddwev.q.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_D, vd, vj, vk)); +} + +/* Emits the `vmaddwod.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_B, vd, vj, vk)); +} + +/* Emits the `vmaddwod.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_H, vd, vj, vk)); +} + +/* Emits the `vmaddwod.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_W, vd, vj, vk)); +} + +/* Emits the `vmaddwod.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_D, vd, vj, vk)); +} + +/* Emits the `vmaddwev.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_BU, vd, vj, vk)); +} + +/* Emits the `vmaddwev.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_HU, vd, vj, vk)); +} + +/* Emits the `vmaddwev.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_WU, vd, vj, vk)); +} + +/* Emits the `vmaddwev.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_DU, vd, vj, vk)); +} + +/* Emits the `vmaddwod.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_BU, vd, vj, vk)); +} + +/* Emits the `vmaddwod.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_HU, vd, vj, vk)); +} + +/* Emits the `vmaddwod.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_WU, vd, vj, vk)); +} + +/* Emits the `vmaddwod.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_DU, vd, vj, vk)); +} + +/* Emits the `vmaddwev.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vmaddwev.w.hu.h vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vmaddwev.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vmaddwev.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vmaddwod.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vmaddwod.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vmaddwod.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vmaddwod.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vdiv.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_B, vd, vj, vk)); +} + +/* Emits the `vdiv.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_H, vd, vj, vk)); +} + +/* Emits the `vdiv.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_W, vd, vj, vk)); +} + +/* Emits the `vdiv.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_D, vd, vj, vk)); +} + +/* Emits the `vmod.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_B, vd, vj, vk)); +} + +/* Emits the `vmod.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_H, vd, vj, vk)); +} + +/* Emits the `vmod.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_W, vd, vj, vk)); +} + +/* Emits the `vmod.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_D, vd, vj, vk)); +} + +/* Emits the `vdiv.bu vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vdiv_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_BU, vd, vj, vk)); +} + +/* Emits the `vdiv.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_HU, vd, vj, vk)); +} + +/* Emits the `vdiv.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_WU, vd, vj, vk)); +} + +/* Emits the `vdiv.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_DU, vd, vj, vk)); +} + +/* Emits the `vmod.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_BU, vd, vj, vk)); +} + +/* Emits the `vmod.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_HU, vd, vj, vk)); +} + +/* Emits the `vmod.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_WU, vd, vj, vk)); +} + +/* Emits the `vmod.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_DU, vd, vj, vk)); +} + +/* Emits the `vsll.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsll_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_B, vd, vj, vk)); +} + +/* Emits the `vsll.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsll_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_H, vd, vj, vk)); +} + +/* Emits the `vsll.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsll_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_W, vd, vj, vk)); +} + +/* Emits the `vsll.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsll_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_D, vd, vj, vk)); +} + +/* Emits the `vsrl.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrl_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_B, vd, vj, vk)); +} + +/* Emits the `vsrl.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrl_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_H, vd, vj, vk)); +} + +/* Emits the `vsrl.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrl_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_W, vd, vj, vk)); +} + +/* Emits the `vsrl.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrl_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_D, vd, vj, vk)); +} + +/* Emits the `vsra.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsra_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_B, vd, vj, vk)); +} + +/* Emits the `vsra.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsra_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_H, vd, vj, vk)); +} + +/* Emits the `vsra.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsra_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_W, vd, vj, vk)); +} + +/* Emits the `vsra.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsra_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_D, vd, vj, vk)); +} + +/* Emits the `vrotr.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_B, vd, vj, vk)); +} + +/* Emits the `vrotr.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_H, vd, vj, vk)); +} + +/* Emits the `vrotr.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_W, vd, vj, vk)); +} + +/* Emits the `vrotr.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_D, vd, vj, vk)); +} + +/* Emits the `vsrlr.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_B, vd, vj, vk)); +} + +/* Emits the `vsrlr.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_H, vd, vj, vk)); +} + +/* Emits the `vsrlr.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_W, vd, vj, vk)); +} + +/* Emits the `vsrlr.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_D, vd, vj, vk)); +} + +/* Emits the `vsrar.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrar_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_B, vd, vj, vk)); +} + +/* Emits the `vsrar.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrar_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_H, vd, vj, vk)); +} + +/* Emits the `vsrar.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrar_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_W, vd, vj, vk)); +} + +/* Emits the `vsrar.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrar_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_D, vd, vj, vk)); +} + +/* Emits the `vsrln.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrln_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_B_H, vd, vj, vk)); +} + +/* Emits the `vsrln.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrln_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_H_W, vd, vj, vk)); +} + +/* Emits the `vsrln.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrln_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_W_D, vd, vj, vk)); +} + +/* Emits the `vsran.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsran_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_B_H, vd, vj, vk)); +} + +/* Emits the `vsran.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsran_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_H_W, vd, vj, vk)); +} + +/* Emits the `vsran.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsran_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_W_D, vd, vj, vk)); +} + +/* Emits the `vsrlrn.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_B_H, vd, vj, vk)); +} + +/* Emits the `vsrlrn.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_H_W, vd, vj, vk)); +} + +/* Emits the `vsrlrn.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_W_D, vd, vj, vk)); +} + +/* Emits the `vsrarn.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_B_H, vd, vj, vk)); +} + +/* Emits the `vsrarn.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_H_W, vd, vj, vk)); +} + +/* Emits the `vsrarn.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_W_D, vd, vj, vk)); +} + +/* Emits the `vssrln.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_B_H, vd, vj, vk)); +} + +/* Emits the `vssrln.h.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssrln_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_H_W, vd, vj, vk)); +} + +/* Emits the `vssrln.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_W_D, vd, vj, vk)); +} + +/* Emits the `vssran.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_B_H, vd, vj, vk)); +} + +/* Emits the `vssran.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_H_W, vd, vj, vk)); +} + +/* Emits the `vssran.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_W_D, vd, vj, vk)); +} + +/* Emits the `vssrlrn.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_B_H, vd, vj, vk)); +} + +/* Emits the `vssrlrn.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_H_W, vd, vj, vk)); +} + +/* Emits the `vssrlrn.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_W_D, vd, vj, vk)); +} + +/* Emits the `vssrarn.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_B_H, vd, vj, vk)); +} + +/* Emits the `vssrarn.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_H_W, vd, vj, vk)); +} + +/* Emits the `vssrarn.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_W_D, vd, vj, vk)); +} + +/* Emits the `vssrln.bu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_BU_H, vd, vj, vk)); +} + +/* Emits the `vssrln.hu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_HU_W, vd, vj, vk)); +} + +/* Emits the `vssrln.wu.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_WU_D, vd, vj, vk)); +} + +/* Emits the `vssran.bu.h vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssran_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_BU_H, vd, vj, vk)); +} + +/* Emits the `vssran.hu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_HU_W, vd, vj, vk)); +} + +/* Emits the `vssran.wu.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_WU_D, vd, vj, vk)); +} + +/* Emits the `vssrlrn.bu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_BU_H, vd, vj, vk)); +} + +/* Emits the `vssrlrn.hu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_HU_W, vd, vj, vk)); +} + +/* Emits the `vssrlrn.wu.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_WU_D, vd, vj, vk)); +} + +/* Emits the `vssrarn.bu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_BU_H, vd, vj, vk)); +} + +/* Emits the `vssrarn.hu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_HU_W, vd, vj, vk)); +} + +/* Emits the `vssrarn.wu.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_WU_D, vd, vj, vk)); +} + +/* Emits the `vbitclr.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_B, vd, vj, vk)); +} + +/* Emits the `vbitclr.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_H, vd, vj, vk)); +} + +/* Emits the `vbitclr.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_W, vd, vj, vk)); +} + +/* Emits the `vbitclr.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_D, vd, vj, vk)); +} + +/* Emits the `vbitset.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitset_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_B, vd, vj, vk)); +} + +/* Emits the `vbitset.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitset_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_H, vd, vj, vk)); +} + +/* Emits the `vbitset.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vbitset_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_W, vd, vj, vk)); +} + +/* Emits the `vbitset.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitset_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_D, vd, vj, vk)); +} + +/* Emits the `vbitrev.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_B, vd, vj, vk)); +} + +/* Emits the `vbitrev.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_H, vd, vj, vk)); +} + +/* Emits the `vbitrev.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_W, vd, vj, vk)); +} + +/* Emits the `vbitrev.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_D, vd, vj, vk)); +} + +/* Emits the `vpackev.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_B, vd, vj, vk)); +} + +/* Emits the `vpackev.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_H, vd, vj, vk)); +} + +/* Emits the `vpackev.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_W, vd, vj, vk)); +} + +/* Emits the `vpackev.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_D, vd, vj, vk)); +} + +/* Emits the `vpackod.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_B, vd, vj, vk)); +} + +/* Emits the `vpackod.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_H, vd, vj, vk)); +} + +/* Emits the `vpackod.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_W, vd, vj, vk)); +} + +/* Emits the `vpackod.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_D, vd, vj, vk)); +} + +/* Emits the `vilvl.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvl_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_B, vd, vj, vk)); +} + +/* Emits the `vilvl.h vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vilvl_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_H, vd, vj, vk)); +} + +/* Emits the `vilvl.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvl_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_W, vd, vj, vk)); +} + +/* Emits the `vilvl.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvl_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_D, vd, vj, vk)); +} + +/* Emits the `vilvh.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvh_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_B, vd, vj, vk)); +} + +/* Emits the `vilvh.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvh_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_H, vd, vj, vk)); +} + +/* Emits the `vilvh.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvh_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_W, vd, vj, vk)); +} + +/* Emits the `vilvh.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvh_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_D, vd, vj, vk)); +} + +/* Emits the `vpickev.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_B, vd, vj, vk)); +} + +/* Emits the `vpickev.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_H, vd, vj, vk)); +} + +/* Emits the `vpickev.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_W, vd, vj, vk)); +} + +/* Emits the `vpickev.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_D, vd, vj, vk)); +} + +/* Emits the `vpickod.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_B, vd, vj, vk)); +} + +/* Emits the `vpickod.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_H, vd, vj, vk)); +} + +/* Emits the `vpickod.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_W, vd, vj, vk)); +} + +/* Emits the `vpickod.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_D, vd, vj, vk)); +} + +/* Emits the `vreplve.b vd, vj, k` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vreplve_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_B, vd, vj, k)); +} + +/* Emits the `vreplve.h vd, vj, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplve_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_H, vd, vj, k)); +} + +/* Emits the `vreplve.w vd, vj, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplve_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_W, vd, vj, k)); +} + +/* Emits the `vreplve.d vd, vj, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplve_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_D, vd, vj, k)); +} + +/* Emits the `vand.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vand_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAND_V, vd, vj, vk)); +} + +/* Emits the `vor.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VOR_V, vd, vj, vk)); +} + +/* Emits the `vxor.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vxor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VXOR_V, vd, vj, vk)); +} + +/* Emits the `vnor.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vnor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VNOR_V, vd, vj, vk)); +} + +/* Emits the `vandn.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vandn_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VANDN_V, vd, vj, vk)); +} + +/* Emits the `vorn.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vorn_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VORN_V, vd, vj, vk)); +} + +/* Emits the `vfrstp.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrstp_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFRSTP_B, vd, vj, vk)); +} + +/* Emits the `vfrstp.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrstp_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFRSTP_H, vd, vj, vk)); +} + +/* Emits the `vadd.q vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_q(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_Q, vd, vj, vk)); +} + +/* Emits the `vsub.q vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_q(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_Q, vd, vj, vk)); +} + +/* Emits the `vsigncov.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsigncov_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_B, vd, vj, vk)); +} + +/* Emits the `vsigncov.h vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsigncov_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_H, vd, vj, vk)); +} + +/* Emits the `vsigncov.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsigncov_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_W, vd, vj, vk)); +} + +/* Emits the `vsigncov.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsigncov_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_D, vd, vj, vk)); +} + +/* Emits the `vfadd.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFADD_S, vd, vj, vk)); +} + +/* Emits the `vfadd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFADD_D, vd, vj, vk)); +} + +/* Emits the `vfsub.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFSUB_S, vd, vj, vk)); +} + +/* Emits the `vfsub.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFSUB_D, vd, vj, vk)); +} + +/* Emits the `vfmul.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmul_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMUL_S, vd, vj, vk)); +} + +/* Emits the `vfmul.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmul_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMUL_D, vd, vj, vk)); +} + +/* Emits the `vfdiv.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfdiv_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFDIV_S, vd, vj, vk)); +} + +/* Emits the `vfdiv.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfdiv_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFDIV_D, vd, vj, vk)); +} + +/* Emits the `vfmax.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmax_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAX_S, vd, vj, vk)); +} + +/* Emits the `vfmax.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmax_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAX_D, vd, vj, vk)); +} + +/* Emits the `vfmin.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmin_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMIN_S, vd, vj, vk)); +} + +/* Emits the `vfmin.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmin_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMIN_D, vd, vj, vk)); +} + +/* Emits the `vfmaxa.s vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfmaxa_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAXA_S, vd, vj, vk)); +} + +/* Emits the `vfmaxa.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmaxa_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAXA_D, vd, vj, vk)); +} + +/* Emits the `vfmina.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmina_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMINA_S, vd, vj, vk)); +} + +/* Emits the `vfmina.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmina_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMINA_D, vd, vj, vk)); +} + +/* Emits the `vfcvt.h.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvt_h_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCVT_H_S, vd, vj, vk)); +} + +/* Emits the `vfcvt.s.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvt_s_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCVT_S_D, vd, vj, vk)); +} + +/* Emits the `vffint.s.l vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_s_l(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFFINT_S_L, vd, vj, vk)); +} + +/* Emits the `vftint.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftint_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINT_W_D, vd, vj, vk)); +} + +/* Emits the `vftintrm.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrm_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRM_W_D, vd, vj, vk)); +} + +/* Emits the `vftintrp.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrp_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRP_W_D, vd, vj, vk)); +} + +/* Emits the `vftintrz.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRZ_W_D, vd, vj, vk)); +} + +/* Emits the `vftintrne.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrne_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRNE_W_D, vd, vj, vk)); +} + +/* Emits the `vshuf.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_H, vd, vj, vk)); +} + +/* Emits the `vshuf.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_W, vd, vj, vk)); +} + +/* Emits the `vshuf.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_D, vd, vj, vk)); +} + +/* Emits the `vseqi.b vd, vj, sk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vseqi_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_B, vd, vj, sk5)); +} + +/* Emits the `vseqi.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseqi_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_H, vd, vj, sk5)); +} + +/* Emits the `vseqi.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseqi_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_W, vd, vj, sk5)); +} + +/* Emits the `vseqi.d vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseqi_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_D, vd, vj, sk5)); +} + +/* Emits the `vslei.b vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_B, vd, vj, sk5)); +} + +/* Emits the `vslei.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_H, vd, vj, sk5)); +} + +/* Emits the `vslei.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_W, vd, vj, sk5)); +} + +/* Emits the `vslei.d vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_D, vd, vj, sk5)); +} + +/* Emits the `vslei.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_BU, vd, vj, uk5)); +} + +/* Emits the `vslei.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_HU, vd, vj, uk5)); +} + +/* Emits the `vslei.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_WU, vd, vj, uk5)); +} + +/* Emits the `vslei.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_DU, vd, vj, uk5)); +} + +/* Emits the `vslti.b vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_B, vd, vj, sk5)); +} + +/* Emits the `vslti.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_H, vd, vj, sk5)); +} + +/* Emits the `vslti.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_W, vd, vj, sk5)); +} + +/* Emits the `vslti.d vd, vj, sk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vslti_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_D, vd, vj, sk5)); +} + +/* Emits the `vslti.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_BU, vd, vj, uk5)); +} + +/* Emits the `vslti.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_HU, vd, vj, uk5)); +} + +/* Emits the `vslti.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_WU, vd, vj, uk5)); +} + +/* Emits the `vslti.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_DU, vd, vj, uk5)); +} + +/* Emits the `vaddi.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_BU, vd, vj, uk5)); +} + +/* Emits the `vaddi.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_HU, vd, vj, uk5)); +} + +/* Emits the `vaddi.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_WU, vd, vj, uk5)); +} + +/* Emits the `vaddi.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_DU, vd, vj, uk5)); +} + +/* Emits the `vsubi.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_BU, vd, vj, uk5)); +} + +/* Emits the `vsubi.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_HU, vd, vj, uk5)); +} + +/* Emits the `vsubi.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_WU, vd, vj, uk5)); +} + +/* Emits the `vsubi.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_DU, vd, vj, uk5)); +} + +/* Emits the `vbsll.v vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbsll_v(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBSLL_V, vd, vj, uk5)); +} + +/* Emits the `vbsrl.v vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbsrl_v(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBSRL_V, vd, vj, uk5)); +} + +/* Emits the `vmaxi.b vd, vj, sk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_B, vd, vj, sk5)); +} + +/* Emits the `vmaxi.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_H, vd, vj, sk5)); +} + +/* Emits the `vmaxi.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_W, vd, vj, sk5)); +} + +/* Emits the `vmaxi.d vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_D, vd, vj, sk5)); +} + +/* Emits the `vmini.b vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_B, vd, vj, sk5)); +} + +/* Emits the `vmini.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_H, vd, vj, sk5)); +} + +/* Emits the `vmini.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_W, vd, vj, sk5)); +} + +/* Emits the `vmini.d vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_D, vd, vj, sk5)); +} + +/* Emits the `vmaxi.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_BU, vd, vj, uk5)); +} + +/* Emits the `vmaxi.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_HU, vd, vj, uk5)); +} + +/* Emits the `vmaxi.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_WU, vd, vj, uk5)); +} + +/* Emits the `vmaxi.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_DU, vd, vj, uk5)); +} + +/* Emits the `vmini.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_BU, vd, vj, uk5)); +} + +/* Emits the `vmini.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_HU, vd, vj, uk5)); +} + +/* Emits the `vmini.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_WU, vd, vj, uk5)); +} + +/* Emits the `vmini.du vd, vj, uk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmini_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_DU, vd, vj, uk5)); +} + +/* Emits the `vfrstpi.b vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrstpi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VFRSTPI_B, vd, vj, uk5)); +} + +/* Emits the `vfrstpi.h vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrstpi_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VFRSTPI_H, vd, vj, uk5)); +} + +/* Emits the `vclo.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclo_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLO_B, vd, vj)); +} + +/* Emits the `vclo.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclo_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLO_H, vd, vj)); +} + +/* Emits the `vclo.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclo_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLO_W, vd, vj)); +} + +/* Emits the `vclo.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclo_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLO_D, vd, vj)); +} + +/* Emits the `vclz.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclz_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_B, vd, vj)); +} + +/* Emits the `vclz.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclz_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_H, vd, vj)); +} + +/* Emits the `vclz.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclz_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_W, vd, vj)); +} + +/* Emits the `vclz.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclz_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_D, vd, vj)); +} + +/* Emits the `vpcnt.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpcnt_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_B, vd, vj)); +} + +/* Emits the `vpcnt.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpcnt_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_H, vd, vj)); +} + +/* Emits the `vpcnt.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpcnt_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_W, vd, vj)); +} + +/* Emits the `vpcnt.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpcnt_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_D, vd, vj)); +} + +/* Emits the `vneg.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vneg_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VNEG_B, vd, vj)); +} + +/* Emits the `vneg.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vneg_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VNEG_H, vd, vj)); +} + +/* Emits the `vneg.w vd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vneg_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VNEG_W, vd, vj)); +} + +/* Emits the `vneg.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vneg_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VNEG_D, vd, vj)); +} + +/* Emits the `vmskltz.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskltz_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_B, vd, vj)); +} + +/* Emits the `vmskltz.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskltz_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_H, vd, vj)); +} + +/* Emits the `vmskltz.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskltz_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_W, vd, vj)); +} + +/* Emits the `vmskltz.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskltz_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_D, vd, vj)); +} + +/* Emits the `vmskgez.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskgez_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKGEZ_B, vd, vj)); +} + +/* Emits the `vmsknz.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsknz_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKNZ_B, vd, vj)); +} + +/* Emits the `vseteqz.v cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseteqz_v(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETEQZ_V, cd, vj)); +} + +/* Emits the `vsetnez.v cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetnez_v(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETNEZ_V, cd, vj)); +} + +/* Emits the `vsetanyeqz.b cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetanyeqz_b(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_B, cd, vj)); +} + +/* Emits the `vsetanyeqz.h cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetanyeqz_h(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_H, cd, vj)); +} + +/* Emits the `vsetanyeqz.w cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetanyeqz_w(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_W, cd, vj)); +} + +/* Emits the `vsetanyeqz.d cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetanyeqz_d(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_D, cd, vj)); +} + +/* Emits the `vsetallnez.b cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetallnez_b(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_B, cd, vj)); +} + +/* Emits the `vsetallnez.h cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetallnez_h(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_H, cd, vj)); +} + +/* Emits the `vsetallnez.w cd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsetallnez_w(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_W, cd, vj)); +} + +/* Emits the `vsetallnez.d cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetallnez_d(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_D, cd, vj)); +} + +/* Emits the `vflogb.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vflogb_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFLOGB_S, vd, vj)); +} + +/* Emits the `vflogb.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vflogb_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFLOGB_D, vd, vj)); +} + +/* Emits the `vfclass.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfclass_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCLASS_S, vd, vj)); +} + +/* Emits the `vfclass.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfclass_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCLASS_D, vd, vj)); +} + +/* Emits the `vfsqrt.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfsqrt_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFSQRT_S, vd, vj)); +} + +/* Emits the `vfsqrt.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfsqrt_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFSQRT_D, vd, vj)); +} + +/* Emits the `vfrecip.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrecip_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRECIP_S, vd, vj)); +} + +/* Emits the `vfrecip.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrecip_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRECIP_D, vd, vj)); +} + +/* Emits the `vfrsqrt.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrsqrt_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRSQRT_S, vd, vj)); +} + +/* Emits the `vfrsqrt.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrsqrt_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRSQRT_D, vd, vj)); +} + +/* Emits the `vfrint.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrint_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINT_S, vd, vj)); +} + +/* Emits the `vfrint.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrint_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINT_D, vd, vj)); +} + +/* Emits the `vfrintrm.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrm_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRM_S, vd, vj)); +} + +/* Emits the `vfrintrm.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrm_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRM_D, vd, vj)); +} + +/* Emits the `vfrintrp.s vd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfrintrp_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRP_S, vd, vj)); +} + +/* Emits the `vfrintrp.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrp_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRP_D, vd, vj)); +} + +/* Emits the `vfrintrz.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrz_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRZ_S, vd, vj)); +} + +/* Emits the `vfrintrz.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrz_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRZ_D, vd, vj)); +} + +/* Emits the `vfrintrne.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrne_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRNE_S, vd, vj)); +} + +/* Emits the `vfrintrne.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrne_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRNE_D, vd, vj)); +} + +/* Emits the `vfcvtl.s.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvtl_s_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCVTL_S_H, vd, vj)); +} + +/* Emits the `vfcvth.s.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvth_s_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCVTH_S_H, vd, vj)); +} + +/* Emits the `vfcvtl.d.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvtl_d_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCVTL_D_S, vd, vj)); +} + +/* Emits the `vfcvth.d.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvth_d_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCVTH_D_S, vd, vj)); +} + +/* Emits the `vffint.s.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_s_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_S_W, vd, vj)); +} + +/* Emits the `vffint.s.wu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_s_wu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_S_WU, vd, vj)); +} + +/* Emits the `vffint.d.l vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_d_l(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_D_L, vd, vj)); +} + +/* Emits the `vffint.d.lu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_d_lu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_D_LU, vd, vj)); +} + +/* Emits the `vffintl.d.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffintl_d_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINTL_D_W, vd, vj)); +} + +/* Emits the `vffinth.d.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffinth_d_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINTH_D_W, vd, vj)); +} + +/* Emits the `vftint.w.s vd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vftint_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_W_S, vd, vj)); +} + +/* Emits the `vftint.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftint_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_L_D, vd, vj)); +} + +/* Emits the `vftintrm.w.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrm_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRM_W_S, vd, vj)); +} + +/* Emits the `vftintrm.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrm_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRM_L_D, vd, vj)); +} + +/* Emits the `vftintrp.w.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrp_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRP_W_S, vd, vj)); +} + +/* Emits the `vftintrp.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrp_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRP_L_D, vd, vj)); +} + +/* Emits the `vftintrz.w.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_W_S, vd, vj)); +} + +/* Emits the `vftintrz.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_L_D, vd, vj)); +} + +/* Emits the `vftintrne.w.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrne_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNE_W_S, vd, vj)); +} + +/* Emits the `vftintrne.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrne_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNE_L_D, vd, vj)); +} + +/* Emits the `vftint.wu.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftint_wu_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_WU_S, vd, vj)); +} + +/* Emits the `vftint.lu.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftint_lu_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_LU_D, vd, vj)); +} + +/* Emits the `vftintrz.wu.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_wu_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_WU_S, vd, vj)); +} + +/* Emits the `vftintrz.lu.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_lu_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_LU_D, vd, vj)); +} + +/* Emits the `vftintl.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintl_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTL_L_S, vd, vj)); +} + +/* Emits the `vftinth.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftinth_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTH_L_S, vd, vj)); +} + +/* Emits the `vftintrml.l.s vd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vftintrml_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRML_L_S, vd, vj)); +} + +/* Emits the `vftintrmh.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrmh_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRMH_L_S, vd, vj)); +} + +/* Emits the `vftintrpl.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrpl_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRPL_L_S, vd, vj)); +} + +/* Emits the `vftintrph.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrph_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRPH_L_S, vd, vj)); +} + +/* Emits the `vftintrzl.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrzl_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZL_L_S, vd, vj)); +} + +/* Emits the `vftintrzh.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrzh_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZH_L_S, vd, vj)); +} + +/* Emits the `vftintrnel.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrnel_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNEL_L_S, vd, vj)); +} + +/* Emits the `vftintrneh.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrneh_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNEH_L_S, vd, vj)); +} + +/* Emits the `vexth.h.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_h_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_H_B, vd, vj)); +} + +/* Emits the `vexth.w.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_w_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_W_H, vd, vj)); +} + +/* Emits the `vexth.d.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_d_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_D_W, vd, vj)); +} + +/* Emits the `vexth.q.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_q_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_Q_D, vd, vj)); +} + +/* Emits the `vexth.hu.bu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_HU_BU, vd, vj)); +} + +/* Emits the `vexth.wu.hu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_WU_HU, vd, vj)); +} + +/* Emits the `vexth.du.wu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_du_wu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_DU_WU, vd, vj)); +} + +/* Emits the `vexth.qu.du vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_qu_du(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_QU_DU, vd, vj)); +} + +/* Emits the `vreplgr2vr.b vd, j` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vreplgr2vr_b(TCGContext *s, TCGReg vd, TCGReg j) +{ + tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_B, vd, j)); +} + +/* Emits the `vreplgr2vr.h vd, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplgr2vr_h(TCGContext *s, TCGReg vd, TCGReg j) +{ + tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_H, vd, j)); +} + +/* Emits the `vreplgr2vr.w vd, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplgr2vr_w(TCGContext *s, TCGReg vd, TCGReg j) +{ + tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_W, vd, j)); +} + +/* Emits the `vreplgr2vr.d vd, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplgr2vr_d(TCGContext *s, TCGReg vd, TCGReg j) +{ + tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_D, vd, j)); +} + +/* Emits the `vrotri.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VROTRI_B, vd, vj, uk3)); +} + +/* Emits the `vrotri.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VROTRI_H, vd, vj, uk4)); +} + +/* Emits the `vrotri.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VROTRI_W, vd, vj, uk5)); +} + +/* Emits the `vrotri.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VROTRI_D, vd, vj, uk6)); +} + +/* Emits the `vsrlri.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRLRI_B, vd, vj, uk3)); +} + +/* Emits the `vsrlri.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLRI_H, vd, vj, uk4)); +} + +/* Emits the `vsrlri.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLRI_W, vd, vj, uk5)); +} + +/* Emits the `vsrlri.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLRI_D, vd, vj, uk6)); +} + +/* Emits the `vsrari.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrari_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRARI_B, vd, vj, uk3)); +} + +/* Emits the `vsrari.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrari_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRARI_H, vd, vj, uk4)); +} + +/* Emits the `vsrari.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrari_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRARI_W, vd, vj, uk5)); +} + +/* Emits the `vsrari.d vd, vj, uk6` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrari_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRARI_D, vd, vj, uk6)); +} + +/* Emits the `vinsgr2vr.b vd, j, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vinsgr2vr_b(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk4) +{ + tcg_out32(s, encode_vdjuk4_insn(OPC_VINSGR2VR_B, vd, j, uk4)); +} + +/* Emits the `vinsgr2vr.h vd, j, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vinsgr2vr_h(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk3) +{ + tcg_out32(s, encode_vdjuk3_insn(OPC_VINSGR2VR_H, vd, j, uk3)); +} + +/* Emits the `vinsgr2vr.w vd, j, uk2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vinsgr2vr_w(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk2) +{ + tcg_out32(s, encode_vdjuk2_insn(OPC_VINSGR2VR_W, vd, j, uk2)); +} + +/* Emits the `vinsgr2vr.d vd, j, uk1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vinsgr2vr_d(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk1) +{ + tcg_out32(s, encode_vdjuk1_insn(OPC_VINSGR2VR_D, vd, j, uk1)); +} + +/* Emits the `vpickve2gr.b d, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_b(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_dvjuk4_insn(OPC_VPICKVE2GR_B, d, vj, uk4)); +} + +/* Emits the `vpickve2gr.h d, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_h(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_dvjuk3_insn(OPC_VPICKVE2GR_H, d, vj, uk3)); +} + +/* Emits the `vpickve2gr.w d, vj, uk2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_w(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk2) +{ + tcg_out32(s, encode_dvjuk2_insn(OPC_VPICKVE2GR_W, d, vj, uk2)); +} + +/* Emits the `vpickve2gr.d d, vj, uk1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_d(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk1) +{ + tcg_out32(s, encode_dvjuk1_insn(OPC_VPICKVE2GR_D, d, vj, uk1)); +} + +/* Emits the `vpickve2gr.bu d, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_bu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_dvjuk4_insn(OPC_VPICKVE2GR_BU, d, vj, uk4)); +} + +/* Emits the `vpickve2gr.hu d, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_hu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_dvjuk3_insn(OPC_VPICKVE2GR_HU, d, vj, uk3)); +} + +/* Emits the `vpickve2gr.wu d, vj, uk2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_wu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk2) +{ + tcg_out32(s, encode_dvjuk2_insn(OPC_VPICKVE2GR_WU, d, vj, uk2)); +} + +/* Emits the `vpickve2gr.du d, vj, uk1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_du(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk1) +{ + tcg_out32(s, encode_dvjuk1_insn(OPC_VPICKVE2GR_DU, d, vj, uk1)); +} + +/* Emits the `vreplvei.b vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplvei_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VREPLVEI_B, vd, vj, uk4)); +} + +/* Emits the `vreplvei.h vd, vj, uk3` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vreplvei_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VREPLVEI_H, vd, vj, uk3)); +} + +/* Emits the `vreplvei.w vd, vj, uk2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplvei_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk2) +{ + tcg_out32(s, encode_vdvjuk2_insn(OPC_VREPLVEI_W, vd, vj, uk2)); +} + +/* Emits the `vreplvei.d vd, vj, uk1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplvei_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk1) +{ + tcg_out32(s, encode_vdvjuk1_insn(OPC_VREPLVEI_D, vd, vj, uk1)); +} + +/* Emits the `vsllwil.h.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_h_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLWIL_H_B, vd, vj, uk3)); +} + +/* Emits the `vsllwil.w.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_w_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLWIL_W_H, vd, vj, uk4)); +} + +/* Emits the `vsllwil.d.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_d_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLWIL_D_W, vd, vj, uk5)); +} + +/* Emits the `vextl.q.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextl_q_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTL_Q_D, vd, vj)); +} + +/* Emits the `vsllwil.hu.bu vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLWIL_HU_BU, vd, vj, uk3)); +} + +/* Emits the `vsllwil.wu.hu vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLWIL_WU_HU, vd, vj, uk4)); +} + +/* Emits the `vsllwil.du.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLWIL_DU_WU, vd, vj, uk5)); +} + +/* Emits the `vextl.qu.du vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextl_qu_du(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTL_QU_DU, vd, vj)); +} + +/* Emits the `vbitclri.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITCLRI_B, vd, vj, uk3)); +} + +/* Emits the `vbitclri.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITCLRI_H, vd, vj, uk4)); +} + +/* Emits the `vbitclri.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITCLRI_W, vd, vj, uk5)); +} + +/* Emits the `vbitclri.d vd, vj, uk6` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vbitclri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITCLRI_D, vd, vj, uk6)); +} + +/* Emits the `vbitseti.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseti_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITSETI_B, vd, vj, uk3)); +} + +/* Emits the `vbitseti.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseti_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITSETI_H, vd, vj, uk4)); +} + +/* Emits the `vbitseti.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseti_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITSETI_W, vd, vj, uk5)); +} + +/* Emits the `vbitseti.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseti_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITSETI_D, vd, vj, uk6)); +} + +/* Emits the `vbitrevi.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrevi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITREVI_B, vd, vj, uk3)); +} + +/* Emits the `vbitrevi.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrevi_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITREVI_H, vd, vj, uk4)); +} + +/* Emits the `vbitrevi.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrevi_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITREVI_W, vd, vj, uk5)); +} + +/* Emits the `vbitrevi.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrevi_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITREVI_D, vd, vj, uk6)); +} + +/* Emits the `vsat.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSAT_B, vd, vj, uk3)); +} + +/* Emits the `vsat.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSAT_H, vd, vj, uk4)); +} + +/* Emits the `vsat.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSAT_W, vd, vj, uk5)); +} + +/* Emits the `vsat.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSAT_D, vd, vj, uk6)); +} + +/* Emits the `vsat.bu vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSAT_BU, vd, vj, uk3)); +} + +/* Emits the `vsat.hu vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSAT_HU, vd, vj, uk4)); +} + +/* Emits the `vsat.wu vd, vj, uk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsat_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSAT_WU, vd, vj, uk5)); +} + +/* Emits the `vsat.du vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSAT_DU, vd, vj, uk6)); +} + +/* Emits the `vslli.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLI_B, vd, vj, uk3)); +} + +/* Emits the `vslli.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslli_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLI_H, vd, vj, uk4)); +} + +/* Emits the `vslli.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslli_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLI_W, vd, vj, uk5)); +} + +/* Emits the `vslli.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslli_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSLLI_D, vd, vj, uk6)); +} + +/* Emits the `vsrli.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRLI_B, vd, vj, uk3)); +} + +/* Emits the `vsrli.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrli_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLI_H, vd, vj, uk4)); +} + +/* Emits the `vsrli.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrli_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLI_W, vd, vj, uk5)); +} + +/* Emits the `vsrli.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrli_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLI_D, vd, vj, uk6)); +} + +/* Emits the `vsrai.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrai_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRAI_B, vd, vj, uk3)); +} + +/* Emits the `vsrai.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrai_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRAI_H, vd, vj, uk4)); +} + +/* Emits the `vsrai.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrai_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRAI_W, vd, vj, uk5)); +} + +/* Emits the `vsrai.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrai_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRAI_D, vd, vj, uk6)); +} + +/* Emits the `vsrlni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vsrlni.h.w vd, vj, uk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrlni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vsrlni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vsrlni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRLNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vsrlrni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLRNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vsrlrni.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLRNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vsrlrni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLRNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vsrlrni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRLRNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrlni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vssrlni.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vssrlni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vssrlni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrlni.bu.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLNI_BU_H, vd, vj, uk4)); +} + +/* Emits the `vssrlni.hu.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLNI_HU_W, vd, vj, uk5)); +} + +/* Emits the `vssrlni.wu.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLNI_WU_D, vd, vj, uk6)); +} + +/* Emits the `vssrlni.du.q vd, vj, uk7` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLNI_DU_Q, vd, vj, uk7)); +} + +/* Emits the `vssrlrni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLRNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vssrlrni.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLRNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vssrlrni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLRNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vssrlrni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLRNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrlrni.bu.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLRNI_BU_H, vd, vj, uk4)); +} + +/* Emits the `vssrlrni.hu.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLRNI_HU_W, vd, vj, uk5)); +} + +/* Emits the `vssrlrni.wu.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLRNI_WU_D, vd, vj, uk6)); +} + +/* Emits the `vssrlrni.du.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLRNI_DU_Q, vd, vj, uk7)); +} + +/* Emits the `vsrani.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrani_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRANI_B_H, vd, vj, uk4)); +} + +/* Emits the `vsrani.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrani_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRANI_H_W, vd, vj, uk5)); +} + +/* Emits the `vsrani.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrani_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRANI_W_D, vd, vj, uk6)); +} + +/* Emits the `vsrani.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrani_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRANI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vsrarni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRARNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vsrarni.h.w vd, vj, uk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrarni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRARNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vsrarni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRARNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vsrarni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRARNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrani.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRANI_B_H, vd, vj, uk4)); +} + +/* Emits the `vssrani.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRANI_H_W, vd, vj, uk5)); +} + +/* Emits the `vssrani.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRANI_W_D, vd, vj, uk6)); +} + +/* Emits the `vssrani.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRANI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrani.bu.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRANI_BU_H, vd, vj, uk4)); +} + +/* Emits the `vssrani.hu.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRANI_HU_W, vd, vj, uk5)); +} + +/* Emits the `vssrani.wu.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRANI_WU_D, vd, vj, uk6)); +} + +/* Emits the `vssrani.du.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRANI_DU_Q, vd, vj, uk7)); +} + +/* Emits the `vssrarni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRARNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vssrarni.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRARNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vssrarni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRARNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vssrarni.d.q vd, vj, uk7` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRARNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrarni.bu.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRARNI_BU_H, vd, vj, uk4)); +} + +/* Emits the `vssrarni.hu.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRARNI_HU_W, vd, vj, uk5)); +} + +/* Emits the `vssrarni.wu.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRARNI_WU_D, vd, vj, uk6)); +} + +/* Emits the `vssrarni.du.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRARNI_DU_Q, vd, vj, uk7)); +} + +/* Emits the `vextrins.d vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextrins_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_D, vd, vj, uk8)); +} + +/* Emits the `vextrins.w vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextrins_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_W, vd, vj, uk8)); +} + +/* Emits the `vextrins.h vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextrins_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_H, vd, vj, uk8)); +} + +/* Emits the `vextrins.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextrins_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_B, vd, vj, uk8)); +} + +/* Emits the `vshuf4i.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf4i_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_B, vd, vj, uk8)); +} + +/* Emits the `vshuf4i.h vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf4i_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_H, vd, vj, uk8)); +} + +/* Emits the `vshuf4i.w vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf4i_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_W, vd, vj, uk8)); +} + +/* Emits the `vshuf4i.d vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf4i_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_D, vd, vj, uk8)); +} + +/* Emits the `vbitseli.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VBITSELI_B, vd, vj, uk8)); +} + +/* Emits the `vandi.b vd, vj, uk8` instruction. 
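+ *
+ * Like every other vdvjukN emitter in this generated block, this helper just
+ * packs the destination vd, the source vj and the N-bit unsigned immediate
+ * into a single 32-bit instruction word via the matching encode_vdvjukN_insn()
+ * helper and writes it out with tcg_out32(); only the opcode constant differs
+ * between them.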
*/ +static void __attribute__((unused)) +tcg_out_opc_vandi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VANDI_B, vd, vj, uk8)); +} + +/* Emits the `vori.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VORI_B, vd, vj, uk8)); +} + +/* Emits the `vxori.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vxori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VXORI_B, vd, vj, uk8)); +} + +/* Emits the `vnori.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vnori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VNORI_B, vd, vj, uk8)); +} + +/* Emits the `vldi vd, sj13` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldi(TCGContext *s, TCGReg vd, int32_t sj13) +{ + tcg_out32(s, encode_vdsj13_insn(OPC_VLDI, vd, sj13)); +} + +/* Emits the `vpermi.w vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpermi_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VPERMI_W, vd, vj, uk8)); +} + +/* End of generated code. */ diff --git a/qemu/tcg/loongarch64/tcg-target.h b/qemu/tcg/loongarch64/tcg-target.h new file mode 100644 index 0000000000..60990426e6 --- /dev/null +++ b/qemu/tcg/loongarch64/tcg-target.h @@ -0,0 +1,228 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2021 WANG Xuerui + * + * Based on tcg/riscv/tcg-target.h + * + * Copyright (c) 2018 SiFive, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef LOONGARCH_TCG_TARGET_H +#define LOONGARCH_TCG_TARGET_H + +#define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_NB_REGS 64 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 + +/* + * Loongson removed the (incomplete) 32-bit support from kernel and toolchain + * for the initial upstreaming of this architecture, so don't bother and just + * support the LP64* ABI for now. 
+ */ +#if defined(__loongarch64) +# define TCG_TARGET_REG_BITS 64 +#else +# error unsupported LoongArch register size +#endif + +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) + +typedef enum { + TCG_REG_ZERO, + TCG_REG_RA, + TCG_REG_TP, + TCG_REG_SP, + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + TCG_REG_T7, + TCG_REG_T8, + TCG_REG_RESERVED, + TCG_REG_S9, + TCG_REG_S0, + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + + TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, + TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, + TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, + TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, + TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, + TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, + TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, + TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, + + /* aliases */ + TCG_AREG0 = TCG_REG_S0, + TCG_REG_TMP0 = TCG_REG_T8, + TCG_REG_TMP1 = TCG_REG_T7, + TCG_REG_TMP2 = TCG_REG_T6, + TCG_VEC_TMP0 = TCG_REG_V23, +} TCGReg; + +extern bool use_lsx_instructions; + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_SP +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL + +/* optional instructions */ +#define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 0 +#define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 1 +#define TCG_TARGET_HAS_div2_i32 0 +#define TCG_TARGET_HAS_rot_i32 1 +#define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_extract_i32 1 +#define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 1 +#define TCG_TARGET_HAS_mulsh_i32 1 +#define TCG_TARGET_HAS_ext8s_i32 1 +#define TCG_TARGET_HAS_ext16s_i32 1 +#define TCG_TARGET_HAS_ext8u_i32 1 +#define TCG_TARGET_HAS_ext16u_i32 1 +#define TCG_TARGET_HAS_bswap16_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_not_i32 1 +#define TCG_TARGET_HAS_neg_i32 0 +#define TCG_TARGET_HAS_andc_i32 1 +#define TCG_TARGET_HAS_orc_i32 1 +#define TCG_TARGET_HAS_eqv_i32 0 +#define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_nor_i32 1 +#define TCG_TARGET_HAS_clz_i32 1 +#define TCG_TARGET_HAS_ctz_i32 1 +#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_brcond2 0 +#define TCG_TARGET_HAS_setcond2 0 +#define TCG_TARGET_HAS_qemu_st8_i32 0 +#define TCG_TARGET_HAS_goto_ptr 1 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 + +/* 64-bit operations */ +#define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 0 +#define TCG_TARGET_HAS_div_i64 1 +#define TCG_TARGET_HAS_rem_i64 1 +#define TCG_TARGET_HAS_div2_i64 0 +#define TCG_TARGET_HAS_rot_i64 1 +#define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_extract_i64 1 +#define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 +#define TCG_TARGET_HAS_extr_i64_i32 1 +#define TCG_TARGET_HAS_ext8s_i64 1 +#define 
TCG_TARGET_HAS_ext16s_i64 1 +#define TCG_TARGET_HAS_ext32s_i64 1 +#define TCG_TARGET_HAS_ext8u_i64 1 +#define TCG_TARGET_HAS_ext16u_i64 1 +#define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_bswap16_i64 1 +#define TCG_TARGET_HAS_bswap32_i64 1 +#define TCG_TARGET_HAS_bswap64_i64 1 +#define TCG_TARGET_HAS_not_i64 1 +#define TCG_TARGET_HAS_neg_i64 0 +#define TCG_TARGET_HAS_andc_i64 1 +#define TCG_TARGET_HAS_orc_i64 1 +#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i64 1 +#define TCG_TARGET_HAS_clz_i64 1 +#define TCG_TARGET_HAS_ctz_i64 1 +#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 1 +#define TCG_TARGET_HAS_mulsh_i64 1 +#define TCG_TARGET_HAS_direct_jump 0 + +#define TCG_TARGET_HAS_qemu_ldst_i128 use_lsx_instructions + +#define TCG_TARGET_HAS_v64 0 +#define TCG_TARGET_HAS_v128 use_lsx_instructions +#define TCG_TARGET_HAS_v256 0 + +#define TCG_TARGET_HAS_not_vec 1 +#define TCG_TARGET_HAS_neg_vec 1 +#define TCG_TARGET_HAS_abs_vec 0 +#define TCG_TARGET_HAS_andc_vec 1 +#define TCG_TARGET_HAS_orc_vec 1 +#define TCG_TARGET_HAS_nand_vec 0 +#define TCG_TARGET_HAS_nor_vec 1 +#define TCG_TARGET_HAS_eqv_vec 0 +#define TCG_TARGET_HAS_mul_vec 1 +#define TCG_TARGET_HAS_shi_vec 1 +#define TCG_TARGET_HAS_shs_vec 0 +#define TCG_TARGET_HAS_shv_vec 1 +#define TCG_TARGET_HAS_roti_vec 1 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 1 +#define TCG_TARGET_HAS_sat_vec 1 +#define TCG_TARGET_HAS_minmax_vec 1 +#define TCG_TARGET_HAS_bitsel_vec 1 +#define TCG_TARGET_HAS_cmpsel_vec 0 + +#define TCG_TARGET_DEFAULT_MO (0) +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + __builtin___clear_cache((char *)start, (char *)stop); +} + +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); + +#define TCG_TARGET_NEED_LDST_LABELS + +#endif /* LOONGARCH_TCG_TARGET_H */ diff --git a/qemu/tcg/loongarch64/tcg-target.inc.c b/qemu/tcg/loongarch64/tcg-target.inc.c new file mode 100644 index 0000000000..aed5e007a6 --- /dev/null +++ b/qemu/tcg/loongarch64/tcg-target.inc.c @@ -0,0 +1,2681 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2021 WANG Xuerui + * + * Based on tcg/riscv/tcg-target.c.inc + * + * Copyright (c) 2018 SiFive, Inc + * Copyright (c) 2008-2009 Arnaud Patard + * Copyright (c) 2009 Aurelien Jarno + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "../tcg-ldst.inc.c" +#include + +bool use_lsx_instructions; + +#ifdef CONFIG_DEBUG_TCG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "zero", + "ra", + "tp", + "sp", + "a0", + "a1", + "a2", + "a3", + "a4", + "a5", + "a6", + "a7", + "t0", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "t8", + "r21", /* reserved in the LP64* ABI, hence no ABI name */ + "s9", + "s0", + "s1", + "s2", + "s3", + "s4", + "s5", + "s6", + "s7", + "s8", + "vr0", + "vr1", + "vr2", + "vr3", + "vr4", + "vr5", + "vr6", + "vr7", + "vr8", + "vr9", + "vr10", + "vr11", + "vr12", + "vr13", + "vr14", + "vr15", + "vr16", + "vr17", + "vr18", + "vr19", + "vr20", + "vr21", + "vr22", + "vr23", + "vr24", + "vr25", + "vr26", + "vr27", + "vr28", + "vr29", + "vr30", + "vr31", +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { + /* Registers preserved across calls */ + /* TCG_REG_S0 reserved for TCG_AREG0 */ + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_S9, + + /* Registers (potentially) clobbered across calls */ + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + TCG_REG_T7, + TCG_REG_T8, + + /* Argument registers, opposite order of allocation. */ + TCG_REG_A7, + TCG_REG_A6, + TCG_REG_A5, + TCG_REG_A4, + TCG_REG_A3, + TCG_REG_A2, + TCG_REG_A1, + TCG_REG_A0, + + /* Vector registers */ + TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, + TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, + TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, + TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, + TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, + TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, + /* V24 - V31 are caller-saved, and skipped. 
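+ *
+ * Note on the ordering above: the call-saved registers (s1-s9) come first so
+ * that long-lived TCG values preferably land in registers that survive calls
+ * to helper functions, while the argument registers are tried last and in
+ * reverse order (a7..a0) so that the low-numbered ones stay free for call
+ * setup as long as possible.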
*/ +}; + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, +}; + +static const TCGReg tcg_target_call_oarg_regs[2] = { + TCG_REG_A0, + TCG_REG_A1 +}; + +#ifndef CONFIG_SOFTMMU +#define USE_GUEST_BASE (guest_base != 0) +#define TCG_GUEST_BASE_REG TCG_REG_S1 +#endif + +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_S12 0x200 +#define TCG_CT_CONST_S32 0x400 +#define TCG_CT_CONST_U12 0x800 +#define TCG_CT_CONST_C12 0x1000 +#define TCG_CT_CONST_WSZ 0x2000 +#define TCG_CT_CONST_VCMP 0x4000 +#define TCG_CT_CONST_VADD 0x8000 + +#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) +#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) + +static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) +{ + return sextract64(val, pos, len); +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct; + ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return true; + } + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return true; + } + if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { + return true; + } + if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return true; + } + if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { + return true; + } + if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) { + return true; + } + if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { + return true; + } +#if 0 + int64_t vec_val = sextract64(val, 0, 8 << vece); + if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) { + return true; + } + if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) { + return true; + } +#else + /* tcg does not pass vece to us */ + if ((ct & TCG_CT_CONST_VADD) || (ct & TCG_CT_CONST_VCMP)) { + return true; + } +#endif + + return false; +} + +/* parse target specific constraints */ +static const char *target_parse_constraint(TCGArgConstraint *ct, + const char *ct_str, TCGType type) +{ + switch(*ct_str++) { + case 'r': + ct->ct |= TCG_CT_REG; + ct->u.regs = ALL_GENERAL_REGS; + break; + case 'l': + ct->ct |= TCG_CT_REG; + ct->u.regs = ALL_GENERAL_REGS; +#ifdef CONFIG_SOFTMMU + tcg_regset_reset_reg(ct->u.regs, TCG_AREG0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_TMP0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_TMP1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_TMP2); +#endif + break; + case 'w': + ct->ct |= TCG_CT_REG; + ct->u.regs = ALL_VECTOR_REGS; + break; + case 'I': + ct->ct |= TCG_CT_CONST_S12; + break; + case 'J': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'U': + ct->ct |= TCG_CT_CONST_U12; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; + case 'C': + ct->ct |= TCG_CT_CONST_C12; + break; + case 'W': + ct->ct |= TCG_CT_CONST_WSZ; + break; + case 'M': + ct->ct |= TCG_CT_CONST_VCMP; + break; + case 'A': + ct->ct |= TCG_CT_CONST_VADD; + break; + default: + return NULL; + } + return ct_str; +} + +/* + * Relocations + */ + +/* + * Relocation records defined in LoongArch ELF psABI v1.00 is way too + * complicated; a whopping stack machine is needed to stuff the fields, at + * the very least one SOP_PUSH and one SOP_POP (of the correct format) are + * needed. + * + * Hence, define our own simpler relocation types. Numbers are chosen as to + * not collide with potential future additions to the true ELF relocation + * type enum. 
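+ *
+ * Concretely, R_LOONGARCH_BR_SK16 patches the 16-bit branch offset field of
+ * the DJSk16 conditional-branch format (instruction bits [25:10]), and
+ * R_LOONGARCH_BR_SD10K16 patches the split 26-bit offset of B/BL (low 16
+ * bits in [25:10], high 10 bits in [9:0]); both offsets are counted in
+ * 4-byte instruction units, as implemented by the reloc_br_* helpers below.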
+ */ + +/* Field Sk16, shifted right by 2; suitable for conditional jumps */ +#define R_LOONGARCH_BR_SK16 256 +/* Field Sd10k16, shifted right by 2; suitable for B and BL */ +#define R_LOONGARCH_BR_SD10K16 257 + +static bool reloc_br_sk16(tcg_insn_unit *src_rw, const tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)src_rw; + + tcg_debug_assert((offset & 3) == 0); + offset >>= 2; + if (offset == sextreg(offset, 0, 16)) { + *src_rw = deposit64(*src_rw, 10, 16, offset); + return true; + } + + return false; +} + +static bool reloc_br_sd10k16(tcg_insn_unit *src_rw, + const tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)src_rw; + + tcg_debug_assert((offset & 3) == 0); + offset >>= 2; + if (offset == sextreg(offset, 0, 26)) { + *src_rw = deposit64(*src_rw, 0, 10, offset >> 16); /* slot d10 */ + *src_rw = deposit64(*src_rw, 10, 16, offset); /* slot k16 */ + return true; + } + + return false; +} + +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + tcg_debug_assert(addend == 0); + switch (type) { + case R_LOONGARCH_BR_SK16: + return reloc_br_sk16(code_ptr, (tcg_insn_unit *)value); + case R_LOONGARCH_BR_SD10K16: + return reloc_br_sd10k16(code_ptr, (tcg_insn_unit *)value); + default: + g_assert_not_reached(); + } +} + +#include "tcg-insn-defs.c.inc" + +/* + * TCG intrinsics + */ + +static void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + /* Baseline LoongArch only has the full barrier, unfortunately. */ + tcg_out_opc_dbar(s, 0); +} + +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + if (ret == arg) { + return true; + } + switch (type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + /* + * Conventional register-register move used in LoongArch is + * `or dst, src, zero`. + */ + tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO); + break; + default: + g_assert_not_reached(); + } + return true; +} + +/* Loads a 32-bit immediate into rd, sign-extended. */ +static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) +{ + tcg_target_long lo = sextreg(val, 0, 12); + tcg_target_long hi12 = sextreg(val, 12, 20); + + /* Single-instruction cases. */ + if (hi12 == 0) { + /* val fits in uimm12: ori rd, zero, val */ + tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); + return; + } + if (hi12 == sextreg(lo, 12, 20)) { + /* val fits in simm12: addi.w rd, zero, val */ + tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); + return; + } + + /* High bits must be set; load with lu12i.w + optional ori. */ + tcg_out_opc_lu12i_w(s, rd, hi12); + if (lo != 0) { + tcg_out_opc_ori(s, rd, rd, lo & 0xfff); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + tcg_target_long val) +{ + /* + * LoongArch conventionally loads 64-bit immediates in at most 4 steps, + * with dedicated instructions for filling the respective bitfields + * below: + * + * 6 5 4 3 + * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 + * +-----------------------+---------------------------------------+... + * | hi52 | hi32 | + * +-----------------------+---------------------------------------+... + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * ...+-------------------------------------+-------------------------+ + * | hi12 | lo | + * ...+-------------------------------------+-------------------------+ + * + * Check if val belong to one of the several fast cases, before falling + * back to the slow path. 
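+ *
+ * As a worked example, an arbitrary constant such as 0x123456789abcdef0
+ * (assuming it is not close enough to the current code pointer for the
+ * PC-relative cases) hits none of the fast paths: the low 32 bits are loaded
+ * with lu12i.w + ori (via tcg_out_movi_i32), then hi32 = sextreg(val, 32, 20)
+ * is installed with cu32i.d and hi52 = sextreg(val, 52, 12) with cu52i.d,
+ * for four instructions in total.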
+ */ + + intptr_t pc_offset; + tcg_target_long val_lo, val_hi, pc_hi, offset_hi; + tcg_target_long hi12, hi32, hi52; + + /* Value fits in signed i32. */ + if (type == TCG_TYPE_I32 || val == (int32_t)val) { + tcg_out_movi_i32(s, rd, val); + return; + } + + /* PC-relative cases. */ + pc_offset = tcg_pcrel_diff(s, (void *)val); + if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) { + /* Single pcaddu2i. */ + tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2); + return; + } + + if (pc_offset == (int32_t)pc_offset) { + /* Offset within 32 bits; load with pcalau12i + ori. */ + val_lo = sextreg(val, 0, 12); + val_hi = val >> 12; + pc_hi = (val - pc_offset) >> 12; + offset_hi = val_hi - pc_hi; + + tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20)); + tcg_out_opc_pcalau12i(s, rd, offset_hi); + if (val_lo != 0) { + tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff); + } + return; + } + + hi12 = sextreg(val, 12, 20); + hi32 = sextreg(val, 32, 20); + hi52 = sextreg(val, 52, 12); + + /* Single cu52i.d case. */ + if ((hi52 != 0) && (ctz64(val) >= 52)) { + tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); + return; + } + + /* Slow path. Initialize the low 32 bits, then concat high bits. */ + tcg_out_movi_i32(s, rd, val); + + /* Load hi32 and hi52 explicitly when they are unexpected values. */ + if (hi32 != sextreg(hi12, 20, 20)) { + tcg_out_opc_cu32i_d(s, rd, hi32); + } + + if (hi52 != sextreg(hi32, 20, 12)) { + tcg_out_opc_cu52i_d(s, rd, rd, hi52); + } +} + +static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, tcg_target_long imm) +{ + tcg_target_long lo12 = sextreg(imm, 0, 12); + tcg_target_long hi16 = sextreg(imm - lo12, 16, 16); + + /* + * Note that there's a hole in between hi16 and lo12: + * + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * ...+-------------------------------+-------+-----------------------+ + * | hi16 | | lo12 | + * ...+-------------------------------+-------+-----------------------+ + * + * For bits within that hole, it's more efficient to use LU12I and ADD. + */ + if (imm == (hi16 << 16) + lo12) { + if (hi16) { + tcg_out_opc_addu16i_d(s, rd, rs, hi16); + rs = rd; + } + if (type == TCG_TYPE_I32) { + tcg_out_opc_addi_w(s, rd, rs, lo12); + } else if (lo12) { + tcg_out_opc_addi_d(s, rd, rs, lo12); + } else { + tcg_out_mov(s, type, rd, rs); + } + } else { + tcg_out_movi(s, type, TCG_REG_TMP0, imm); + if (type == TCG_TYPE_I32) { + tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0); + } else { + tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0); + } + } +} + +static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) +{ + return false; +} + +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. 
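+ *
+ * Since tcg-target.h defines TCG_TARGET_CALL_ARG_I128 as TCG_CALL_ARG_NORMAL,
+ * 128-bit values are passed in register pairs rather than by reference, so
+ * this path is expected to be unreachable here; hence the assertion below.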
*/ + g_assert_not_reached(); +} + +static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_andi(s, ret, arg, 0xff); +} + +static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_bstrpick_w(s, ret, arg, 0, 15); +} + +static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_bstrpick_d(s, ret, arg, 0, 31); +} + +static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_sext_b(s, ret, arg); +} + +static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_sext_h(s, ret, arg); +} + +static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_addi_w(s, ret, arg, 0); +} + +static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (ret != arg) { + tcg_out_ext32s(s, ret, arg); + } +} + +static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_ext32u(s, ret, arg); +} + +static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_ext32s(s, ret, arg); +} + +static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, + TCGReg a0, TCGReg a1, TCGReg a2, + bool c2, bool is_32bit) +{ + if (c2) { + /* + * Fast path: semantics already satisfied due to constraint and + * insn behavior, single instruction is enough. + */ + tcg_debug_assert(a2 == (is_32bit ? 32 : 64)); + /* all clz/ctz insns belong to DJ-format */ + tcg_out32(s, encode_dj_insn(opc, a0, a1)); + return; + } + + tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1)); + /* a0 = a1 ? REG_TMP0 : a2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); + tcg_out_opc_masknez(s, a0, a2, a1); + tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); +} + +#define SETCOND_INV TCG_TARGET_NB_REGS +#define SETCOND_NEZ (SETCOND_INV << 1) +#define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) + +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) +{ + int flags = 0; + + switch (cond) { + case TCG_COND_EQ: /* -> NE */ + case TCG_COND_GE: /* -> LT */ + case TCG_COND_GEU: /* -> LTU */ + case TCG_COND_GT: /* -> LE */ + case TCG_COND_GTU: /* -> LEU */ + cond = tcg_invert_cond(cond); + flags ^= SETCOND_INV; + break; + default: + break; + } + + switch (cond) { + case TCG_COND_LE: + case TCG_COND_LEU: + /* + * If we have a constant input, the most efficient way to implement + * LE is by adding 1 and using LT. Watch out for wrap around for LEU. + * We don't need to care for this for LE because the constant input + * is still constrained to int32_t, and INT32_MAX+1 is representable + * in the 64-bit temporary register. 
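+ *
+ * For example, "x <= 5" is rewritten as "x < 6" and ends up as a single
+ * slti, while the unsigned compare "x <=u (unsigned)-1" is always true and
+ * is handled by the arg2 == -1 special case below without emitting any
+ * compare instruction at all.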
+ */ + if (c2) { + if (cond == TCG_COND_LEU) { + /* unsigned <= -1 is true */ + if (arg2 == -1) { + tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV)); + return ret; + } + cond = TCG_COND_LTU; + } else { + cond = TCG_COND_LT; + } + arg2 += 1; + } else { + TCGReg tmp = arg2; + arg2 = arg1; + arg1 = tmp; + cond = tcg_swap_cond(cond); /* LE -> GE */ + cond = tcg_invert_cond(cond); /* GE -> LT */ + flags ^= SETCOND_INV; + } + break; + default: + break; + } + + switch (cond) { + case TCG_COND_NE: + flags |= SETCOND_NEZ; + if (!c2) { + tcg_out_opc_xor(s, ret, arg1, arg2); + } else if (arg2 == 0) { + ret = arg1; + } else if (arg2 >= 0 && arg2 <= 0xfff) { + tcg_out_opc_xori(s, ret, arg1, arg2); + } else { + tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2); + } + break; + + case TCG_COND_LT: + case TCG_COND_LTU: + if (c2) { + if (arg2 >= -0x800 && arg2 <= 0x7ff) { + if (cond == TCG_COND_LT) { + tcg_out_opc_slti(s, ret, arg1, arg2); + } else { + tcg_out_opc_sltui(s, ret, arg1, arg2); + } + break; + } + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2); + arg2 = TCG_REG_TMP0; + } + if (cond == TCG_COND_LT) { + tcg_out_opc_slt(s, ret, arg1, arg2); + } else { + tcg_out_opc_sltu(s, ret, arg1, arg2); + } + break; + + default: + g_assert_not_reached(); + break; + } + + return ret | flags; +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) +{ + int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); + + if (tmpflags != ret) { + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; + + switch (tmpflags & SETCOND_FLAGS) { + case SETCOND_INV: + /* Intermediate result is boolean: simply invert. */ + tcg_out_opc_xori(s, ret, tmp, 1); + break; + case SETCOND_NEZ: + /* Intermediate result is zero/non-zero: test != 0. */ + tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); + break; + case SETCOND_NEZ | SETCOND_INV: + /* Intermediate result is zero/non-zero: test == 0. */ + tcg_out_opc_sltui(s, ret, tmp, 1); + break; + default: + g_assert_not_reached(); + } + } +} + +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, tcg_target_long c2, bool const2, + TCGReg v1, TCGReg v2) +{ + int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2); + TCGReg t; + + /* Standardize the test below to t != 0. */ + if (tmpflags & SETCOND_INV) { + t = v1, v1 = v2, v2 = t; + } + + t = tmpflags & ~SETCOND_FLAGS; + if (v1 == TCG_REG_ZERO) { + tcg_out_opc_masknez(s, ret, v2, t); + } else if (v2 == TCG_REG_ZERO) { + tcg_out_opc_maskeqz(s, ret, v1, t); + } else { + tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? 
v1 : 0 */ + tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2); + } +} + +/* + * Branch helpers + */ + +static const struct { + LoongArchInsn op; + bool swap; +} tcg_brcond_to_loongarch[] = { + [TCG_COND_EQ] = { OPC_BEQ, false }, + [TCG_COND_NE] = { OPC_BNE, false }, + [TCG_COND_LT] = { OPC_BGT, true }, + [TCG_COND_GE] = { OPC_BLE, true }, + [TCG_COND_LE] = { OPC_BLE, false }, + [TCG_COND_GT] = { OPC_BGT, false }, + [TCG_COND_LTU] = { OPC_BGTU, true }, + [TCG_COND_GEU] = { OPC_BLEU, true }, + [TCG_COND_LEU] = { OPC_BLEU, false }, + [TCG_COND_GTU] = { OPC_BGTU, false } +}; + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, TCGLabel *l) +{ + LoongArchInsn op = tcg_brcond_to_loongarch[cond].op; + + tcg_debug_assert(op != 0); + + if (tcg_brcond_to_loongarch[cond].swap) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + + /* all conditional branch insns belong to DJSk16-format */ + tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SK16, l, 0); + tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0)); +} + +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) +{ + TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; + ptrdiff_t offset = tcg_pcrel_diff(s, (void *)arg); + + tcg_debug_assert((offset & 3) == 0); + if (offset == sextreg(offset, 0, 28)) { + /* short jump: +/- 256MiB */ + if (tail) { + tcg_out_opc_b(s, offset >> 2); + } else { + tcg_out_opc_bl(s, offset >> 2); + } + } else if (offset == sextreg(offset, 0, 38)) { + /* long jump: +/- 256GiB */ + tcg_target_long lo = sextreg(offset, 0, 18); + tcg_target_long hi = offset - lo; + tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, hi >> 18); + tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); + } else { + /* far jump: 64-bit */ + tcg_target_long lo = sextreg((tcg_target_long)arg, 0, 18); + tcg_target_long hi = (tcg_target_long)arg - lo; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, hi); + tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); + } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +{ + tcg_out_call_int(s, target, false); +} + +/* + * Load/store helpers + */ + +static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data, + TCGReg addr, intptr_t offset) +{ + intptr_t imm12 = sextreg(offset, 0, 12); + + if (offset != imm12) { + intptr_t diff = tcg_pcrel_diff(s, (void *)offset); + + if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { + imm12 = sextreg(diff, 0, 12); + tcg_out_opc_pcaddu12i(s, TCG_REG_TMP2, (diff - imm12) >> 12); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12); + if (addr != TCG_REG_ZERO) { + tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, addr); + } + } + addr = TCG_REG_TMP2; + } + + switch (opc) { + case OPC_LD_B: + case OPC_LD_BU: + case OPC_LD_H: + case OPC_LD_HU: + case OPC_LD_W: + case OPC_LD_WU: + case OPC_LD_D: + case OPC_ST_B: + case OPC_ST_H: + case OPC_ST_W: + case OPC_ST_D: + tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12)); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + bool is_32bit = type == TCG_TYPE_I32; + tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2); +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + bool is_32bit = type == TCG_TYPE_I32; + tcg_out_ldst(s, is_32bit ? 
OPC_ST_W : OPC_ST_D, arg, arg1, arg2); +} + +static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + if (val == 0) { + tcg_out_st(s, type, TCG_REG_ZERO, base, ofs); + return true; + } + return false; +} + +/* + * Load/store helpers for SoftMMU, and qemu_ld/st implementations + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +#if TCG_TARGET_REG_BITS == 64 + [MO_LESL] = helper_le_ldsl_mmu, + [MO_BESL] = helper_be_ldsl_mmu, +#endif +}; + +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * I is where we want to put this argument, and is updated and returned + * for the next call. ARG is the argument itself. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use). + */ + +static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) +{ + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); + } + return i + 1; +} + +static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_REG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_andi(s, tmp, arg, 0xff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_REG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_andi(s, tmp, arg, 0xffff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) +{ + TCGReg tmp = TCG_REG_TMP0; + if (arg == 0) { + tmp = TCG_REG_ZERO; + } else { + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); + } + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) +{ + tcg_out_opc_b(s, 0); + return reloc_br_sd10k16(s->code_ptr - 1, target); +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + MemOp opc = get_memop(oi); + MemOp size = opc & MO_SIZE; + TCGType type = l->type; + + /* resolve label address */ + if (!reloc_br_sk16(l->label_ptr[0], (s->code_ptr))) { + return false; + } + + /* call load helper */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr); + + tcg_out_call(s, qemu_ld_helpers[size]); + + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_out_ext8s(s, type, l->datalo_reg, TCG_REG_A0); + break; + case MO_SW: + tcg_out_ext16s(s, type, l->datalo_reg, TCG_REG_A0); + 
break; + case MO_SL: + tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0); + break; + case MO_UL: + if (type == TCG_TYPE_I32) { + /* MO_UL loads of i32 should be sign-extended too */ + tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0); + break; + } + /* fallthrough */ + default: + tcg_out_mov(s, type, l->datalo_reg, TCG_REG_A0); + break; + } + + return tcg_out_goto(s, l->raddr); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + MemOp opc = get_memop(oi); + MemOp size = opc & MO_SIZE; + + /* resolve label address */ + if (!reloc_br_sk16(l->label_ptr[0], (s->code_ptr))) { + return false; + } + + /* call store helper */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); + switch (size) { + case MO_8: + tcg_out_ext8u(s, TCG_REG_A2, l->datalo_reg); + break; + case MO_16: + tcg_out_ext16u(s, TCG_REG_A2, l->datalo_reg); + break; + case MO_32: + tcg_out_ext32u(s, TCG_REG_A2, l->datalo_reg); + break; + case MO_64: + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_A2, l->datalo_reg); + break; + default: + g_assert_not_reached(); + break; + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr); + + tcg_out_call(s, qemu_st_helpers[size]); + + return tcg_out_goto(s, l->raddr); +} + +typedef struct { + MemOp atom; /* lg2 bits of atomicity required */ + MemOp align; /* lg2 bits of alignment to use */ +} TCGAtomAlign; + +typedef struct { + TCGReg base; + TCGReg index; +} HostAddress; + +// bool tcg_target_has_memory_bswap(MemOp memop) +// { +// return false; +// } + +/* We expect to use a 12-bit negative offset from ENV. */ +#define MIN_TLB_MASK_TABLE_OFS -(1 << 11) + +#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER) +static int tlb_mask_table_ofs(TCGContext *s, int which) +{ + return (offsetof(CPUNegativeOffsetState, tlb.f[which]) - + sizeof(CPUNegativeOffsetState)); +} +#endif + +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addr_reg, TCGMemOpIdx oi, + bool is_ld, TCGType addr_type) +{ +#ifdef TARGET_ARM + struct uc_struct *uc = s->uc; +#endif + + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + MemOp a_bits = get_alignment_bits(opc); + +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + int mem_index = get_mmuidx(oi); + int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); + int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); + int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); + + tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); + tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); + + /* Load the tlb comparator and the addend. */ + // QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN); + tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, + is_ld ? 
offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, + offsetof(CPUTLBEntry, addend)); + + /* + * For aligned accesses, we check the first byte and include the alignment + * bits within the address. For unaligned access, we check that we don't + * cross pages using the address of the last byte of the access. + */ + if (a_bits < s_bits) { + unsigned a_mask = (1u << a_bits) - 1; + unsigned s_mask = (1u << s_bits) - 1; + tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask); + } else { + tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg); + } + tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO, + a_bits, TARGET_PAGE_BITS - 1); + + /* Compare masked address with the TLB entry. */ + ldst->label_ptr[0] = s->code_ptr; + // tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); + tcg_out_opc_beq(s, 0, 0, 0); + + h->index = TCG_REG_TMP2; +#else + if (a_bits) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* + * Without micro-architecture details, we don't know which of + * bstrpick or andi is faster, so use bstrpick as it's not + * constrained by imm field width. Not to say alignments >= 2^12 + * are going to happen any time soon. + */ + tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); + + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); + } + + h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; +#endif + + if (addr_type == TCG_TYPE_I32) { + h->base = TCG_REG_TMP0; + tcg_out_ext32u(s, h->base, addr_reg); + } else { + h->base = addr_reg; + } + + return ldst; +} + +// static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, +// TCGReg addr_reg, TCGMemOpIdx oi, +// bool is_ld, TCGType addr_type) +// { +// TCGLabelQemuLdst *ldst = NULL; +// MemOp opc = get_memop(oi); +// unsigned a_bits = get_alignment_bits(opc); + +// #ifdef CONFIG_SOFTMMU +// unsigned s_bits = opc & MO_SIZE; +// int mem_index = get_mmuidx(oi); +// int table_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); +// int mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1); + +// ldst = new_ldst_label(s); +// ldst->is_ld = is_ld; +// ldst->oi = oi; +// ldst->addrlo_reg = addr_reg; + +// tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP0, mask); + +// tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); + +// tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, +// TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); +// tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); +// tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); + +// /* Load the tlb comparator and the addend. */ +// tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, +// is_ld ? offsetof(CPUTLBEntry, addr_read) +// : offsetof(CPUTLBEntry, addr_write)); +// tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, +// offsetof(CPUTLBEntry, addend)); + +// /* +// * For aligned accesses, we check the first byte and include the alignment +// * bits within the address. For unaligned access, we check that we don't +// * cross pages using the address of the last byte of the access. 
+// */ +// if (a_bits < s_bits) { +// unsigned a_mask = (1u << a_bits) - 1; +// unsigned s_mask = (1u << s_bits) - 1; +// tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask); +// } else { +// tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg); +// } +// tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO, +// a_bits, TARGET_PAGE_BITS - 1); + +// /* Compare masked address with the TLB entry. */ +// ldst->label_ptr[0] = s->code_ptr; +// tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); + +// h->index = TCG_REG_TMP2; +// #else +// if (a_bits) { +// ldst = new_ldst_label(s); + +// ldst->is_ld = is_ld; +// ldst->oi = oi; +// ldst->addrlo_reg = addr_reg; + +// /* +// * Without micro-architecture details, we don't know which of +// * bstrpick or andi is faster, so use bstrpick as it's not +// * constrained by imm field width. Not to say alignments >= 2^12 +// * are going to happen any time soon. +// */ +// tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); + +// ldst->label_ptr[0] = s->code_ptr; +// tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); +// } + +// h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; +// #endif + +// if (addr_type == TCG_TYPE_I32) { +// h->base = TCG_REG_TMP0; +// tcg_out_ext32u(s, h->base, addr_reg); +// } else { +// h->base = addr_reg; +// } + +// return ldst; +// } + +static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, + TCGReg rd, HostAddress h) +{ + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); + + switch (opc & MO_SSIZE) { + case MO_UB: + tcg_out_opc_ldx_bu(s, rd, h.base, h.index); + break; + case MO_SB: + tcg_out_opc_ldx_b(s, rd, h.base, h.index); + break; + case MO_UW: + tcg_out_opc_ldx_hu(s, rd, h.base, h.index); + break; + case MO_SW: + tcg_out_opc_ldx_h(s, rd, h.base, h.index); + break; + case MO_UL: + if (type == TCG_TYPE_I64) { + tcg_out_opc_ldx_wu(s, rd, h.base, h.index); + break; + } + /* fallthrough */ + case MO_SL: + tcg_out_opc_ldx_w(s, rd, h.base, h.index); + break; + case MO_Q: + tcg_out_opc_ldx_d(s, rd, h.base, h.index); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi, TCGType data_type) +{ + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr_reg, oi, true, data_type); + tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h); + + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = s->code_ptr; + } +} + +static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, + TCGReg rd, HostAddress h) +{ + /* Byte swapping is left to middle-end expansion. 
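+ *
+ * TCG_TARGET_HAS_MEMORY_BSWAP is 0 in tcg-target.h, so a guest access whose
+ * endianness differs from the host's has already been split by the middle
+ * end into a plain access plus an explicit bswap op by the time we get here;
+ * the assertion below checks exactly that.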
*/ + tcg_debug_assert((opc & MO_BSWAP) == 0); + + switch (opc & MO_SIZE) { + case MO_8: + tcg_out_opc_stx_b(s, rd, h.base, h.index); + break; + case MO_16: + tcg_out_opc_stx_h(s, rd, h.base, h.index); + break; + case MO_32: + tcg_out_opc_stx_w(s, rd, h.base, h.index); + break; + case MO_64: + tcg_out_opc_stx_d(s, rd, h.base, h.index); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi, TCGType data_type) +{ + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr_reg, oi, false, data_type); + tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h); + + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = s->code_ptr; + } +} + +/* + * Entry-points + */ + +// static tcg_insn_unit *tcg_code_gen_epilogue; +// static tcg_insn_unit *tb_ret_addr; + +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_call_int(s, s->code_gen_epilogue, true); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); + tcg_out_call_int(s, s->tb_ret_addr, true); + } +} + +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, + uintptr_t addr) +{ + uintptr_t d_addr = addr; + ptrdiff_t d_disp = (ptrdiff_t)(d_addr - jmp_addr) >> 2; + tcg_insn_unit insn; + + /* Either directly branch, or load slot address for indirect branch. */ + if (d_disp == sextreg(d_disp, 0, 26)) { + insn = encode_sd10k16_insn(OPC_B, d_disp); + } else { + uintptr_t i_addr = addr; + intptr_t i_disp = i_addr - jmp_addr; + insn = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0, i_disp >> 2); + } + + *(tcg_insn_unit *)jmp_addr = insn; + // flush_idcache_range(jmp_rx, jmp_rw, 4); + flush_icache_range(jmp_addr, jmp_addr + 8); +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) +{ + TCGArg a0 = args[0]; + TCGArg a1 = args[1]; + TCGArg a2 = args[2]; + TCGArg a3 = args[3]; + int c2 = const_args[2]; + + switch (opc) { + case INDEX_op_mb: + tcg_out_mb(s, a0); + break; + + case INDEX_op_goto_ptr: + tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); + break; + + case INDEX_op_br: + tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0), + 0); + tcg_out_opc_b(s, 0); + break; + + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); + break; + + case INDEX_op_extrh_i64_i32: + tcg_out_opc_srai_d(s, a0, a1, 32); + break; + + case INDEX_op_not_i32: + case INDEX_op_not_i64: + tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO); + break; + + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + if (c2) { + tcg_out_opc_ori(s, a0, a1, a2); + tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO); + } else { + tcg_out_opc_nor(s, a0, a1, a2); + } + break; + + case INDEX_op_andc_i32: + case INDEX_op_andc_i64: + if (c2) { + /* guaranteed to fit due to constraint */ + tcg_out_opc_andi(s, a0, a1, ~a2); + } else { + tcg_out_opc_andn(s, a0, a1, a2); + } + break; + + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + if (c2) { + /* guaranteed to fit due to constraint */ + tcg_out_opc_ori(s, a0, a1, ~a2); + } else { + tcg_out_opc_orn(s, a0, a1, a2); + } + break; + + case INDEX_op_and_i32: + case INDEX_op_and_i64: + if (c2) { + tcg_out_opc_andi(s, a0, a1, a2); + } else { + tcg_out_opc_and(s, a0, a1, a2); + } + break; + + case INDEX_op_or_i32: + case INDEX_op_or_i64: + if (c2) { + tcg_out_opc_ori(s, a0, a1, a2); + } 
else { + tcg_out_opc_or(s, a0, a1, a2); + } + break; + + case INDEX_op_xor_i32: + case INDEX_op_xor_i64: + if (c2) { + tcg_out_opc_xori(s, a0, a1, a2); + } else { + tcg_out_opc_xor(s, a0, a1, a2); + } + break; + + case INDEX_op_extract_i32: + tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1); + break; + case INDEX_op_extract_i64: + tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1); + break; + + case INDEX_op_deposit_i32: + tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1); + break; + case INDEX_op_deposit_i64: + tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1); + break; + + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + tcg_out_opc_revb_2h(s, a0, a1); + break; + + case INDEX_op_bswap32_i32: + /* All 32-bit values are computed sign-extended in the register. */ + /* fallthrough */ + case INDEX_op_bswap32_i64: + tcg_out_opc_revb_2w(s, a0, a1); + break; + + case INDEX_op_bswap64_i64: + tcg_out_opc_revb_d(s, a0, a1); + break; + + case INDEX_op_clz_i32: + tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true); + break; + case INDEX_op_clz_i64: + tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false); + break; + + case INDEX_op_ctz_i32: + tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true); + break; + case INDEX_op_ctz_i64: + tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); + break; + + case INDEX_op_shl_i32: + if (c2) { + tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_sll_w(s, a0, a1, a2); + } + break; + case INDEX_op_shl_i64: + if (c2) { + tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f); + } else { + tcg_out_opc_sll_d(s, a0, a1, a2); + } + break; + + case INDEX_op_shr_i32: + if (c2) { + tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_srl_w(s, a0, a1, a2); + } + break; + case INDEX_op_shr_i64: + if (c2) { + tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f); + } else { + tcg_out_opc_srl_d(s, a0, a1, a2); + } + break; + + case INDEX_op_sar_i32: + if (c2) { + tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_sra_w(s, a0, a1, a2); + } + break; + case INDEX_op_sar_i64: + if (c2) { + tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f); + } else { + tcg_out_opc_sra_d(s, a0, a1, a2); + } + break; + + case INDEX_op_rotl_i32: + /* transform into equivalent rotr/rotri */ + if (c2) { + tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f); + } else { + tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); + tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0); + } + break; + case INDEX_op_rotl_i64: + /* transform into equivalent rotr/rotri */ + if (c2) { + tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f); + } else { + tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); + tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0); + } + break; + + case INDEX_op_rotr_i32: + if (c2) { + tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_rotr_w(s, a0, a1, a2); + } + break; + case INDEX_op_rotr_i64: + if (c2) { + tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f); + } else { + tcg_out_opc_rotr_d(s, a0, a1, a2); + } + break; + + case INDEX_op_add_i32: + if (c2) { + tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2); + } else { + tcg_out_opc_add_w(s, a0, a1, a2); + } + break; + case INDEX_op_add_i64: + if (c2) { + tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2); + } else { + tcg_out_opc_add_d(s, a0, a1, a2); + } + break; + + case INDEX_op_sub_i32: + if (c2) { + tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2); + } else { + tcg_out_opc_sub_w(s, a0, a1, a2); + } + break; + case INDEX_op_sub_i64: + if (c2) { + tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2); + } else { + 
tcg_out_opc_sub_d(s, a0, a1, a2); + } + break; + + case INDEX_op_mul_i32: + tcg_out_opc_mul_w(s, a0, a1, a2); + break; + case INDEX_op_mul_i64: + tcg_out_opc_mul_d(s, a0, a1, a2); + break; + + case INDEX_op_mulsh_i32: + tcg_out_opc_mulh_w(s, a0, a1, a2); + break; + case INDEX_op_mulsh_i64: + tcg_out_opc_mulh_d(s, a0, a1, a2); + break; + + case INDEX_op_muluh_i32: + tcg_out_opc_mulh_wu(s, a0, a1, a2); + break; + case INDEX_op_muluh_i64: + tcg_out_opc_mulh_du(s, a0, a1, a2); + break; + + case INDEX_op_div_i32: + tcg_out_opc_div_w(s, a0, a1, a2); + break; + case INDEX_op_div_i64: + tcg_out_opc_div_d(s, a0, a1, a2); + break; + + case INDEX_op_divu_i32: + tcg_out_opc_div_wu(s, a0, a1, a2); + break; + case INDEX_op_divu_i64: + tcg_out_opc_div_du(s, a0, a1, a2); + break; + + case INDEX_op_rem_i32: + tcg_out_opc_mod_w(s, a0, a1, a2); + break; + case INDEX_op_rem_i64: + tcg_out_opc_mod_d(s, a0, a1, a2); + break; + + case INDEX_op_remu_i32: + tcg_out_opc_mod_wu(s, a0, a1, a2); + break; + case INDEX_op_remu_i64: + tcg_out_opc_mod_du(s, a0, a1, a2); + break; + + case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: + tcg_out_setcond(s, args[3], a0, a1, a2, c2); + break; + + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]); + break; + + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); + break; + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2); + break; + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + tcg_out_ldst(s, OPC_LD_H, a0, a1, a2); + break; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32s_i64: + tcg_out_ldst(s, OPC_LD_W, a0, a1, a2); + break; + case INDEX_op_ld32u_i64: + tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2); + break; + case INDEX_op_ld_i64: + tcg_out_ldst(s, OPC_LD_D, a0, a1, a2); + break; + + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_ldst(s, OPC_ST_B, a0, a1, a2); + break; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_ldst(s, OPC_ST_H, a0, a1, a2); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_ldst(s, OPC_ST_W, a0, a1, a2); + break; + case INDEX_op_st_i64: + tcg_out_ldst(s, OPC_ST_D, a0, a1, a2); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_insn_offset) { + /* TODO */ + g_assert_not_reached(); + } else { + /* indirect jump method */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, + (uintptr_t)(s->tb_jmp_target_addr + a0)); + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); + } + s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); + break; + case INDEX_op_exit_tb: + tcg_out_exit_tb(s, a0); + break; + + case INDEX_op_ext8s_i32: + tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_ext8s_i64: + tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1); + break; + case INDEX_op_ext8u_i32: + case INDEX_op_ext8u_i64: + tcg_out_ext8u(s, a0, a1); + break; + case INDEX_op_ext16s_i32: + tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_ext16s_i64: + tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1); + break; + case 
INDEX_op_ext16u_i32: + case INDEX_op_ext16u_i64: + tcg_out_ext16u(s, a0, a1); + break; + case INDEX_op_ext32s_i64: + tcg_out_ext32s(s, a0, a1); + break; + case INDEX_op_ext32u_i64: + tcg_out_ext32u(s, a0, a1); + break; + case INDEX_op_ext_i32_i64: + tcg_out_exts_i32_i64(s, a0, a1); + break; + case INDEX_op_extu_i32_i64: + tcg_out_extu_i32_i64(s, a0, a1); + break; + case INDEX_op_extrl_i64_i32: + tcg_out_extrl_i64_i32(s, a0, a1); + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + // case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ + // case INDEX_op_ext8s_i64: + // case INDEX_op_ext8u_i32: + // case INDEX_op_ext8u_i64: + // case INDEX_op_ext16s_i32: + // case INDEX_op_ext16s_i64: + // case INDEX_op_ext16u_i32: + // case INDEX_op_ext16u_i64: + // case INDEX_op_ext32s_i64: + // case INDEX_op_ext32u_i64: + // case INDEX_op_ext_i32_i64: + // case INDEX_op_extu_i32_i64: + // case INDEX_op_extrl_i64_i32: + default: + g_assert_not_reached(); + } +} + +static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg rd, TCGReg rs) +{ + switch (vece) { + case MO_8: + tcg_out_opc_vreplgr2vr_b(s, rd, rs); + break; + case MO_16: + tcg_out_opc_vreplgr2vr_h(s, rd, rs); + break; + case MO_32: + tcg_out_opc_vreplgr2vr_w(s, rd, rs); + break; + case MO_64: + tcg_out_opc_vreplgr2vr_d(s, rd, rs); + break; + default: + g_assert_not_reached(); + } + return true; +} + +static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg r, TCGReg base, intptr_t offset) +{ + /* Handle imm overflow and division (vldrepl.d imm is divided by 8) */ + if (offset < -0x800 || offset > 0x7ff || \ + (offset & ((1 << vece) - 1)) != 0) { + tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset); + base = TCG_REG_TMP0; + offset = 0; + } + offset >>= vece; + + switch (vece) { + case MO_8: + tcg_out_opc_vldrepl_b(s, r, base, offset); + break; + case MO_16: + tcg_out_opc_vldrepl_h(s, r, base, offset); + break; + case MO_32: + tcg_out_opc_vldrepl_w(s, r, base, offset); + break; + case MO_64: + tcg_out_opc_vldrepl_d(s, r, base, offset); + break; + default: + g_assert_not_reached(); + } + return true; +} + +// static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, +// TCGReg rd, int64_t v64) +// { +// /* Try vldi if imm can fit */ +// int64_t value = sextract64(v64, 0, 8 << vece); +// if (-0x200 <= value && value <= 0x1FF) { +// uint32_t imm = (vece << 10) | ((uint32_t)v64 & 0x3FF); +// tcg_out_opc_vldi(s, rd, imm); +// return; +// } + +// /* TODO: vldi patterns when imm 12 is set */ + +// /* Fallback to vreplgr2vr */ +// tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value); +// switch (vece) { +// case MO_8: +// tcg_out_opc_vreplgr2vr_b(s, rd, TCG_REG_TMP0); +// break; +// case MO_16: +// tcg_out_opc_vreplgr2vr_h(s, rd, TCG_REG_TMP0); +// break; +// case MO_32: +// tcg_out_opc_vreplgr2vr_w(s, rd, TCG_REG_TMP0); +// break; +// case MO_64: +// tcg_out_opc_vreplgr2vr_d(s, rd, TCG_REG_TMP0); +// break; +// default: +// g_assert_not_reached(); +// } +// } + +static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0, + const TCGArg a1, const TCGArg a2, + bool a2_is_const, bool is_add) +{ + static const LoongArchInsn add_vec_insn[4] = { + OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D + }; + static const LoongArchInsn add_vec_imm_insn[4] = { + OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU + }; + static const LoongArchInsn sub_vec_insn[4] 
= { + OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D + }; + static const LoongArchInsn sub_vec_imm_insn[4] = { + OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU + }; + + if (a2_is_const) { + int64_t value = sextract64(a2, 0, 8 << vece); + if (!is_add) { + value = -value; + } + + /* Try vaddi/vsubi */ + if (0 <= value && value <= 0x1f) { + tcg_out32(s, encode_vdvjuk5_insn(add_vec_imm_insn[vece], a0, \ + a1, value)); + return; + } else if (-0x1f <= value && value < 0) { + tcg_out32(s, encode_vdvjuk5_insn(sub_vec_imm_insn[vece], a0, \ + a1, -value)); + return; + } + + /* constraint TCG_CT_CONST_VADD ensures unreachable */ + g_assert_not_reached(); + } + + if (is_add) { + tcg_out32(s, encode_vdvjvk_insn(add_vec_insn[vece], a0, a1, a2)); + } else { + tcg_out32(s, encode_vdvjvk_insn(sub_vec_insn[vece], a0, a1, a2)); + } +} + +static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, + unsigned vecl, unsigned vece, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) +{ + TCGType type = vecl + TCG_TYPE_V64; + TCGArg a0, a1, a2, a3; + TCGReg temp = TCG_REG_TMP0; + TCGReg temp_vec = TCG_VEC_TMP0; + + static const LoongArchInsn cmp_vec_insn[16][4] = { + [TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D}, + [TCG_COND_LE] = {OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D}, + [TCG_COND_LEU] = {OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU}, + [TCG_COND_LT] = {OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D}, + [TCG_COND_LTU] = {OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU}, + }; + static const LoongArchInsn cmp_vec_imm_insn[16][4] = { + [TCG_COND_EQ] = {OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D}, + [TCG_COND_LE] = {OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D}, + [TCG_COND_LEU] = {OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU}, + [TCG_COND_LT] = {OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D}, + [TCG_COND_LTU] = {OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU}, + }; + LoongArchInsn insn; + static const LoongArchInsn neg_vec_insn[4] = { + OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D + }; + static const LoongArchInsn mul_vec_insn[4] = { + OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D + }; + static const LoongArchInsn smin_vec_insn[4] = { + OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D + }; + static const LoongArchInsn umin_vec_insn[4] = { + OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU + }; + static const LoongArchInsn smax_vec_insn[4] = { + OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D + }; + static const LoongArchInsn umax_vec_insn[4] = { + OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU + }; + static const LoongArchInsn ssadd_vec_insn[4] = { + OPC_VSADD_B, OPC_VSADD_H, OPC_VSADD_W, OPC_VSADD_D + }; + static const LoongArchInsn usadd_vec_insn[4] = { + OPC_VSADD_BU, OPC_VSADD_HU, OPC_VSADD_WU, OPC_VSADD_DU + }; + static const LoongArchInsn sssub_vec_insn[4] = { + OPC_VSSUB_B, OPC_VSSUB_H, OPC_VSSUB_W, OPC_VSSUB_D + }; + static const LoongArchInsn ussub_vec_insn[4] = { + OPC_VSSUB_BU, OPC_VSSUB_HU, OPC_VSSUB_WU, OPC_VSSUB_DU + }; + static const LoongArchInsn shlv_vec_insn[4] = { + OPC_VSLL_B, OPC_VSLL_H, OPC_VSLL_W, OPC_VSLL_D + }; + static const LoongArchInsn shrv_vec_insn[4] = { + OPC_VSRL_B, OPC_VSRL_H, OPC_VSRL_W, OPC_VSRL_D + }; + static const LoongArchInsn sarv_vec_insn[4] = { + OPC_VSRA_B, OPC_VSRA_H, OPC_VSRA_W, OPC_VSRA_D + }; + static const LoongArchInsn shli_vec_insn[4] = { + OPC_VSLLI_B, OPC_VSLLI_H, OPC_VSLLI_W, OPC_VSLLI_D + }; + static const LoongArchInsn shri_vec_insn[4] = { + 
OPC_VSRLI_B, OPC_VSRLI_H, OPC_VSRLI_W, OPC_VSRLI_D + }; + static const LoongArchInsn sari_vec_insn[4] = { + OPC_VSRAI_B, OPC_VSRAI_H, OPC_VSRAI_W, OPC_VSRAI_D + }; + static const LoongArchInsn rotrv_vec_insn[4] = { + OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D + }; + + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + a3 = args[3]; + + /* Currently only supports V128 */ + tcg_debug_assert(type == TCG_TYPE_V128); + + switch (opc) { + case INDEX_op_st_vec: + /* Try to fit vst imm */ + if (-0x800 <= a2 && a2 <= 0x7ff) { + tcg_out_opc_vst(s, a0, a1, a2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, temp, a2); + tcg_out_opc_vstx(s, a0, a1, temp); + } + break; + case INDEX_op_ld_vec: + /* Try to fit vld imm */ + if (-0x800 <= a2 && a2 <= 0x7ff) { + tcg_out_opc_vld(s, a0, a1, a2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, temp, a2); + tcg_out_opc_vldx(s, a0, a1, temp); + } + break; + case INDEX_op_and_vec: + tcg_out_opc_vand_v(s, a0, a1, a2); + break; + case INDEX_op_andc_vec: + /* + * vandn vd, vj, vk: vd = vk & ~vj + * andc_vec vd, vj, vk: vd = vj & ~vk + * vk and vk are swapped + */ + tcg_out_opc_vandn_v(s, a0, a2, a1); + break; + case INDEX_op_or_vec: + tcg_out_opc_vor_v(s, a0, a1, a2); + break; + case INDEX_op_orc_vec: + tcg_out_opc_vorn_v(s, a0, a1, a2); + break; + case INDEX_op_xor_vec: + tcg_out_opc_vxor_v(s, a0, a1, a2); + break; + case INDEX_op_not_vec: + tcg_out_opc_vnor_v(s, a0, a1, a1); + break; + case INDEX_op_cmp_vec: + { + TCGCond cond = args[3]; + if (const_args[2]) { + /* + * cmp_vec dest, src, value + * Try vseqi/vslei/vslti + */ + int64_t value = sextract64(a2, 0, 8 << vece); + if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \ + cond == TCG_COND_LT) && (-0x10 <= value && value <= 0x0f)) { + tcg_out32(s, encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], \ + a0, a1, value)); + break; + } else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) && + (0x00 <= value && value <= 0x1f)) { + tcg_out32(s, encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], \ + a0, a1, value)); + break; + } + + /* + * Fallback to: + * dupi_vec temp, a2 + * cmp_vec a0, a1, temp, cond + */ + // tcg_out_dupi_vec(s, type, vece, temp_vec, a2); + /* Try vldi if imm can fit */ + if (-0x200 <= value && value <= 0x1FF) { + uint32_t imm = (vece << 10) | ((uint32_t)a2 & 0x3FF); + tcg_out_opc_vldi(s, temp_vec, imm); + goto enddupi; + } + + /* TODO: vldi patterns when imm 12 is set */ + + /* Fallback to vreplgr2vr */ + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value); + switch (vece) { + case MO_8: + tcg_out_opc_vreplgr2vr_b(s, temp_vec, TCG_REG_TMP0); + break; + case MO_16: + tcg_out_opc_vreplgr2vr_h(s, temp_vec, TCG_REG_TMP0); + break; + case MO_32: + tcg_out_opc_vreplgr2vr_w(s, temp_vec, TCG_REG_TMP0); + break; + case MO_64: + tcg_out_opc_vreplgr2vr_d(s, temp_vec, TCG_REG_TMP0); + break; + default: + g_assert_not_reached(); + } + enddupi: + a2 = temp_vec; + } + + insn = cmp_vec_insn[cond][vece]; + if (insn == 0) { + TCGArg t; + t = a1, a1 = a2, a2 = t; + cond = tcg_swap_cond(cond); + insn = cmp_vec_insn[cond][vece]; + tcg_debug_assert(insn != 0); + } + tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2)); + } + break; + case INDEX_op_add_vec: + tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true); + break; + case INDEX_op_sub_vec: + tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], false); + break; + case INDEX_op_neg_vec: + tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1)); + break; + case INDEX_op_mul_vec: + tcg_out32(s, encode_vdvjvk_insn(mul_vec_insn[vece], a0, a1, a2)); + break; + 
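+    /*
+     * Descriptive note added in review: the min/max, saturating add/sub and
+     * shift cases below each lower to a single LSX instruction, picked by
+     * element size from the opcode tables declared above.
+     */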
case INDEX_op_smin_vec: + tcg_out32(s, encode_vdvjvk_insn(smin_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_smax_vec: + tcg_out32(s, encode_vdvjvk_insn(smax_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_umin_vec: + tcg_out32(s, encode_vdvjvk_insn(umin_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_umax_vec: + tcg_out32(s, encode_vdvjvk_insn(umax_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_ssadd_vec: + tcg_out32(s, encode_vdvjvk_insn(ssadd_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_usadd_vec: + tcg_out32(s, encode_vdvjvk_insn(usadd_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_sssub_vec: + tcg_out32(s, encode_vdvjvk_insn(sssub_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_ussub_vec: + tcg_out32(s, encode_vdvjvk_insn(ussub_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_shlv_vec: + tcg_out32(s, encode_vdvjvk_insn(shlv_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_shrv_vec: + tcg_out32(s, encode_vdvjvk_insn(shrv_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_sarv_vec: + tcg_out32(s, encode_vdvjvk_insn(sarv_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_shli_vec: + tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_shri_vec: + tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_sari_vec: + tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_bitsel_vec: + /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */ + tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1); + break; + case INDEX_op_dupm_vec: + tcg_out_dupm_vec(s, type, vece, a0, a1, a2); + break; + default: + g_assert_not_reached(); + } +} + +// int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigned vece) +// { +// switch (opc) { +// case INDEX_op_ld_vec: +// case INDEX_op_st_vec: +// case INDEX_op_dup_vec: +// case INDEX_op_cmp_vec: +// case INDEX_op_add_vec: +// case INDEX_op_sub_vec: +// case INDEX_op_and_vec: +// case INDEX_op_andc_vec: +// case INDEX_op_or_vec: +// case INDEX_op_orc_vec: +// case INDEX_op_xor_vec: +// case INDEX_op_not_vec: +// case INDEX_op_neg_vec: +// case INDEX_op_mul_vec: +// case INDEX_op_shlv_vec: +// case INDEX_op_shrv_vec: +// case INDEX_op_sarv_vec: +// return 1; +// default: +// return 0; +// } +// } +int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigned vece) +{ + switch (opc) { + case INDEX_op_ld_vec: + case INDEX_op_st_vec: + case INDEX_op_dup_vec: + case INDEX_op_dupm_vec: + case INDEX_op_cmp_vec: + case INDEX_op_add_vec: + case INDEX_op_sub_vec: + case INDEX_op_and_vec: + case INDEX_op_andc_vec: + case INDEX_op_or_vec: + case INDEX_op_orc_vec: + case INDEX_op_xor_vec: + case INDEX_op_not_vec: + case INDEX_op_neg_vec: + case INDEX_op_mul_vec: + case INDEX_op_smin_vec: + case INDEX_op_smax_vec: + case INDEX_op_umin_vec: + case INDEX_op_umax_vec: + case INDEX_op_ssadd_vec: + case INDEX_op_usadd_vec: + case INDEX_op_sssub_vec: + case INDEX_op_ussub_vec: + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + case INDEX_op_bitsel_vec: + return 1; + default: + return 0; + } +} + +void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigned vece, + TCGArg a0, ...) 
+{ + g_assert_not_reached(); +} + +static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) +{ + static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; + static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } }; + static const TCGTargetOpDef rZ_rZ = { .args_ct_str = { "rZ", "rZ" } }; + //static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } }; + //static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; + + static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } }; + static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; + + static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } }; + static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } }; + static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } }; + static const TCGTargetOpDef r_r_rC = { .args_ct_str = { "r", "r", "rC" } }; + static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; + static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } }; + static const TCGTargetOpDef r_r_rJ = { .args_ct_str = { "r", "r", "rJ" } }; + static const TCGTargetOpDef r_r_rU = { .args_ct_str = { "r", "r", "rU" } }; + static const TCGTargetOpDef r_r_rW = { .args_ct_str = { "r", "r", "rW" } }; + static const TCGTargetOpDef r_r_rZ = { .args_ct_str = { "r", "r", "rZ" } }; + static const TCGTargetOpDef r_0_rZ = { .args_ct_str = { "r", "0", "rZ" } }; + static const TCGTargetOpDef r_rZ_ri = { .args_ct_str = { "r", "rZ", "ri" } }; + static const TCGTargetOpDef r_rZ_rJ = { .args_ct_str = { "r", "rZ", "rJ" } }; + static const TCGTargetOpDef r_rZ_rZ = { .args_ct_str = { "r", "rZ", "rZ" } }; + static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; + static const TCGTargetOpDef w_w_wM = { .args_ct_str = { "w", "w", "wM" } }; + static const TCGTargetOpDef w_w_wA = { .args_ct_str = { "w", "w", "wA" } }; + static const TCGTargetOpDef w_w_w_w = { .args_ct_str = { "w", "w", "w", "w" } }; + static const TCGTargetOpDef r_rZ_rJ_rZ_rZ = { .args_ct_str = { "r", "rZ", "rJ", "rZ", "rZ" } }; + + switch (op) { + case INDEX_op_goto_ptr: + return &r; + + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + case INDEX_op_st32_i64: + case INDEX_op_st_i32: + case INDEX_op_st_i64: + return &rZ_r; + + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld_i64: + return &r_l; + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + return &lZ_l; + + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + return &rZ_rZ; + + case INDEX_op_ext8s_i32: + case INDEX_op_ext8s_i64: + case INDEX_op_ext8u_i32: + case INDEX_op_ext8u_i64: + case INDEX_op_ext16s_i32: + case INDEX_op_ext16s_i64: + case INDEX_op_ext16u_i32: + case INDEX_op_ext16u_i64: + case INDEX_op_ext32s_i64: + case INDEX_op_ext32u_i64: + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: + case INDEX_op_ext_i32_i64: + case INDEX_op_not_i32: + case INDEX_op_not_i64: + case INDEX_op_extract_i32: + case INDEX_op_extract_i64: + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: + case INDEX_op_bswap64_i64: + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld32u_i64: + case INDEX_op_ld_i32: + case INDEX_op_ld_i64: + return &r_r; + + case 
INDEX_op_andc_i32: + case INDEX_op_andc_i64: + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + /* + * LoongArch insns for these ops don't have reg-imm forms, but we + * can express using andi/ori if ~constant satisfies + * TCG_CT_CONST_U12. + */ + return &r_r_rC; + + case INDEX_op_shl_i32: + case INDEX_op_shl_i64: + case INDEX_op_shr_i32: + case INDEX_op_shr_i64: + case INDEX_op_sar_i32: + case INDEX_op_sar_i64: + case INDEX_op_rotl_i32: + case INDEX_op_rotl_i64: + case INDEX_op_rotr_i32: + case INDEX_op_rotr_i64: + case INDEX_op_add_i32: + return &r_r_ri; + + case INDEX_op_add_i64: + return &r_r_rJ; + + case INDEX_op_and_i32: + case INDEX_op_and_i64: + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + case INDEX_op_or_i32: + case INDEX_op_or_i64: + case INDEX_op_xor_i32: + case INDEX_op_xor_i64: + /* LoongArch reg-imm bitops have their imms ZERO-extended */ + return &r_r_rU; + + case INDEX_op_clz_i32: + case INDEX_op_clz_i64: + case INDEX_op_ctz_i32: + case INDEX_op_ctz_i64: + return &r_r_rW; + + case INDEX_op_deposit_i32: + case INDEX_op_deposit_i64: + /* Must deposit into the same register as input */ + return &r_0_rZ; + + case INDEX_op_sub_i32: + case INDEX_op_setcond_i32: + return &r_rZ_ri; + case INDEX_op_sub_i64: + case INDEX_op_setcond_i64: + return &r_rZ_rJ; + + case INDEX_op_mul_i32: + case INDEX_op_mul_i64: + case INDEX_op_mulsh_i32: + case INDEX_op_mulsh_i64: + case INDEX_op_muluh_i32: + case INDEX_op_muluh_i64: + case INDEX_op_div_i32: + case INDEX_op_div_i64: + case INDEX_op_divu_i32: + case INDEX_op_divu_i64: + case INDEX_op_rem_i32: + case INDEX_op_rem_i64: + case INDEX_op_remu_i32: + case INDEX_op_remu_i64: + return &r_rZ_rZ; + + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + return &r_rZ_rJ_rZ_rZ; + + case INDEX_op_ld_vec: + case INDEX_op_dup_vec: + case INDEX_op_dupm_vec: + case INDEX_op_st_vec: + return &w_r; + + case INDEX_op_cmp_vec: + return &w_w_wM; + + case INDEX_op_add_vec: + case INDEX_op_sub_vec: + return &w_w_wA; + + case INDEX_op_and_vec: + case INDEX_op_andc_vec: + case INDEX_op_or_vec: + case INDEX_op_orc_vec: + case INDEX_op_xor_vec: + case INDEX_op_mul_vec: + + case INDEX_op_smin_vec: + case INDEX_op_smax_vec: + case INDEX_op_umin_vec: + case INDEX_op_umax_vec: + case INDEX_op_ssadd_vec: + case INDEX_op_usadd_vec: + case INDEX_op_sssub_vec: + case INDEX_op_ussub_vec: + + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + return &w_w_w; + + case INDEX_op_not_vec: + case INDEX_op_neg_vec: + case INDEX_op_shli_vec: + case INDEX_op_shri_vec: + case INDEX_op_sari_vec: + return &w_w; + + case INDEX_op_bitsel_vec: + return &w_w_w_w; + + default: + g_assert_not_reached(); + } +} + +static const int tcg_target_callee_save_regs[] = { + TCG_REG_S0, /* used for the global env (TCG_AREG0) */ + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_S9, + TCG_REG_RA, /* should be last for ABI compliance */ +}; + +/* Stack frame parameters. */ +#define REG_SIZE (TCG_TARGET_REG_BITS / 8) +#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE) +#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) +#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) +#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE) + +/* We're expecting to be able to use an immediate for frame allocation. 
 */
+QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int i;
+
+    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
+
+    /* TB prologue */
+    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+    }
+
+#if !defined(CONFIG_SOFTMMU)
+    if (USE_GUEST_BASE) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
+#endif
+
+    /* Call generated code */
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+    tcg_out_opc_jirl(s, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
+
+    /* Return path for goto_ptr. Set return value to 0 */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
+
+    /* TB epilogue */
+    s->tb_ret_addr = s->code_ptr;
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+    }
+
+    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
+}
+
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+#if 0
+    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+
+    /* Server and desktop class cpus have UAL; embedded cpus do not. */
+    if (!(hwcap & HWCAP_LOONGARCH_UAL)) {
+        vreport(REPORT_TYPE_ERROR, "%s\n", "TCG: unaligned access support required; exiting");
+        exit(EXIT_FAILURE);
+    }
+
+    if (hwcap & HWCAP_LOONGARCH_LSX) {
+        use_lsx_instructions = 1;
+    }
+#else
+    use_lsx_instructions = 1;
+#endif
+
+    s->tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
+    s->tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
+
+    s->tcg_target_call_clobber_regs = ALL_GENERAL_REGS;
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S0);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S1);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S2);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S3);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S4);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S5);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S6);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S7);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S8);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S9);
+
+    s->reserved_regs = 0;
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
+    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
+}
+
+typedef struct {
+    DebugFrameHeader h;
+    uint8_t fde_def_cfa[4];
+    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_LOONGARCH
+
+static const DebugFrame debug_frame = {
+    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
+    .h.cie.id = -1,
+    .h.cie.version = 1,
+    .h.cie.code_align = 1,
+    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
+    .h.cie.return_column = TCG_REG_RA,
+
+    /* Total FDE size does not include the "len" member. */
+    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+    .fde_def_cfa = {
+        12, TCG_REG_SP,             /* DW_CFA_def_cfa sp, ... */
+        (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+        (FRAME_SIZE >> 7)
+    },
+    .fde_reg_ofs = {
+        0x80 + 23, 11,              /* DW_CFA_offset, s0, -88 */
+        0x80 + 24, 10,              /* DW_CFA_offset, s1, -80 */
+        0x80 + 25, 9,               /* DW_CFA_offset, s2, -72 */
+        0x80 + 26, 8,               /* DW_CFA_offset, s3, -64 */
+        0x80 + 27, 7,               /* DW_CFA_offset, s4, -56 */
+        0x80 + 28, 6,               /* DW_CFA_offset, s5, -48 */
+        0x80 + 29, 5,               /* DW_CFA_offset, s6, -40 */
+        0x80 + 30, 4,               /* DW_CFA_offset, s7, -32 */
+        0x80 + 31, 3,               /* DW_CFA_offset, s8, -24 */
+        0x80 + 22, 2,               /* DW_CFA_offset, s9, -16 */
+        0x80 + 1 , 1,               /* DW_CFA_offset, ra, -8 */
+    }
+};
+
+void tcg_register_jit(TCGContext *s, void *buf, size_t buf_size)
+{
+    tcg_register_jit_int(s, buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/qemu/tcg/loongarch64/tcg-target.opc.h b/qemu/tcg/loongarch64/tcg-target.opc.h
new file mode 100644
index 0000000000..4816a6c3d4
--- /dev/null
+++ b/qemu/tcg/loongarch64/tcg-target.opc.h
@@ -0,0 +1,3 @@
+/* Target-specific opcodes for host vector expansion. These will be
+   emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+   consider these to be UNSPEC with names. */
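
Editorial aside, not part of the patch: a minimal standalone C sketch of the frame-size arithmetic used by the prologue above. FRAME_SIZE relies on the align-up idiom (x + align - 1) & -align, and QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff) guards that the result fits the signed 12-bit immediate of addi.d. All constants and helper names below (STACK_ALIGN, STATIC_CALL_ARGS, TEMP_BUF, SAVE_AREA, ALIGN_UP) are assumed illustrative values, not taken from the tree.

    #include <assert.h>
    #include <stdio.h>

    #define STACK_ALIGN      16          /* assumed stack alignment */
    #define STATIC_CALL_ARGS 128         /* assumed TCG_STATIC_CALL_ARGS_SIZE */
    #define TEMP_BUF         (128 * 8)   /* assumed CPU_TEMP_BUF_NLONGS * sizeof(long) */
    #define SAVE_AREA        (11 * 8)    /* ten callee-saved regs plus RA, 8 bytes each */

    /* Round x up to the next multiple of the power-of-two alignment a. */
    #define ALIGN_UP(x, a)   (((x) + (a) - 1) & -(a))

    int main(void)
    {
        int frame = ALIGN_UP(STATIC_CALL_ARGS + TEMP_BUF + SAVE_AREA, STACK_ALIGN);

        /* Mirrors QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff): addi.d takes a si12 immediate. */
        assert(frame <= 0x7ff);
        printf("frame size = %d bytes\n", frame);
        return 0;
    }

With these assumed sizes the frame comes to 1248 bytes, comfortably inside the 2047-byte limit, which is why the prologue and epilogue can each adjust SP with a single addi.d.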