-
Notifications
You must be signed in to change notification settings - Fork 150
Remill running on the web via Emscripten #402
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -94,3 +94,6 @@ obj-intel64/* | |
|
||
# Lifted binaries | ||
*.lifted | ||
|
||
# Web build | ||
web/build |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,29 @@ endif () | |
project(remill) | ||
cmake_minimum_required(VERSION 3.2) | ||
|
||
if(EMSCRIPTEN) | ||
set(CMAKE_BUILD_TYPE Release) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be moved at the top of the cmake/settings.cmake file? If this new feature does not support other build configurations (such as Debug), we could add a warning message and overwrite the setting. Example:
EDIT: Replaced REMILL_EMSCRIPTEN with the value provided by the toolchain There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I love this idea! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Has this been resolved? |
||
set(gflags_DIR "$ENV{REPOS}/gflags/build") | ||
set(glog_DIR "$ENV{REPOS}/glog/build") | ||
set(LLVM_DIR "$ENV{REPOS}/llvm-project/llvm/build/lib/cmake/llvm/") | ||
set(remill_DIR "$ENV{REPOS}/remill/build/") | ||
|
||
# We want to manually invoke main for Lift instead of Emscripten calling it for us | ||
# So we disable INVOKE_RUN, export callMain (and FS for files). | ||
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/web/src") | ||
TrevorSundberg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
set(CMAKE_CXX_FLAGS "\ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible to move these settings in cmake/settings.cmake and use GLOBAL_CXX_FLAGS instead of CMAKE_CXX_FLAGS? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you prefer this or Or let me know if I'm not understanding what is wanted. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The goal is trying to not edit the CMake CXX variables by hand, and append them to the variables used there (which will eventually use property-based target settings such as I think it is fine to append them directly with something similar to if(EMSCRIPTEN)
list(GLOBAL_CXX_FLAGS APPEND
flag1
flag2
)
endif() but you can also create a setting such as |
||
${CMAKE_CXX_FLAGS} \ | ||
$ENV{EM_CXX_FLAGS} \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this setting be moved away from the environment variable? Example, in cmake/settings.cmake
This will make the flag list show up as a configurable setting (i.e.: make edit_cache). EDIT: The goal is to avoid using environment variables to configure the project (related: #402 (comment) ) |
||
-D__i386__ \ | ||
pgoodman marked this conversation as resolved.
Show resolved
Hide resolved
|
||
-DADDRESS_SIZE_BITS=32 \ | ||
-s ALLOW_MEMORY_GROWTH=1 \ | ||
--no-heap-copy \ | ||
-s INVOKE_RUN=0 \ | ||
-s EXTRA_EXPORTED_RUNTIME_METHODS='[\"callMain\",\"FS\"]' \ | ||
-s ASSERTIONS=1 \ | ||
--embed-file ${CMAKE_CURRENT_SOURCE_DIR}/web/build/remill/Arch/X86/Runtime/x86.bc@/share/remill/10.0/semantics/x86.bc") | ||
endif() | ||
|
||
include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/settings.cmake") | ||
include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/utils.cmake") | ||
include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/BCCompiler.cmake") | ||
|
@@ -227,7 +250,7 @@ if("${LIBRARY_REPOSITORY_ROOT}" STREQUAL "" OR NOT EXISTS "${LIBRARY_REPOSITORY_ | |
message("InstallExternalTarget: Found llvm-link executable: ${llvmlink_location}") | ||
|
||
find_library("libllvm_location" "${dynamic_lib_prefix}LLVM-${REMILL_LLVM_VERSION}.${dynamic_lib_extension}") | ||
if("${libllvm_location}" STREQUAL "libllvm_location-NOTFOUND") | ||
if("${libllvm_location}" STREQUAL "libllvm_location-NOTFOUND" AND NOT EMSCRIPTEN) | ||
message(FATAL_ERROR "InstallExternalTarget: Failed to locate the libLLVM dynamic library") | ||
endif() | ||
|
||
|
@@ -313,7 +336,7 @@ add_subdirectory(remill/Arch/AArch64/Runtime) | |
add_subdirectory(tools) | ||
|
||
# tests | ||
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "AppleClang") | ||
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "AppleClang" AND NOT EMSCRIPTEN) | ||
add_custom_target(test_dependencies) | ||
|
||
if(NOT "${PLATFORM_NAME}" STREQUAL "windows") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# Building | ||
The only tool you need is docker to build the web version: | ||
all tools and repositories are installed/checked out within the container. | ||
For example, ccache is installed within the container and is already setup. | ||
|
||
```bash | ||
# Generate the cmake/ninja build files | ||
./web/generate.sh | ||
# Build them into js/wasm files | ||
./web/build.sh | ||
``` | ||
|
||
# Using Lift | ||
To see the web demo, host an HTTP server in the directory `web/build/tools/lift`. | ||
The `index.html` has an example of how to execute it. | ||
|
||
To run it under node: | ||
```bash | ||
./web/build/tools/lift/index.js --bytes=90 --ir_out=out.ll | ||
``` | ||
|
||
# Using the library | ||
You can link against `web/build/libremill.a` which contains wasm binaries. | ||
|
||
# Debugging CMake | ||
It is useful to specify `-DCMAKE_VERBOSE_MAKEFILE=ON` in any of the | ||
generator calls to cmake inside the `Dockerfile` to see the exact | ||
commands being passed to Emscripten's compiler and linker. | ||
|
||
# Issues | ||
|
||
### Warnings | ||
There are several warnings due to conversions between signed/unsigned, pointer sizes, etc. | ||
To enable all warnings again remove `-Wno-everything` in the `Dockerfile`. | ||
|
||
There are also warnings with CMake mostly related to the use of deprecated functions or that | ||
Emscripten does not support dynamic linking (dll/so) and therefore reverts to static linkage. | ||
To enable all these warnings remove all occurrences of `-Wno-deprecated` and `-Wno-dev`. | ||
|
||
### Debug | ||
Right now all the libraries are built in Release because in debug some libraries | ||
such as LLVM end up specifying specific debug formats like dwarf and this breaks Emscripten. | ||
This most likely can be fixed with a patch/sed to the LLVM cmake files. | ||
|
||
### 64 Bit | ||
We're only doing 32 bit x86 right now because wasm64 support is still in the works. | ||
Moreover, remill does not support targeting x64 from x86 (see `CMAKE_SIZEOF_VOID_P`). | ||
We would also need to change the define `-D__i386__` and `-DADDRESS_SIZE_BITS=32` in the `CMakeLists.txt`. | ||
|
||
Because remill needs to load the semantic files, we embed `x86.bc` into the generated JavaScript. | ||
Alternatively, instead of using `--embed-file` we could use `--preload-file` which is more efficient, | ||
but does not work directly in NodeJS without polyfills: | ||
|
||
``` | ||
--embed-file ${CMAKE_CURRENT_SOURCE_DIR}/web/build/remill/Arch/X86/Runtime/x86.bc@/share/remill/10.0/semantics/x86.bc | ||
``` | ||
|
||
### Undefined symbols in LLVM | ||
Currently we use `-s ERROR_ON_UNDEFINED_SYMBOLS=0` to avoid the following errors, but patches/sed would be better. | ||
``` | ||
warning: undefined symbol: __deregister_frame | ||
warning: undefined symbol: __register_frame | ||
warning: undefined symbol: posix_spawn_file_actions_adddup2 | ||
warning: undefined symbol: posix_spawn_file_actions_addopen | ||
warning: undefined symbol: posix_spawn_file_actions_destroy | ||
warning: undefined symbol: posix_spawn_file_actions_init | ||
``` | ||
|
||
### Undefined symbols in remill | ||
The most notable undefined symbol in remill is `popen`. | ||
Emscripten does not have an implementation for `popen` as there is no process model. | ||
The other errors are most likely from linking LLVM. | ||
``` | ||
warning: undefined symbol: popen | ||
warning: undefined symbol: posix_spawn_file_actions_adddup2 | ||
warning: undefined symbol: posix_spawn_file_actions_addopen | ||
warning: undefined symbol: posix_spawn_file_actions_destroy | ||
warning: undefined symbol: posix_spawn_file_actions_init | ||
``` | ||
|
||
### Unrolling loops | ||
Compiling under Emscripten fails with `_Pragma("unroll")` and produces the warning: | ||
``` | ||
remill/Arch/X86/Semantics/SSE.cpp:937:9: warning: loop not unrolled: | ||
the optimizer was unable to perform the requested transformation; | ||
the transformation might be disabled or specified as part of an unsupported transformation ordering | ||
[-Wpass-failed=transform-warning] | ||
``` | ||
|
||
Another interesting note is that this pragma is most likely also embedded into the llvm | ||
bitcode files because a similar warning is reported at runtime in wasm when the bitcode files are loaded: | ||
``` | ||
remill-lift-10.0.js:6361 warning: <unknown>:0:0: loop not unrolled: | ||
the optimizer was unable to perform the requested transformation; | ||
the transformation might be disabled or specified as part of an unsupported transformation ordering | ||
``` | ||
|
||
### Calling main once in Lift | ||
Emscripten supports tearing down the state after main is called (global destructors, etc.) | ||
by using `-s EXIT_RUNTIME=1`; however, it does not support calling main a second time. | ||
A workaround is to expose a function that can be invoked more than once that is not `main` or | ||
`callMain`. This can be done easily with Embind and passing `--bind` in `CMakeLists.txt`. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/usr/bin/env bash | ||
set -e | ||
cd `dirname "$0"`/build | ||
../src/run.sh cmake --build . | ||
cp -f ../src/index.html ./tools/lift | ||
cp -f ../src/index.js ./tools/lift |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/usr/bin/env bash | ||
set -e | ||
cd `dirname "$0"` | ||
rm -rf build | ||
mkdir -p build | ||
cd build | ||
../src/run.sh bash -c 'cmake -Wno-dev -GNinja -DCMAKE_TOOLCHAIN_FILE="$EM_TOOLCHAIN" ../..' |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
FROM ubuntu:19.10@sha256:bd5f4f235eb31768b2c5caf1988bbdc182d4fc3cb6ee4aca6c6d74613f256140 | ||
|
||
RUN apt-get update | ||
RUN apt-get install -y \ | ||
git | ||
|
||
RUN git config --global advice.detachedHead false | ||
|
||
# Checkout all the releases at specific tags to ensure updates don't break the build. | ||
ENV REPOS="/repos" | ||
WORKDIR "$REPOS" | ||
RUN git clone https://github.com/llvm/llvm-project.git | ||
RUN cd llvm-project && git checkout llvmorg-10.0.0-rc2 | ||
RUN git clone https://github.com/juj/emsdk.git | ||
RUN cd emsdk && git checkout 1458145cf4f3db0fb548343e6acab267eef8e4ef | ||
RUN git clone https://github.com/intelxed/xed.git | ||
RUN cd xed && git checkout 11.0.1 | ||
RUN git clone https://github.com/intelxed/mbuild.git | ||
RUN cd mbuild && git checkout 1e57534e2122a39382c68e134026b15a3370e5b1 | ||
RUN git clone https://github.com/gflags/gflags.git | ||
RUN cd gflags && git checkout v2.2.2 | ||
RUN git clone https://github.com/google/glog.git | ||
RUN cd glog && git checkout v0.4.0 | ||
|
||
RUN apt-get install -y \ | ||
build-essential \ | ||
ccache \ | ||
clang \ | ||
cmake \ | ||
curl \ | ||
default-jre \ | ||
emscripten \ | ||
libtinfo-dev \ | ||
llvm \ | ||
lsb-release \ | ||
ninja-build \ | ||
patchelf \ | ||
python2.7 \ | ||
wget \ | ||
zlib1g-dev | ||
|
||
# Download a specific version of Emscripten (fastcomp fails with linker errors, upstream is required). | ||
ENV EMSCRIPTEN_VERSION sdk-tag-1.39.7-64bit-upstream | ||
RUN cd emsdk && \ | ||
./emsdk update-tags && \ | ||
./emsdk install $EMSCRIPTEN_VERSION && \ | ||
./emsdk activate --embedded $EMSCRIPTEN_VERSION | ||
|
||
ENV EMSDK="$REPOS/emsdk" | ||
ENV EMSDK_NODE_BIN="$EMSDK/node/12.9.1_64bit/bin" | ||
ENV EMSCRIPTEN="$EMSDK/upstream/emscripten" | ||
ENV PATH="$EMSDK:$EMSCRIPTEN:$EMSDK_NODE_BIN:${PATH}" | ||
ENV EM_CONFIG="$EMSDK/.emscripten" | ||
ENV EM_PORTS="$EMSDK/.emscripten_ports" | ||
ENV EM_CACHE="$EMSDK/.emscripten_cache" | ||
ENV EMSDK_NODE="$EMSDK_NODE_BIN/node" | ||
ENV EM_TOOLCHAIN="$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" | ||
ENV EMCC_WASM_BACKEND=1 | ||
ENV EMCC_SKIP_SANITY_CHECK=1 | ||
|
||
ENV EM_CXX_FLAGS="-s ERROR_ON_UNDEFINED_SYMBOLS=0 -s USE_PTHREADS=0 -s WASM=1 -Wno-everything -O2 --closure 1 --llvm-lto 3" | ||
|
||
# ===== Build llvm-tblgen on Host ===== | ||
# Building all of LLVM requires its own tool 'llvm-tblgen', however we can't use the Ubuntu package "llvm" | ||
# because it is too old and doesn't have the latest wasm changes, so build a host version ourselves. | ||
# Moreover, when building LLVM under Emscripten it will try and build tblgen but won't be able to run it | ||
# since it's a .js file, not an executable. Therefore below we manually specify -DLLVM_TABLEGEN=... | ||
RUN mkdir -p "$REPOS/llvm-project/build" | ||
WORKDIR "$REPOS/llvm-project/build" | ||
RUN cmake \ | ||
-GNinja \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
../llvm | ||
RUN cmake --build . --target llvm-tblgen | ||
|
||
# ===== Build LLVM on Emscripten ===== | ||
WORKDIR "$REPOS" | ||
# LLVM attempts to check the compiler version but Emscripten's emcc outputs its version | ||
# in a different format that can't be parsed by LLVM (a PR could be submitted upstream). | ||
RUN echo "" > llvm-project/llvm/cmake/modules/CheckCompilerVersion.cmake | ||
|
||
RUN mkdir -p "$REPOS/llvm-project/llvm/build" | ||
WORKDIR "$REPOS/llvm-project/llvm/build" | ||
|
||
RUN cmake \ | ||
-Wno-deprecated \ | ||
-Wno-dev \ | ||
-GNinja \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DLLVM_DEFAULT_TARGET_TRIPLE=wasm32-unknown-unknown-wasm \ | ||
-DLLVM_ENABLE_THREADS=OFF \ | ||
-DLLVM_USE_SANITIZER=OFF \ | ||
-DLLVM_ENABLE_EXPENSIVE_CHECKS=OFF \ | ||
-DLLVM_ENABLE_BACKTRACES=OFF \ | ||
-DLLVM_ENABLE_DUMP=OFF \ | ||
-DLLVM_INCLUDE_TESTS=OFF \ | ||
-DLLVM_INCLUDE_TOOLS=ON \ | ||
-DLLVM_BUILD_TOOLS=ON \ | ||
-DLLVM_BUILD_LLVM_DYLIB=ON \ | ||
-DLLVM_LINK_LLVM_DYLIB=ON \ | ||
-DLLVM_ENABLE_TERMINFO=OFF \ | ||
-DLLVM_TARGETS_TO_BUILD= \ | ||
-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly \ | ||
-DLLVM_TABLEGEN=$REPOS/llvm-project/build/bin/llvm-tblgen \ | ||
-DCMAKE_TOOLCHAIN_FILE="$EM_TOOLCHAIN" \ | ||
-DCMAKE_CXX_FLAGS="$EM_CXX_FLAGS" \ | ||
-DCMAKE_STRIP=llvm-strip \ | ||
-DCMAKE_AR="$EMSCRIPTEN/emar" \ | ||
.. | ||
|
||
RUN cmake --build . --target LLVM | ||
|
||
# ===== Build XED on Emscripten ===== | ||
WORKDIR "$REPOS/xed" | ||
RUN ./mfile.py \ | ||
--cc=emcc \ | ||
--cxx=em++ \ | ||
--linker=wasm-ld \ | ||
--ar=emar \ | ||
--host-cpu=x86 \ | ||
--extra-cxxflags="$EM_CXX_FLAGS" \ | ||
--extra-ccflags="$EM_CXX_FLAGS" | ||
|
||
# ===== Build gflags on Emscripten ===== | ||
WORKDIR "$REPOS" | ||
RUN mkdir -p "$REPOS/gflags/build" | ||
WORKDIR "$REPOS/gflags/build" | ||
RUN cmake \ | ||
-GNinja \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DCMAKE_TOOLCHAIN_FILE="$EM_TOOLCHAIN" \ | ||
-DCMAKE_CXX_FLAGS="$EM_CXX_FLAGS" \ | ||
.. | ||
RUN cmake --build . | ||
|
||
# ===== Build glog on Emscripten ===== | ||
WORKDIR "$REPOS" | ||
RUN sed -i 's/\bHAVE_SYMBOLIZE 1\b/HAVE_SYMBOLIZE 0/g' glog/CMakeLists.txt | ||
RUN mkdir -p "$REPOS/glog/build" | ||
WORKDIR "$REPOS/glog/build" | ||
RUN cmake \ | ||
-GNinja \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DCMAKE_TOOLCHAIN_FILE="$EM_TOOLCHAIN" \ | ||
-DCMAKE_CXX_FLAGS="$EM_CXX_FLAGS" \ | ||
-DBUILD_TESTING=OFF \ | ||
-Dgflags_DIR="$REPOS/gflags/build" \ | ||
.. | ||
|
||
# Emscripten has a syscall.h, however it does not implement the exact signature for syscall | ||
# and glog attempts to call it if the header exists, so just pretend it doesn't exist. | ||
RUN sed -i 's/\bHAVE_SYSCALL_H\b/HAVE_SYSCALL_H_INVALID/g' config.h | ||
RUN sed -i 's/\bHAVE_SYS_SYSCALL_H\b/HAVE_SYS_SYSCALL_H_INVALID/g' config.h | ||
|
||
RUN cmake --build . | ||
|
||
# ===== Build remill on Emscripten ===== | ||
WORKDIR "$REPOS" | ||
|
||
# Bypass a remill CMake error about finding clang. | ||
# This compiler is not used under Emscripten since it has its own installed version of clang. | ||
# The version must match the checked out version of llvm-project. | ||
RUN cp /usr/bin/clang /usr/bin/clang-10.0 | ||
|
||
# The emscripten cache needs to be usable by the outside user (currently owned by root). | ||
RUN chmod -R 666 "$EMSDK/.emscripten_cache.lock" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
set(XED_FOUND TRUE) | ||
set(XED_INCLUDE_DIRS | ||
"/repos/xed/include/public" | ||
"/repos/xed/obj/wkit/include/xed" | ||
) | ||
set(XED_LIBRARIES | ||
"/repos/xed/obj/libxed.a" | ||
"/repos/xed/obj/libxed-ild.a" | ||
) | ||
mark_as_advanced(FORCE XED_FOUND) | ||
mark_as_advanced(FORCE XED_INCLUDE_DIRS) | ||
mark_as_advanced(FORCE XED_LIBRARIES) |
Uh oh!
There was an error while loading. Please reload this page.