clean up rough edges, add doc, reduce image size (now 2.2/0.8 GB)

011d683a · Sebastien Michelland · 15c77124 · 011d683a · 011d683a · 011d683a
Verified Commit 011d683a authored 1 year ago by Sebastien Michelland
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,11 @@
 # Git-free QEMU sources
 /qemu.tar
+# Exported image
+/cc24-fetch-skips-hardening.tar
+/cc24-fetch-skips-hardening.tar.xz
+# Simulation and figure outputs
+/out
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,13 +11,11 @@ RUN apt -y update && apt -y upgrade && apt -y install \
      # QEMU dependencies
      meson libglib2.0-dev \
      # gem5 dependencies
-      build-essential git m4 scons zlib1g zlib1g-dev libprotobuf-dev \
+      build-essential git m4 scons zlib1g zlib1g-dev python3-dev pkg-config \
-      protobuf-compiler libprotoc-dev libgoogle-perftools-dev python3-dev \
-      libboost-all-dev pkg-config \
      libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pip black
 # gem5 dependencies
-RUN pip install mypy pre-commit
+RUN pip install mypy pre-commit pyelftools
 WORKDIR /root
@@ -45,10 +43,11 @@ RUN mkdir llvm-property-preserving/build && \
    cd ../.. && \
    rm -rf llvm-property-preserving
-# Download the RISC-V GNU toolchain
+# Download the RISC-V GNU toolchain and strip the binaries for space
-# TODO: We can save ~500 MB by stripping the prebuilt binaries.
 RUN wget "https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/2023.01.31/riscv32-elf-ubuntu-22.04-nightly-2023.01.31-nightly.tar.gz" && \
    tar -xzf "riscv32-elf-ubuntu-22.04-nightly-2023.01.31-nightly.tar.gz" && \
+    (strip riscv/bin/*; true) && \
+    (strip riscv/libexec/gcc/riscv32-unknown-elf/12.2.0/*; true) && \
    mv riscv riscv-custom && \
    rm "riscv32-elf-ubuntu-22.04-nightly-2023.01.31-nightly.tar.gz"
@@ -64,32 +63,29 @@ RUN mkdir binutils-gdb/build && \
    rm -rf binutils-gdb
 # Build QEMU
-# TODO: Install and remove sources?
 RUN mkdir qemu/build && \
    cd qemu/build && \
    ../configure \
+      --prefix="$(realpath ../../prefix)" \
      --target-list=riscv32-linux-user \
      --with-git-submodules=ignore && \
-    ninja
+    ninja install && \
+    cd ../.. && \
+    rm -rf qemu
-# Build gem5
+# Build gem5 and copy the executable manually (it's self-contained)
 RUN cd gem5 && \
    pip install -r requirements.txt && \
-    scons build/RISCV/gem5.opt -j$(nproc)
+    scons build/RISCV/gem5.opt -j$(nproc) && \
+    mv build/RISCV/gem5.opt ~/prefix/bin/ && \
-# Very crude cleaning because there's nothing better at first glance
+    cd .. && \
-RUN cd gem5 && \
+    rm -rf gem5
-    scons --clean && \
-    find build/RISCV -name '*.o' -delete
 # Copy test files
 COPY mibench mibench/
 COPY riscv_cc_REF riscv_cc_FSH riscv_qemu_FSH \
     elf32lriscv_ref.x elf32lriscv_ccs.x fault.py Makefile .
-# TODO: Move up
-RUN pip install pyelftools
 # Squash the final image so we don't ship source and build files as diffs
 FROM scratch
 COPY --from=layered / /

--- a/Makefile
+++ b/Makefile
 help:
-	@echo "all_NAT:    Build native programs"
+	@echo "Main commands for the Fetch Skips Hardening project:"
-	@echo "run_NAT:    Run native programs"
+	@echo ""
 	@echo "all_REF:    Build reference RISC-V programs (no FSH)"
 	@echo "run_REF:    Run reference programs to get reference outputs"
-	@echo "all_FSH:    Build programs with FSH"
+	@echo "all_FSH:    Build protected RISC-V programs (with FSH)"
-	@echo "run_FSH:    Run FSH programs, giving hopefully the ref output"
+	@echo "run_FSH:    Run FSH programs and compare with reference output"
-	@echo "clean:      Clean up build products (not test results)"
+	@echo "clean:      Clean up build products and binaries"
-	@echo "distclean:  Clean up everything (including test results)"
+	@echo ""
+	@echo "campaigns:  Run fault injection campaigns (use -jN)"
-all_NAT run_NAT all_REF run_REF all_FSH run_FSH clean distclean: %:
+	@echo "distclean:  Clean up binaries and campaign results (!)"
+all_REF run_REF all_FSH run_FSH clean: %:
 	@ $(MAKE) -C mibench/automotive/basicmath  $*
 	@ $(MAKE) -C mibench/automotive/bitcount   $*
 	@ $(MAKE) -C mibench/automotive/qsort      $*
@@ -19,7 +21,10 @@ all_NAT run_NAT all_REF run_REF all_FSH run_FSH clean distclean: %:
 	@ $(MAKE) -C mibench/security/rijndael     $*
 	@ $(MAKE) -C mibench/security/sha          $*
-.PHONY: all_NAT run_NAT all_REF run_REF all_FSH run_FSH clean distclean
+distclean: clean
+	@ rm -rf $(OUT)/
+.PHONY: all_REF run_REF all_FSH run_FSH clean distclean
 OUT := out
@@ -37,17 +42,6 @@ PROGRAMS := \
  mibench/security/rijndael \
  mibench/security/sha
-PROG_CAMPAIGNS_FSH := \
-  $(foreach P,$(PROGRAMS), \
-    $(foreach C,$(CAMPAIGNS_FSH),$(OUT)/$(notdir $P)-campaign-$C.txt))
-PROG_CAMPAIGNS_REF := \
-  $(foreach P,$(PROGRAMS), \
-    $(foreach C,$(CAMPAIGNS_REF),$(OUT)/$(notdir $P)-campaign-$C.txt))
-# In this order, preferably
-PROG_CAMPAIGNS_ALL := $(PROG_CAMPAIGNS_FSH) $(PROG_CAMPAIGNS_REF)
 # Remember the full path of each program x in variable $(PATH_x)
 $(foreach P,$(PROGRAMS),$(eval PATH_$(notdir $P) := $P))
@@ -86,14 +80,10 @@ endef
 $(foreach C,$(CAMPAIGNS_FSH),$(eval $(call do_campaign,$C,)))
 $(foreach C,$(CAMPAIGNS_REF),$(eval $(call do_campaign,$C,--block-wall)))
-# Single-core rules
+# Multi-core-friendly rules: run campaigns sequentially. We don't want multiple
-campaigns-fsh: $(PROG_CAMPAIGNS_FSH)
+# campaigns on the same program simultaneously, due to (1) file access races,
-campaigns-ref: $(PROG_CAMPAIGNS_REF)
+# and (2) suboptimal use of not-reached info.
-campaigns: $(PROG_CAMPAIGNS_ALL)
+campaigns:
-# Multi-core rules: avoid running multiple campaigns on the same program in
-# parallel due to (1) file access races, (2) suboptimal use of notreached info
-campaigns-multicore:
 	$(MAKE) campaign-fsh-ex-s32-1
 	$(MAKE) campaign-fsh-ex-s32-2
 	$(MAKE) campaign-fsh-ex-sar32

--- a/README.md
+++ b/README.md
@@ -15,32 +15,35 @@ _This repository houses the artifact for a [CC'24](https://conf.researchr.org/ho
 “Fetch skips” is fault model coined by Alshaer et al. [[2023](https://hal.science/hal-04273995v1)] which describes one common way microprocessors react to a glitch in their clock input. A typical model for this would be “instruction skip”, i.e. just skip an instruction in the execution of a program. Fetch skips are more precise and involve skipping or repeating 4 bytes of code, which can produce more complex effects for unaligned and variable-sized instructions. This is of course a major problem for security, as basically any incorrect execution can lead to abuse.
-This repository is a research project on protecting against fetch skips. It contains a modified compiler (LLVM/Clang 12), linker (GNU ld 2.40), emulator (QEMU 8.0) and processor simulator (Gem5 22.1) which implement a combined software/hardware countermeasure. The main result of the paper is a proof that running a program by these tools on a minimally-extended processor prevents exploitation of fetch skips by ensuring that every attack causes the program to stop or crash within a few instructions.
+This repository is a research project on protecting against fetch skips. It contains a modified compiler (LLVM/Clang 12), linker (GNU ld 2.40), emulator (QEMU 8.0) and processor simulator (Gem5 22.1) which implement a combined software/hardware countermeasure. The main result of the paper is a proof that running a program protected by these tools on a minimally-extended processor prevents exploitation of fetch skips by ensuring that every attack causes the program to stop or crash within a few instructions.
 In addition to the compiler/linker for generating protected programs, we use the emulator to simulate attacks and experimentally check the security claims, and the simulator to evaluate performance impact. A subset of programs from the [MiBench benchmark suite](https://vhosts.eecs.umich.edu/mibench/) is used.
 ## How to reproduce results from the paper
-To get straight to reproduced results, no questions asked, get the Docker image and run the following commands.
+To get straight to reproduced results on an x86\_64 machine, no questions asked, download the compressed Docker image and run the following commands. For details see below.
-TODO: Link to Docker image or commands to download from Zenodo and run.
+TODO: Link to Zenodo.
 ```
-% docker run -it cc24-fetch-skips-hardening
+% xz -d --stdout cc24-fetch-skips-hardening.tar.xz | sudo docker load
+% sudo docker run -it localhost/cc24-fetch-skips-hardening
 root@(container):~# make all_REF all_FSH run_REF run_FSH
-root@(container):~# make -j$(nproc) campaigns-multicore
+root@(container):~# make -j$(nproc) campaigns
 root@(container):~# TODO
 ```
-TODO: More instructions
+The Docker image is just a build of this repository on Ubuntu 22.04; see [Detailed description](#detailed-description) for an explanation of the contents. To build natively without using Docker, please check the [Manual build](#manual-build) instructions and the [Dockerfile](Dockerfile) as a reference.
-A build of all the tools for x86\_64 is provided as a Docker image. To build manually, see section [Manual build](#manual-build).
+The first step is to build a reference version of the benchmark programs (`make all_REF`) without enabling Fetch Skips Hardening, and then protected versions (`make all_FSH`) using this project's compiler and linker passes. To verify that the protected programs still work as intended, we run both versions (`make run_REF run_FSH`) and check that the outputs are identical.
-1. Build reference programs (RISC-V without Fetch Skips Hardening) with `make all_REF`.
+The second step is to run fault injection campaigns (`make -jN campaigns`). This uses a modified QEMU to emulate the effect of the fault and check that programs correctly stop or crash before the end of the attacked block. This fact is proven in the paper for single-fault injections (and proven up to the absence of checksum collisions for multi-fault injections) so the expected result is 100% fault resistance. See [Technical notes](#technical-notes) for an explanation of how to read the outputs if you're interested.
-2. Build FSH programs with `make all_FSH`.
-3. Generate reference outputs with `make all_REF`.
+The same command also runs injection campaigns on the reference (non-protected) programs to collect statistics about the percentage of attacks that result in a crash within the attacked block, as a baseline comparison. Predictably, these campaigns result in a lot of security "bypasses" since the countermeasure isn't active.
-4. Generate FSH outputs and compare with ref with `make run_FSH` (if there's a difference there will be an error).
-5. Run the fault injection campaigns (single core: `make campaigns`, multiple cores: `make -j<CORES> campaigns-multicore`). There are 9 programs so up to 9 cores can be used effectively.
+TODO: Performance simulations
+TODO: Generating figures
 ## Detailed description
@@ -61,7 +64,7 @@ Both linker scripts can be diffed against the original, which can be found at `.
 Other files used in the testing process include:
- `mibench`: Programs from the [MiBench benchmark suite][https://vhosts.eecs.umich.edu/mibench/index.html). We target the Industrial, Network and Security applications. The source files are original but the Makefiles are basically new.
+- `mibench`: Programs from the [MiBench benchmark suite](https://vhosts.eecs.umich.edu/mibench/index.html). We target the Industrial, Network and Security applications. The source files are original but the Makefiles are basically new.
 - `riscv_qemu_REF`, `riscv_qemu_FSH`: Wrappers around QEMU and QEMU-with-FSH-support.
 - `fault.py`: Script for running fault injection campaigns (details inside).
 - `fault_summary.py`: TODO.
@@ -71,6 +74,47 @@ The Makefile just contains a few top-level commands for using the project.
 ## Technical notes
+**Reading the output of the fault injection script**
+Below is an excerpt from the fault campaign script's output (running in parallel).
+```
+[patricia 44.6%] 0x40770:s32,1... CCS_VIOLATION
+[basicmath 48.5%] 0x41358:s32,1... NOT_REACHED
+[patricia 44.7%] 0x40774:s32,1... CCS_VIOLATION
+[patricia 44.8%] 0x40778:s32,1... CCS_VIOLATION
+[patricia 44.9%] 0x4077c:s32,1... SIGILL
+[susan 48.6%] 0x43ef8:s32,1... NOT_REACHED (predicted)
+[patricia 44.9%] 0x40780:s32,1... CCS_VIOLATION
+```
+Each line corresponds to a faulted execution. The bracketed section indicates the program being run and the campaign's progress. The fault description follows; `0x40770:s32,1` for instance indicates injecting a single 32-bit skip fault at PC 0x40770. Then comes the exit status, which is usually `NOT_REACHED` (if the attacked PC is not reached during the entire execution), `CCS_VIOLATION` (attack detected by the countermeasure), or a crash signal. Green exit statuses mean no security vulnerability; red statuses indicate a security bypass.
+Executions where the targeted PC is not reached take the longest, because there is no early exit/crash. In addition, a second execution is needed to check whether the PC was actually reached or not (by injecting an illegal instruction at that address). Attacks that are not reached are also mostly uninteresting. Two mechanisms are in place to accelerate simulations by avoiding these unneeded executions:
+1. Prediction: when the script believes the targeted PC is likely not reached it will try the illegal instruction first to save one execution. If that guess is correct the script will print "(predicted)".
+2. Not-reached output file: the script will produce `*-notreached.txt` files in the output folder where it records PC values that are not reached. This way, only the first campaign deals with them. This is why `s32,2` and `s&r32` are so much faster than `s32,1`.
+The results are summarized in `out/` in files such as `out/basicmath-campaign-fsh-ex-s32-1.txt`:
+```
+= 272364
+setting,EXITED,CCS_VIOLATION,CCS_BYPASSED,NOT_REACHED,SILENT_REPLACE,SIGSEGV,SIGILL,SIGTRAP,OTHER
+fsh-ex-s32-1,0,1543,0,833,0,115,60,3,1
+# OTHER for (266136, 's32,1'):
+# summary of faults to be injected:
+#   00040f98: s32 (k=1)
+# /root/riscv_qemu_FSH: line 5: 50623 Bus error               "${ROOT}"/prefix/bin/qemu-riscv32 -cpu rv32-fsh "$@"
+```
+The first line indicates the campaign's progress and is used for resuming gracefully if the script is ever interrupted. The next two lines summarize the results; most importantly, they show the absence of `EXITED` and `CCS_BYPASSED` outcomes (the red ones). Any non-conventional result is finally reported with a comment, which here includes a crash by `SIGBUS`.
+TODO: Explain aggregate CSV file
+**Reading the output of performance simulations**
+TODO: Explain output of Gem5 simulations
 **False-positive QEMU “bugs”**
 The fault injection campaign script prints a result for each execution, such as `CCS_VIOLATION` or `NOT_REACHED`. When it doesn't recognize a result, it prints `OTHER` and logs the parameters along with the stdout/stderr of the QEMU invocation to the log file. On some machine there are many of these and they appear to be segfaults or assertion errors _within QEMU itself_, but this is mostly a red herring. The TL;DR is that QEMU is sometimes unable to catch exceptions from the emulated programs and crashes itself instead.
@@ -93,21 +137,19 @@ At least 3 bugs I investigated led back to this:
 **Property-preserving LLVM**
-The compiler transforms the program into a protected form and is the core of the countermeasure. Pull the [`llvm-property-preserving`](https://gricad-gitlab.univ-grenoble-alpes.fr/michelse/llvm-property-preserving) submodule and build it with CMake. We configure to install in the `prefix/` folder of this repo, but never do that - we run binaries from to build folder directly.
+The compiler transforms the program into a protected form and is the core of the countermeasure. Pull the [`llvm-property-preserving`](https://gricad-gitlab.univ-grenoble-alpes.fr/michelse/llvm-property-preserving) submodule and build it with CMake. We configure to install in the `prefix/` folder of this repo.
-TODO: The Dockerfile does it. We should probably match that unless it saves space.
 ```bash
 % git submodule update --init llvm-property-preserving
 % cd llvm-property-preserving
 % mkdir build && cd build
 % cmake -G Ninja -DLLVM_ENABLE_PROJECTS="clang;lldb" -DLLVM_TARGETS_TO_BUILD="RISCV" -DCMAKE_INSTALL_PREFIX=../prefix -DCMAKE_BUILD_TYPE=Release -DLLVM_USE_LINKER=lld -DBUILD_SHARED_LIBS=ON -DLLVM_PARALLEL_LINK_JOBS=1 ../llvm
-% ninja
+% ninja install
 ```
 **RISC-V GNU toolchain**
-In order to compile and link useful C programs, we need both standard library headers, the standard library, and the C runtime for the RISC-V target. Grab the 32-bit RISC-V toolchain from [`riscv-collab/riscv-gnu-toolchain`](https://github.com/riscv-collab/riscv-gnu-toolchain/releases), e.g. `riscv32-elf-ubuntu-22.04-nightly-2023.01.31-nightly.tar.gz`. Uncompress it and rename the `riscv` folder to `riscv-custom` (we're going to replace the linker).
+In order to compile and link useful C programs, we need both standard library headers, the standard library, and the C runtime for the RISC-V target. Grab the 32-bit RISC-V toolchain from [`riscv-collab/riscv-gnu-toolchain`](https://github.com/riscv-collab/riscv-gnu-toolchain/releases), e.g. `riscv32-elf-ubuntu-22.04-nightly-2023.01.31-nightly.tar.gz`. Extract it and rename the `riscv` folder to `riscv-custom` (we're going to replace the linker).
 ```bash
 % wget "https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/2023.01.31/riscv32-elf-ubuntu-22.04-nightly-2023.01.31-nightly.tar.gz"
@@ -118,7 +160,7 @@ In order to compile and link useful C programs, we need both standard library he
 **Custom linker**
-The countermeasure relies on computing checksums of fragments of code, which is only possible after relocation in the linker. So we use a sligthly-modified linker. Pull the [`binutils-gdb`](https://gricad-gitlab.univ-grenoble-alpes.fr/michelse/binutils-gdb) submodule and build it.
+The countermeasure relies on computing checksums of fragments of code, which is only possible after relocation in the linker. So we use a slightly-modified linker. Pull the [`binutils-gdb`](https://gricad-gitlab.univ-grenoble-alpes.fr/michelse/binutils-gdb) submodule and build it.
 ```bash
 % git submodule update --init binutils-gdb
@@ -126,7 +168,7 @@ The countermeasure relies on computing checksums of fragments of code, which is
 % mkdir build && cd build
 % ../configure --prefix="$(realpath ../../riscv-custom)" --target="riscv32-unknown-elf"
 % make -j4
-% make -j4 install
+% make install
 ```
 **Custom QEMU**
@@ -138,7 +180,7 @@ We use QEMU to emulate the hardware support of the countermeasure and the inject
 % cd qemu
 % mkdir build && cd build
 % ../configure --target-list=riscv32-linux-user
-% ninja
+% ninja install
 ```
 **gem5 simulator**
@@ -156,10 +198,23 @@ Note: I was unsuccessful in getting a clean build on Arch; Ubuntu seems to be th
 ## Generating the Docker image
-The Docker image for this projet is generated from the source files in this repository (including unstaged changes). Make sure all submodules are pulled. QEMU only builds out-of-git when using a release tarball, so we generate that first. We also clean any generated from the `mibench` folder, which will get copied.
+The Docker image for this project is generated from the source files in this repository (including unstaged changes). Make sure all submodules are pulled. QEMU only builds out-of-git when using a release tarball, so we generate that first. We also clean any generated files from the `mibench` folder, which will get copied.
 ```bash
 % (cd qemu && scripts/archive-source.sh ../qemu.tar)
 % make distclean
 % podman build -t cc24-fetch-skips-hardening .
 ```
+One way to export the image is then to save it and compress it.
+```bash
+% podman save cc24-fetch-skips-hardening:latest > cc24-fetch-skips-hardening.tar
+% xz -vk -T0 cc24-fetch-skips-hardening.tar
+```
+After running the tests in a container, get reference results like so.
+```bash
+% podman cp $containerID:/root/out out-reference
+```
--- a/riscv_qemu_FSH
+++ b/riscv_qemu_FSH
@@ -2,4 +2,4 @@
 ROOT="$(dirname $0)"
-"${ROOT}"/qemu/build/qemu-riscv32 -cpu rv32-fsh "$@"
+"${ROOT}"/prefix/bin/qemu-riscv32 -cpu rv32-fsh "$@"