From 2e2f3f263109e2cf7c9c0b33f161f2191b2696cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Michelland?=
 <sebastien.michelland@lcis.grenoble-inp.fr>
Date: Wed, 3 Jan 2024 00:20:15 +0100
Subject: [PATCH] plot scripts, almost done

All that's left now is:
- Include a reference out/ folder in the next build
- Upload to Zenodo
---
 Dockerfile          |   5 +-
 Makefile            |   8 ++-
 README.md           |  19 +++++---
 fault.py            |   1 -
 plot_campaigns.py   | 119 +++++++++++++++++++++++++++++++++++++++++++++
 plot_performance.py |  88 ++++++++++++++++++++++++++++++++
 summary.py          |  55 +++++++++++++++++++--
 7 files changed, 279 insertions(+), 16 deletions(-)
 create mode 100755 plot_campaigns.py
 create mode 100755 plot_performance.py

diff --git a/Dockerfile b/Dockerfile
index 20eafbe..b477552 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ RUN apt -y update && apt -y upgrade && apt -y install \
     libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pip black
 
 # gem5 dependencies
-RUN pip install mypy pre-commit pyelftools
+RUN pip install mypy pre-commit pyelftools matplotlib numpy
 
 WORKDIR /root
 
@@ -85,7 +85,8 @@ RUN cd gem5 && \
 COPY mibench mibench/
 COPY riscv_cc_REF riscv_cc_FSH riscv_qemu_FSH \
      elf32lriscv_ref.x elf32lriscv_ccs.x \
-     fault.py summary.py gem5script.py Makefile .
+     fault.py summary.py gem5script.py \
+     plot_campaigns.py plot_performance.py Makefile .
 
 # Squash the final image so we don't ship source and build files as diffs
 FROM scratch
diff --git a/Makefile b/Makefile
index 7f5a258..7d6d90c 100644
--- a/Makefile
+++ b/Makefile
@@ -119,5 +119,11 @@ $(foreach P,$(PROGRAMS),$(eval \
   $(call simulation_rule,$(notdir $P),icache,REF) \
   $(call simulation_rule,$(notdir $P),icache,FSH)))
 
-.PHONY: campaigns% simulations
+# Final rule for making plots
+plots:
+	./summary.py
+	./plot_campaigns.py
+	./plot_performance.py
+
+.PHONY: campaigns% simulations plots
 .PRECIOUS: $(OUT)/
diff --git a/README.md b/README.md
index 79503a3..a1858be 100644
--- a/README.md
+++ b/README.md
@@ -30,8 +30,8 @@ TODO: Link to Zenodo.
 % sudo docker run -it localhost/cc24-fetch-skips-hardening
 root@(container):~# make all_REF all_FSH run_REF run_FSH
 root@(container):~# make -j$(nproc) campaigns
-root@(container):~# TODO
-# make -j$(nproc) simulations
+root@(container):~# make -j$(nproc) simulations
+root@(container):~# make plots
 ```
 
 The Docker image is just a build of this repository on Ubuntu 22.04; see [Detailed description](#detailed-description) for an explanation of the contents. To build natively without using Docker, please check the [Manual build](#manual-build) instructions and the [Dockerfile](Dockerfile) as a reference.
@@ -42,9 +42,10 @@ The second step is to run fault injection campaigns (`make -jN campaigns`). This
 
 The same command also runs injection campaigns on the reference (non-protected) programs to collect statistics about the percentage of attacks that result in a crash within the attacked block, as a baseline comparison. Predictably, these campaigns result in a lot of security "bypasses" since the countermeasure isn't active.
 
-TODO: Performance simulations
+The third and last major step is to run performance simulations in Gem5 to compare the runtime of the original and protected programs (`make -jN simulations`). No fault is injected in these runs: the countermeasure has no recovery mechanism, so performance cannot meaningfully be measured in an attack scenario.
 
-TODO: Generating figures
+Finally, `make plots` runs three scripts: `summary.py` generates CSV files in `out/` that aggregate the test and simulation results, then the two plot scripts generate `out/campaigns.png` and `out/perf.png`, the figures used in the paper. (Rendering only uses the LaTeX text backend when LaTeX is installed, which is not the case in the Docker image, so fonts will differ slightly from the paper.)
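+For reference, the aggregated CSV files are `out/campaigns.csv`, `out/perf.csv` and `out/size.csv`; the generated `out/README` describes each file and records the tool versions used.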
 
 ## Detailed description
 
@@ -69,7 +69,7 @@ Other files used in the testing process include:
 - `riscv_qemu_REF`, `riscv_qemu_FSH`: Wrappers around QEMU and QEMU-with-FSH-support.
 - `fault.py`: Script for running fault injection campaigns (details inside).
 - `summary.py`: Script for aggregating security and performance test results.
-- TODO: Generating figures.
+- `plot_campaigns.py`, `plot_performance.py`: Scripts for generating figures with matplotlib from the aggregated results.
 
 The Makefile just contains a few top-level commands for using the project.
 
@@ -110,11 +110,15 @@ fsh-ex-s32-1,0,1543,0,833,0,115,60,3,1
 
 The first line indicates the campaign's progress and is used for resuming gracefully if the script is ever interrupted. The next two lines summarize the results; the important thing to check is the absence of `EXITED` and `CCS_BYPASSED` outcomes (the ones shown in red in the figures). Any unusual result is then reported with a comment, which here includes one crash caused by `SIGBUS`.
 
-TODO: Explain aggregate CSV file
+The aggregate file `out/campaigns.csv` collects this information for all campaigns: a header line naming the outcome types, then one row of outcome counts per program/campaign pair.
 
 **Reading the output of performance simulations**
 
-TODO: Explain output of Gem5 simulations
+Gem5 writes the results of each simulation to a folder. Here, these are named `out/m5out/<program>_<cache>_<type>`, where `<cache>` indicates whether the instruction cache was enabled (`icache` or `nocache`) and `<type>` whether the reference (`REF`) or hardened (`FSH`) binary was executed. We use the simplest metric, which is the total execution time reported in `stats.txt` as the `finalTick` value on line 3.
+
+The aggregate file `out/perf.csv` collects the `finalTick` values for each program and cache/type configuration in a single table.
+
+A related performance file (generated by `summary.py`, not Gem5) is `out/size.csv`, which lists the size of each program's code in the reference and hardened binaries.
 
 **False-positive QEMU “bugs”**
 
diff --git a/fault.py b/fault.py
index 8a93678..f27c997 100755
--- a/fault.py
+++ b/fault.py
@@ -391,7 +391,6 @@ def main(argv):
         elf = elftools.elf.elffile.ELFFile(fp)
 
     # Determine the address range to attack
-    # TODO: Use __user_start / __user_end ranges instead of whole .text
     if opt.campaign.startswith("fsh-"):
         symtable = elf.get_section_by_name(".symtab")
         s = symtable.get_symbol_by_name("__ccs_start")[0].entry["st_value"]
diff --git a/plot_campaigns.py b/plot_campaigns.py
new file mode 100755
index 0000000..3355b3f
--- /dev/null
+++ b/plot_campaigns.py
@@ -0,0 +1,119 @@
+#! /usr/bin/env python3
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import shutil
+
+PLOT_CAMPAIGNS = [
+    "fsh-ex-s32-1", "fsh-ex-s32-2", "fsh-ex-sar32", "fsh-multi-random"
+]
+PLOT_CAMPAIGN_LABELS = [
+    "1", "2", "3", "R",
+]
+COLUMNS = [
+    ("Fault reported",
+     "#6eaa56", ["CCS_VIOLATION", "SIGTRAP"]),
+    ("Segfault",
+     "#e0c050", ["SIGSEGV"]),
+    ("Other crash",
+     "#a986bc", ["SILENT_REPLACE", "SIGILL", "OTHER"]),
+    ("Countermeasure bypassed",
+     "#d04030", ["EXITED", "CCS_BYPASSED"]),
+]
+COLUMN_NAMES = [c[0] for c in COLUMNS]
+
+# Group individual fields into categories
+def categorize(dataset, fields):
+    # Check that all fields are used in categorization
+    assert all(any(f in mapping for (_, _, mapping) in COLUMNS)
+               for f in fields if f != "NOT_REACHED")
+
+    categorizedDataset = dict()
+
+    for key, values in dataset.items():
+        # Exclude NOT_REACHED from the count
+        totalFaults = sum(values) - values[fields.index("NOT_REACHED")]
+        categorizedDataset[key] = {"TOTAL": totalFaults}
+
+        for cat, _, mapping in COLUMNS:
+            categorizedDataset[key][cat] = \
+                sum(values[fields.index(m)] for m in mapping) / totalFaults
+
+    return categorizedDataset
+
+# Group/transpose campaigns by program, using numpy arrays
+def group(dataset, programs, campaigns):
+    groupedDataset = dict()
+
+    for p in programs:
+        groups = dict()
+        for cat in {"TOTAL"}.union(COLUMN_NAMES):
+            groups[cat] = np.array([dataset[(p, c)][cat] for c in campaigns])
+
+        groupedDataset[p] = groups
+
+    return groupedDataset
+
+def plot(dataset):
+    N = len(dataset)
+    programs = sorted(dataset)
+
+    # Use LaTeX if available, but don't require it
+    if shutil.which("tex") is not None:
+        plt.rcParams["text.usetex"] = True
+        plt.rcParams["font.family"] = "Times"
+        plt.rcParams["font.size"] = 13
+
+    fig, ax = plt.subplots(1, N, sharey='all')
+
+    for i, program in enumerate(programs):
+        bottom = np.zeros(len(dataset[program]["TOTAL"]))
+        for c, color, _ in COLUMNS:
+            values = dataset[program][c]
+            ax[i].bar(PLOT_CAMPAIGN_LABELS, values,
+                      width=0.3, color=color,
+                      bottom=bottom, alpha=0.8, edgecolor="black")
+            bottom += values
+
+        ax[i].spines["right"].set_visible(False)
+        ax[i].spines["top"].set_visible(False)
+
+        # Keep the y axis ticks and spine only on the leftmost plot
+        if i == 0:
+            ax[i].yaxis.set_major_locator(mpl.ticker.MultipleLocator(0.2))
+            ax[i].yaxis.set_minor_locator(mpl.ticker.NullLocator())
+        else:
+            ax[i].spines["left"].set_visible(False)
+            ax[i].yaxis.set_ticks_position("none")
+
+        totalFaults = sum(dataset[program]["TOTAL"])
+        ax[i].set_xlabel(f"{program}\n({totalFaults} faults)", ha="center")
+
+    fig.legend(COLUMN_NAMES, title="Fault injection outcomes, by proportion",
+               loc="lower center", bbox_to_anchor=(0.5, 0.85), frameon=False,
+               ncol=4)
+    fig.set_size_inches(12, 2)
+    fig.savefig("out/campaigns.png", dpi=400, transparent=False,
+                bbox_inches="tight")
+
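+# Input format (as written by summary.py): out/campaigns.csv starts with a
+# header line "program,campaign,<outcome fields...>"; every following row
+# holds the outcome counts of one (program, campaign) pair.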
+def main():
+    dataset = dict()
+    programs = set()
+
+    with open("out/campaigns.csv", "r") as fp:
+        fields = next(fp).strip().split(",")[2:]
+        for row in fp:
+            program, campaign, *values = row.strip().split(",")
+            dataset[(program, campaign)] = list(map(int, values))
+            programs.add(program)
+
+    dataset = categorize(dataset, fields)
+    dataset = group(dataset, programs, PLOT_CAMPAIGNS)
+    plot(dataset)
+
+if __name__ == "__main__":
+    main()
diff --git a/plot_performance.py b/plot_performance.py
new file mode 100755
index 0000000..954e549
--- /dev/null
+++ b/plot_performance.py
@@ -0,0 +1,88 @@
+#! /usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+    Plot code size and execution time overheads of the countermeasure (out/perf.png)
+"""
+__author__ = "Laure Gonnord"
+__copyright__ = "Grenoble INP/Esisar, 2023"
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import shutil
+
+def compute_derived_metrics(d):
+    d["size_ratios"] = d["size_FSH"] / d["size_REF"]
+    d["nocache_increase"] = (d["nocache_FSH"] - d["nocache_REF"]) * 100 / d["nocache_REF"]
+    d["icache_increase"] = (d["icache_FSH"] - d["icache_REF"]) * 100 / d["icache_REF"]
+
+def plot(dataset, programs):
+    plt.rc("axes", axisbelow=True)
+
+    # Use LaTeX if available, but don't require it
+    if shutil.which("tex") is not None:
+        plt.rcParams["text.usetex"] = True
+        plt.rcParams["font.family"] = "Times"
+        plt.rcParams["font.size"] = 13
+
+    fig, ax = plt.subplots(1, 3)
+
+    # Add a red line at size ratio 1 for reference
+    ax[0].axhline(y=1, color="red", linestyle='-', alpha=0.7, linewidth=1) \
+        .set_zorder(1)
+
+    metrics = [
+        'Protected code size ratio\n(libraries/runtime not counted)',
+        'Execution time overhead in Gem5 in \\%\n(without cache)',
+        'Execution time overhead in Gem5 in \\%\n(8 kB 4-way instruction cache)']
+    columns = ['size_ratios', 'nocache_increase', 'icache_increase']
+
+    for i, (metric, column) in enumerate(zip(metrics, columns)):
+        if i == 0:
+            ax[i].bar(programs, dataset[column], width=0.3, color="tab:gray")
+        else:
+            ax[i].bar(programs, dataset[column], width=0.3, color="#a986bc")
+
+        labels = ax[i].get_xticklabels()
+        ax[i].spines['right'].set_visible(False)
+        ax[i].spines['top'].set_visible(False)
+        ax[i].set_zorder(0)
+        ax[i].yaxis.set_major_locator(
+            mpl.ticker.MultipleLocator(0.5 if i == 0 else 10))
+        ax[i].set_xticklabels(labels, rotation=55, ha='right', fontsize=13)
+        ax[i].set_xlabel(metric, ha='center', rotation=0)
+        ax[i].yaxis.grid(color='gray', linestyle='dashed')
+
+    fig.set_size_inches(12, 2)
+    fig.savefig("out/perf.png", dpi=400, bbox_inches="tight")
+
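+# Inputs (as written by summary.py): out/perf.csv, with one finalTick column
+# per <cache>_<type> configuration (e.g. nocache_REF, icache_FSH), and
+# out/size.csv with the size_REF/size_FSH columns; one row per program, with
+# the column names read from the CSV headers.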
+def main():
+    # Start with a dataset indexed by programs, i.e. program -> program's data
+    dataset = dict()
+
+    with open("out/perf.csv", "r") as fp:
+        fields = next(fp).strip().split(",")[1:]
+        for row in fp:
+            program, *values = row.strip().split(",")
+            dataset[program] = list(map(int, values))
+
+    with open("out/size.csv", "r") as fp:
+        fields += next(fp).strip().split(",")[1:]
+        for row in fp:
+            program, *values = row.strip().split(",")
+            if program in dataset:
+                dataset[program] += list(map(int, values))
+
+    # Sort programs by name and transpose to field -> per-program values
+    programs = sorted(dataset)
+    dataset = {f: np.array([dataset[p][fields.index(f)] for p in programs])
+               for f in fields}
+
+    compute_derived_metrics(dataset)
+    plot(dataset, programs)
+
+if __name__ == "__main__":
+    main()
diff --git a/summary.py b/summary.py
index 08ec982..b7a5215 100755
--- a/summary.py
+++ b/summary.py
@@ -52,14 +52,20 @@ Generated by {USER}@{HOSTNAME} on {NOW}.
 campaigns/*.txt -- generated by fault.py:
     Raw results of fault injection campaigns (might be partial), and
     information about PCs not reached by test executions (*-notreached.txt).
+m5out/* -- generated by gem5script.py running in Gem5:
+    Raw results of the performance simulations.
 campaigns.csv -- generated by summary.py:
-    Summary of all above results of campaigns in CSV format.
-
+    Summary statistics for the campaigns above, in CSV format.
+perf.csv -- generated by summary.py:
+    Execution time statistics extracted from m5out/ traces, in CSV format.
 size.csv -- generated by summary.py:
     Variation in program size between reference and hardened version.
-TODO: Performance simulations.
+campaigns.png -- generated by plot_campaigns.py:
+    Plot of campaigns.csv.
+perf.png -- generated by plot_performance.py:
+    Plot of size.csv and perf.csv.
 """.strip()
 
 # Get the short name for a program given by index
@@ -93,7 +99,19 @@ def fault_log_to_csv(path):
     del csv["setting"]
     return csv
 
+def m5out_get_finalTick(folder):
+    RE_FINALTICK = re.compile(r"^finalTick\s+(\d+)\s+#")
+
+    with open(os.path.join(folder, "stats.txt"), "r") as fp:
+        lines = fp.read().splitlines()
+        for l in lines:
+            m = RE_FINALTICK.match(l)
+            if m:
+                return int(m[1])
+
 def main():
+    ###--- Generate a summary of fault injection campaigns ---###
+
     # Read fields from all campaign result files
     results = dict()
     for i in range(len(ALL_PROGRAMS)):
@@ -127,7 +145,33 @@ def main():
         y = FSH_text_ccs
         fp.write(f"{program_name(i)},{x},{y}\n")
 
-    # Generate a README file to remember some useful data
+    ###--- Generate a summary of performance simulations ---###
+
+    all_m5out = glob.glob("out/m5out/*_*_*/")
+    re_m5out_folder = re.compile(r"([^_]+)_([^_]+_[^_]+)")
+
+    tick_values = {}
+    for f in all_m5out:
+        m = re_m5out_folder.fullmatch(os.path.basename(f[:-1]))
+        if not m:
+            raise Exception(f"unexpected folder name in out/m5out: {f}")
+        name, ctgy = m[1], m[2]
+        finalTick = m5out_get_finalTick(f)
+        if finalTick is None:
+            continue
+        if name not in tick_values:
+            tick_values[name] = dict()
+        tick_values[name][ctgy] = finalTick
+
+    ctgys = sorted({ k for name in tick_values for k in tick_values[name] })
+    with open(os.path.join(WORK_FOLDER, "perf.csv"), "w") as fp:
+        fp.write("program," + ",".join(ctgys) + "\n")
+        for p, res in tick_values.items():
+            fields = [str(res.get(ctgy, 0)) for ctgy in ctgys]
+            fp.write(p + "," + ",".join(fields) + "\n")
+
+    ###--- Generate a README file with some context ---###
+
     with open(WORK_FOLDER + "/README", "w") as fp:
         fp.write(README_HEADER.format(
             USER=os.getenv("USER"),
@@ -135,7 +179,8 @@ def main():
             NOW=str(datetime.now())) + "\n")
 
         fp.write("\nTool versions:\n")
-        for repo in ["llvm-property-preserving", "binutils-gdb", "qemu"]:
+        repos = ["llvm-property-preserving", "binutils-gdb", "qemu", "gem5"]
+        for repo in repos:
             cmd = ["git", "-C", repo, "rev-parse", "@"]
             proc = subprocess.run(cmd, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
-- 
GitLab