Commit 7cdd676d authored by Xavier Leroy, committed by Xavier Leroy

AArch64 port

This commit adds a back-end for the AArch64 architecture, namely ARMv8
in 64-bit mode.
parent eb858038
@@ -40,6 +40,9 @@
/riscV/ConstpropOp.v
/riscV/SelectOp.v
/riscV/SelectLong.v
/aarch64/ConstpropOp.v
/aarch64/SelectOp.v
/aarch64/SelectLong.v
/backend/SelectDiv.v
/backend/SplitLong.v
/cparser/Parser.v
(* *********************************************************************)
(* *)
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, Collège de France and INRIA Paris *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
(** Architecture-dependent parameters for AArch64 *)
Require Import ZArith List.
(*From Flocq*)
Require Import Binary Bits.
Definition ptr64 := true.
Definition big_endian := false.
Definition align_int64 := 8%Z.
Definition align_float64 := 8%Z.
Definition splitlong := false.
Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
Proof.
unfold splitlong, ptr64; congruence.
Qed.
Definition default_nan_64 := (false, iter_nat 51 _ xO xH).
Definition default_nan_32 := (false, iter_nat 22 _ xO xH).
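(* Note: the payloads iter_nat 51 _ xO xH = 2^51 and iter_nat 22 _ xO xH = 2^22
   set only the most significant mantissa bit (the quiet bit), so both
   default NaNs are positive quiet NaNs. *)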
(** Choose the first signaling NaN, if any;
otherwise choose the first NaN;
otherwise use default. *)
Definition choose_nan (is_signaling: positive -> bool)
(default: bool * positive)
(l0: list (bool * positive)) : bool * positive :=
let fix choose_snan (l1: list (bool * positive)) :=
match l1 with
| nil =>
match l0 with nil => default | n :: _ => n end
| ((s, p) as n) :: l1 =>
if is_signaling p then n else choose_snan l1
end
in choose_snan l0.
Lemma choose_nan_idem: forall is_signaling default n,
choose_nan is_signaling default (n :: n :: nil) =
choose_nan is_signaling default (n :: nil).
Proof.
intros. destruct n as [s p]; unfold choose_nan; simpl.
destruct (is_signaling p); auto.
Qed.
Definition choose_nan_64 :=
choose_nan (fun p => negb (Pos.testbit p 51)) default_nan_64.
Definition choose_nan_32 :=
choose_nan (fun p => negb (Pos.testbit p 22)) default_nan_32.
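(* Example of the policy above: is_signaling holds when bit 51 (resp. 22) of
   the payload is clear, so choose_nan_64 applied to a list containing both
   quiet and signaling payloads returns the first signaling one; if all
   payloads are quiet it returns the first element; on the empty list it
   falls back to default_nan_64. *)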
Lemma choose_nan_64_idem: forall n,
choose_nan_64 (n :: n :: nil) = choose_nan_64 (n :: nil).
Proof. intros; apply choose_nan_idem. Qed.
Lemma choose_nan_32_idem: forall n,
choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
Proof. intros; apply choose_nan_idem. Qed.
Definition fma_order {A: Type} (x y z: A) := (z, x, y).
Definition fma_invalid_mul_is_nan := true.
Definition float_of_single_preserves_sNaN := false.
Global Opaque ptr64 big_endian splitlong
default_nan_64 choose_nan_64
default_nan_32 choose_nan_32
fma_order fma_invalid_mul_is_nan
float_of_single_preserves_sNaN.
(** Whether to generate position-independent code or not *)
Parameter pic_code: unit -> bool.
(* *********************************************************************)
(* *)
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, Collège de France and INRIA Paris *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
(* Functions to serialize AArch64 Asm to JSON *)
(* Dummy functions *)
let destination: string option ref = ref None
let sdump_folder = ref ""
let print_if prog sourcename =
()
let pp_mnemonics pp = ()
(* *********************************************************************)
(* *)
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, Collège de France and INRIA Paris *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
(* Expanding built-ins and some pseudo-instructions by rewriting
   the AArch64 assembly code. *)
open Asm
open Asmexpandaux
open AST
open Camlcoq
module Ptrofs = Integers.Ptrofs
exception Error of string
(* Useful constants *)
let _0 = Z.zero
let _1 = Z.one
let _2 = Z.of_sint 2
let _4 = Z.of_sint 4
let _8 = Z.of_sint 8
let _16 = Z.of_sint 16
let _m1 = Z.of_sint (-1)
(* Emit instruction sequences that set or offset a register by a constant. *)
let expand_loadimm32 (dst: ireg) n =
List.iter emit (Asmgen.loadimm32 dst n [])
let expand_addimm64 (dst: iregsp) (src: iregsp) n =
List.iter emit (Asmgen.addimm64 dst src n [])
let expand_storeptr (src: ireg) (base: iregsp) ofs =
List.iter emit (Asmgen.storeptr src base ofs [])
(* Handling of varargs *)
(* Determine the number of int registers, FP registers, and stack locations
used to pass the fixed parameters. *)
let rec next_arg_locations ir fr stk = function
| [] ->
(ir, fr, stk)
| (Tint | Tlong | Tany32 | Tany64) :: l ->
if ir < 8
then next_arg_locations (ir + 1) fr stk l
else next_arg_locations ir fr (stk + 8) l
| (Tfloat | Tsingle) :: l ->
if fr < 8
then next_arg_locations ir (fr + 1) stk l
else next_arg_locations ir fr (stk + 8) l
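(* Hypothetical sanity check (illustration only, not part of the commit):
   for fixed parameters consisting of a pointer and a double, i.e.
   [Tlong; Tfloat], one integer register (X0) and one FP register (D0)
   are consumed and nothing is passed on the stack. *)
let _ = assert (next_arg_locations 0 0 0 [Tlong; Tfloat] = (1, 1, 0))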
(* Allocate memory on the stack and use it to save the registers
used for parameter passing. As an optimization, do not save
the registers used to pass the fixed parameters. *)
let int_param_regs = [| X0; X1; X2; X3; X4; X5; X6; X7 |]
let float_param_regs = [| D0; D1; D2; D3; D4; D5; D6; D7 |]
let size_save_register_area = 8*8 + 8*16
let save_parameter_registers ir fr =
emit (Psubimm(X, XSP, XSP, Z.of_uint size_save_register_area));
let i = ref ir in
while !i < 8 do
let pos = 8*16 + !i*8 in
if !i land 1 = 0 then begin
emit (Pstp(int_param_regs.(!i), int_param_regs.(!i + 1),
ADimm(XSP, Z.of_uint pos)));
i := !i + 2
end else begin
emit (Pstrx(int_param_regs.(!i), ADimm(XSP, Z.of_uint pos)));
i := !i + 1
end
done;
for i = fr to 7 do
let pos = i*16 in
emit (Pstrd(float_param_regs.(i), ADimm(XSP, Z.of_uint pos)))
done
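(* Illustration (a sketch, not from the commit) of the save-area layout
   built above, relative to SP after the Psubimm; the caller's SP sits at
   offset size_save_register_area = 192:
     [SP + 0   .. SP + 127]  D0..D7, one 16-byte slot each (Pstrd fills 8)
     [SP + 128 .. SP + 191]  X0..X7, 8 bytes each
   The __gr_top and __vr_top pointers set up by va_start point just past
   these two areas. *)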
(* Initialize a va_list as per va_start.
Register r points to the following struct:
typedef struct __va_list {
void *__stack; // next stack parameter
void *__gr_top; // top of the save area for int regs
void *__vr_top; // top of the save area for float regs
int __gr_offs; // offset from __gr_top to next int reg
int __vr_offs; // offset from __vr_top to next FP reg
}
*)
let current_function_stacksize = ref 0L
let expand_builtin_va_start r =
if not (is_current_function_variadic ()) then
invalid_arg "Fatal error: va_start used in non-vararg function";
let (ir, fr, stk) =
next_arg_locations 0 0 0 (get_current_function_args ()) in
let stack_ofs = Int64.(add !current_function_stacksize (of_int stk))
and gr_top_ofs = !current_function_stacksize
and vr_top_ofs = Int64.(sub !current_function_stacksize 64L)
and gr_offs = - ((8 - ir) * 8)
and vr_offs = - ((8 - fr) * 16) in
(* va->__stack = sp + stack_ofs *)
expand_addimm64 (RR1 X16) XSP (coqint_of_camlint64 stack_ofs);
emit (Pstrx(X16, ADimm(RR1 r, coqint_of_camlint64 0L)));
(* va->__gr_top = sp + gr_top_ofs *)
if gr_top_ofs <> stack_ofs then
expand_addimm64 (RR1 X16) XSP (coqint_of_camlint64 gr_top_ofs);
emit (Pstrx(X16, ADimm(RR1 r, coqint_of_camlint64 8L)));
(* va->__vr_top = sp + vr_top_ofs *)
expand_addimm64 (RR1 X16) XSP (coqint_of_camlint64 vr_top_ofs);
emit (Pstrx(X16, ADimm(RR1 r, coqint_of_camlint64 16L)));
(* va->__gr_offs = gr_offs *)
expand_loadimm32 X16 (coqint_of_camlint (Int32.of_int gr_offs));
emit (Pstrw(X16, ADimm(RR1 r, coqint_of_camlint64 24L)));
(* va->__vr_offs = vr_offs *)
expand_loadimm32 X16 (coqint_of_camlint (Int32.of_int vr_offs));
emit (Pstrw(X16, ADimm(RR1 r, coqint_of_camlint64 28L)))
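(* Illustration (an assumption about the AAPCS64 va_arg scheme, not part of
   the commit): __gr_offs and __vr_offs are stored as negative offsets from
   the top of the corresponding save area; va_arg roughly reads the next
   argument at __gr_top + __gr_offs (resp. __vr_top + __vr_offs), adds 8
   (resp. 16) to the offset, and switches to __stack once the offset
   reaches zero. *)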
(* Handling of annotations *)
let expand_annot_val kind txt targ args res =
emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none));
match args, res with
| [BA(IR src)], BR(IR dst) ->
if dst <> src then emit (Pmov (RR1 dst, RR1 src))
| [BA(FR src)], BR(FR dst) ->
if dst <> src then emit (Pfmov (dst, src))
| _, _ ->
raise (Error "ill-formed __builtin_annot_val")
(* Handling of memcpy *)
(* We assume unaligned memory accesses are efficient. Hence we use
memory accesses as wide as we can, up to 16 bytes.
Temporary registers used: x15 x16 x17 x29 x30. *)
let offset_in_range ofs =
let ofs = Z.to_int64 ofs in 0L <= ofs && ofs < 0x1000L
let memcpy_small_arg sz arg tmp =
match arg with
| BA (IR r) ->
(RR1 r, _0)
| BA_addrstack ofs ->
if offset_in_range ofs
&& offset_in_range (Ptrofs.add ofs (Ptrofs.repr (Z.of_uint sz)))
then (XSP, ofs)
else begin expand_addimm64 (RR1 tmp) XSP ofs; (RR1 tmp, _0) end
| _ ->
assert false
let expand_builtin_memcpy_small sz al src dst =
let (tsrc, tdst) =
if dst <> BA (IR X17) then (X17, X29) else (X29, X17) in
let (rsrc, osrc) = memcpy_small_arg sz src tsrc in
let (rdst, odst) = memcpy_small_arg sz dst tdst in
let rec copy osrc odst sz =
if sz >= 16 then begin
emit (Pldp(X16, X30, ADimm(rsrc, osrc)));
emit (Pstp(X16, X30, ADimm(rdst, odst)));
copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16)
end
else if sz >= 8 then begin
emit (Pldrx(X16, ADimm(rsrc, osrc)));
emit (Pstrx(X16, ADimm(rdst, odst)));
copy (Ptrofs.add osrc _8) (Ptrofs.add odst _8) (sz - 8)
end
else if sz >= 4 then begin
emit (Pldrw(X16, ADimm(rsrc, osrc)));
emit (Pstrw(X16, ADimm(rdst, odst)));
copy (Ptrofs.add osrc _4) (Ptrofs.add odst _4) (sz - 4)
end
else if sz >= 2 then begin
emit (Pldrh(W, X16, ADimm(rsrc, osrc)));
emit (Pstrh(X16, ADimm(rdst, odst)));
copy (Ptrofs.add osrc _2) (Ptrofs.add odst _2) (sz - 2)
end
else if sz >= 1 then begin
emit (Pldrb(W, X16, ADimm(rsrc, osrc)));
emit (Pstrb(X16, ADimm(rdst, odst)));
copy (Ptrofs.add osrc _1) (Ptrofs.add odst _1) (sz - 1)
end
in copy osrc odst sz
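(* Standalone sketch (not part of the commit) of the size decomposition
   performed by [copy] above: it lists the access widths that would be
   emitted for a given [sz].  For example, access_widths 27 = [16; 8; 2; 1],
   i.e. one Pldp/Pstp pair, one 8-byte, one 2-byte and one 1-byte access. *)
let rec access_widths sz =
  if sz >= 16 then 16 :: access_widths (sz - 16)
  else if sz >= 8 then 8 :: access_widths (sz - 8)
  else if sz >= 4 then 4 :: access_widths (sz - 4)
  else if sz >= 2 then 2 :: access_widths (sz - 2)
  else if sz >= 1 then [1]
  else []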
let memcpy_big_arg arg tmp =
match arg with
| BA (IR r) -> emit (Pmov(RR1 tmp, RR1 r))
| BA_addrstack ofs -> expand_addimm64 (RR1 tmp) XSP ofs
| _ -> assert false
let expand_builtin_memcpy_big sz al src dst =
assert (sz >= 16);
memcpy_big_arg src X30;
memcpy_big_arg dst X29;
let lbl = new_label () in
expand_loadimm32 X15 (Z.of_uint (sz / 16));
emit (Plabel lbl);
emit (Pldp(X16, X17, ADpostincr(RR1 X30, _16)));
emit (Pstp(X16, X17, ADpostincr(RR1 X29, _16)));
emit (Psubimm(W, RR1 X15, RR1 X15, _1));
emit (Pcbnz(W, X15, lbl));
if sz mod 16 >= 8 then begin
emit (Pldrx(X16, ADpostincr(RR1 X30, _8)));
emit (Pstrx(X16, ADpostincr(RR1 X29, _8)))
end;
if sz mod 8 >= 4 then begin
emit (Pldrw(X16, ADpostincr(RR1 X30, _4)));
emit (Pstrw(X16, ADpostincr(RR1 X29, _4)))
end;
if sz mod 4 >= 2 then begin
emit (Pldrh(W, X16, ADpostincr(RR1 X30, _2)));
emit (Pstrh(X16, ADpostincr(RR1 X29, _2)))
end;
if sz mod 2 >= 1 then begin
emit (Pldrb(W, X16, ADpostincr(RR1 X30, _1)));
emit (Pstrb(X16, ADpostincr(RR1 X29, _1)))
end
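(* Worked example (illustration, not part of the commit): for sz = 100,
   X15 is loaded with 100 / 16 = 6, the Pldp/Pstp loop copies 96 bytes,
   and since 100 mod 8 = 4, a single Pldrw/Pstrw pair copies the
   remaining 4 bytes. *)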
let expand_builtin_memcpy sz al args =
let (dst, src) =
match args with [d; s] -> (d, s) | _ -> assert false in
if sz < 64
then expand_builtin_memcpy_small sz al src dst
else expand_builtin_memcpy_big sz al src dst
(* Handling of volatile reads and writes *)
let expand_builtin_vload_common chunk base ofs res =
let addr = ADimm(base, ofs) in
match chunk, res with
| Mint8unsigned, BR(IR res) ->
emit (Pldrb(W, res, addr))
| Mint8signed, BR(IR res) ->
emit (Pldrsb(W, res, addr))
| Mint16unsigned, BR(IR res) ->
emit (Pldrh(W, res, addr))
| Mint16signed, BR(IR res) ->
emit (Pldrsh(W, res, addr))
| Mint32, BR(IR res) ->
emit (Pldrw(res, addr))
| Mint64, BR(IR res) ->
emit (Pldrx(res, addr))
| Mfloat32, BR(FR res) ->
emit (Pldrs(res, addr))
| Mfloat64, BR(FR res) ->
emit (Pldrd(res, addr))
| _ ->
assert false
let expand_builtin_vload chunk args res =
match args with
| [BA(IR addr)] ->
expand_builtin_vload_common chunk (RR1 addr) _0 res
| [BA_addrstack ofs] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vload_common chunk XSP ofs res
else begin
expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
expand_builtin_vload_common chunk (RR1 X16) _0 res
end
| [BA_addptr(BA(IR addr), BA_long ofs)] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vload_common chunk (RR1 addr) ofs res
else begin
expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
expand_builtin_vload_common chunk (RR1 X16) _0 res
end
| _ ->
assert false
let expand_builtin_vstore_common chunk base ofs src =
let addr = ADimm(base, ofs) in
match chunk, src with
| (Mint8signed | Mint8unsigned), BA(IR src) ->
emit (Pstrb(src, addr))
| (Mint16signed | Mint16unsigned), BA(IR src) ->
emit (Pstrh(src, addr))
| Mint32, BA(IR src) ->
emit (Pstrw(src, addr))
| Mint64, BA(IR src) ->
emit (Pstrx(src, addr))
| Mfloat32, BA(FR src) ->
emit (Pstrs(src, addr))
| Mfloat64, BA(FR src) ->
emit (Pstrd(src, addr))
| _ ->
assert false
let expand_builtin_vstore chunk args =
match args with
| [BA(IR addr); src] ->
expand_builtin_vstore_common chunk (RR1 addr) _0 src
| [BA_addrstack ofs; src] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vstore_common chunk XSP ofs src
else begin
expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
expand_builtin_vstore_common chunk (RR1 X16) _0 src
end
| [BA_addptr(BA(IR addr), BA_long ofs); src] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vstore_common chunk (RR1 addr) ofs src
else begin
expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
expand_builtin_vstore_common chunk (RR1 X16) _0 src
end
| _ ->
assert false
(* Handling of compiler-inlined builtins *)
let expand_builtin_inline name args res =
match name, args, res with
(* Synchronization *)
| "__builtin_membar", [], _ ->
()
| "__builtin_nop", [], _ ->
emit Pnop
(* Byte swap *)
| ("__builtin_bswap" | "__builtin_bswap32"), [BA(IR a1)], BR(IR res) ->
emit (Prev(W, res, a1))
| "__builtin_bswap64", [BA(IR a1)], BR(IR res) ->
emit (Prev(X, res, a1))
| "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
emit (Prev16(W, res, a1));
emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF))
(* Count leading zeros and leading sign bits *)
| "__builtin_clz", [BA(IR a1)], BR(IR res) ->
emit (Pclz(W, res, a1))
| ("__builtin_clzl" | "__builtin_clzll"), [BA(IR a1)], BR(IR res) ->
emit (Pclz(X, res, a1))
| "__builtin_cls", [BA(IR a1)], BR(IR res) ->
emit (Pcls(W, res, a1))
| ("__builtin_clsl" | "__builtin_clsll"), [BA(IR a1)], BR(IR res) ->
emit (Pcls(X, res, a1))
(* Float arithmetic *)
| "__builtin_fabs", [BA(FR a1)], BR(FR res) ->
emit (Pfabs(D, res, a1))
| "__builtin_fsqrt", [BA(FR a1)], BR(FR res) ->
emit (Pfsqrt(D, res, a1))
| "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
emit (Pfmadd(D, res, a1, a2, a3))
| "__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
emit (Pfmsub(D, res, a1, a2, a3))
| "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
emit (Pfnmadd(D, res, a1, a2, a3))
| "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
emit (Pfnmsub(D, res, a1, a2, a3))
(* Vararg *)
| "__builtin_va_start", [BA(IR a)], _ ->
expand_builtin_va_start a
(* Catch-all *)
| _ ->
raise (Error ("unrecognized builtin " ^ name))
(* Expansion of instructions *)
let expand_instruction instr =
match instr with
| Pallocframe (sz, ofs) ->
emit (Pmov (RR1 X29, XSP));
if is_current_function_variadic() then begin
let (ir, fr, _) =
next_arg_locations 0 0 0 (get_current_function_args ()) in
save_parameter_registers ir fr;
current_function_stacksize :=
Int64.(add (Z.to_int64 sz) (of_int size_save_register_area))
end else begin
current_function_stacksize := Z.to_int64 sz
end;
expand_addimm64 XSP XSP (Ptrofs.repr (Z.neg sz));
expand_storeptr X29 XSP ofs
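(* Illustrative frame layout after this expansion for a variadic function
   (a sketch, not from the commit), from high to low addresses:
       caller's SP ->  vararg register save area (192 bytes)
                       frame of size sz, back link (X29) stored at ofs
       SP          ->
   current_function_stacksize is sz + 192 here (just sz for non-variadic
   functions) and is what Pfreeframe below adds back to SP. *)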
| Pfreeframe (sz, ofs) ->
expand_addimm64 XSP XSP (coqint_of_camlint64 !current_function_stacksize)
| Pcvtx2w rd ->
(* no code generated, the upper 32 bits of rd will be ignored *)
()
| Pbuiltin (ef,args,res) ->
begin match ef with
| EF_builtin (name,sg) ->
expand_builtin_inline (camlstring_of_coqstring name) args res
| EF_vload chunk ->
expand_builtin_vload chunk args res
| EF_vstore chunk ->
expand_builtin_vstore chunk args
| EF_annot_val (kind,txt,targ) ->
expand_annot_val kind txt targ args res
| EF_memcpy(sz, al) ->
expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
| EF_annot _ | EF_debug _ | EF_inline_asm _ ->
emit instr
| _ ->
assert false
end
| _ ->
emit instr
let int_reg_to_dwarf r = 0 (* TODO *)
let float_reg_to_dwarf r = 0 (* TODO *)
let preg_to_dwarf = function
| IR r -> int_reg_to_dwarf r
| FR r -> float_reg_to_dwarf r
| _ -> assert false
let expand_function id fn =
try
set_current_function fn;
expand id (* sp= *) 2 preg_to_dwarf expand_instruction fn.fn_code;
Errors.OK (get_current_function ())
with Error s ->
Errors.Error (Errors.msg (coqstring_of_camlstring s))
let expand_fundef id = function
| Internal f ->
begin match expand_function id f with
| Errors.OK tf -> Errors.OK (Internal tf)
| Errors.Error msg -> Errors.Error msg
end
| External ef ->
Errors.OK (External ef)
let expand_program (p: Asm.program) : Asm.program Errors.res =
AST.transform_partial_program2 expand_fundef (fun id v -> Errors.OK v) p
(* *********************************************************************)
(* *)
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, Collège de France and Inria Paris *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
(* under the terms of the GNU General Public License as published by *)
(* the Free Software Foundation, either version 2 of the License, or *)
(* (at your option) any later version. This file is also distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
(** Platform-specific built-in functions *)
Require Import String Coqlib.
Require Import AST Integers Floats Values.
Require Import Builtins0.
Inductive platform_builtin : Type := .
Local Open Scope string_scope.
Definition platform_builtin_table : list (string * platform_builtin) :=
nil.