Commit 66b0512c authored by Bernhard Schommer

Moved the rest of the ia32 builtins to asmexpand.

parent 0dda7b5a
......@@ -324,7 +324,9 @@ let expand_instruction instr =
| EF_memcpy(sz, al) ->
expand_builtin_memcpy (Int32.to_int (camlint_of_coqint sz))
(Int32.to_int (camlint_of_coqint al)) args
| _ -> ()
| EF_inline_asm(txt, sg, clob) ->
emit instr
| _ -> assert false
end
| _ ->
emit instr
......
......@@ -216,9 +216,12 @@ Inductive instruction: Type :=
| Padcl_ir (n: int) (r: ireg)
| Padcl_rr (r1: ireg) (r2: ireg)
| Paddl (r1: ireg) (r2: ireg)
| Paddl_mi (a: addrmode) (n: int)
| Paddl_ri (r1: ireg) (n: int)
| Pbsfl (r1: ireg) (r2: ireg)
| Pbsrl (r1: ireg) (r2: ireg)
| Pbswap (r: ireg)
| Pcfi_adjust (n: int)
| Pfmadd132 (r1: freg) (r2: freg) (r3: freg)
| Pfmadd213 (r1: freg) (r2: freg) (r3: freg)
| Pfmadd231 (r1: freg) (r2: freg) (r3: freg)
......@@ -233,9 +236,18 @@ Inductive instruction: Type :=
| Pfnmsub231 (r1: freg) (r2: freg) (r3: freg)
| Pmaxsd (r1: freg) (r2: freg)
| Pminsd (r1: freg) (r2: freg)
| Pmovb_rm (rs: ireg) (a: addrmode)
| Pmovq_rm (rs: freg) (a: addrmode)
| Pmovq_mr (a: addrmode) (rs: freg)
| Pmovsb
| Pmovsw
| Pmovw_rm (rs: ireg) (ad: addrmode)
| Prep_movsl
| Prolw_8 (r: ireg)
| Psbbl (r1: ireg) (r2: ireg)
| Psqrtsd (r1: freg) (r2: freg).
| Psqrtsd (r1: freg) (r2: freg)
| Psubl_ri (r1: ireg) (n: int)
| Pxchg (r1: ireg) (r2: ireg).
Definition code := list instruction.
Record function : Type := mkfunction { fn_sig: signature; fn_code: code }.
......@@ -777,9 +789,12 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Padcl_ir _ _
| Padcl_rr _ _
| Paddl _ _
| Paddl_mi _ _
| Paddl_ri _ _
| Pbswap _
| Pbsfl _ _
| Pbsrl _ _
| Pcfi_adjust _
| Pfmadd132 _ _ _
| Pfmadd213 _ _ _
| Pfmadd231 _ _ _
......@@ -794,9 +809,18 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Pfnmsub231 _ _ _
| Pmaxsd _ _
| Pminsd _ _
| Pmovb_rm _ _
| Pmovq_rm _ _
| Pmovq_mr _ _
| Pmovsb
| Pmovsw
| Pmovw_rm _ _
| Prep_movsl
| Prolw_8 _
| Psbbl _ _
| Psqrtsd _ _ => Stuck
| Psqrtsd _ _
| Psubl_ri _ _
| Pxchg _ _ => Stuck
end.
(** Translation of the LTL/Linear/Mach view of machine registers
......
......@@ -16,16 +16,198 @@
open Asm
open Asmexpandaux
open Asmgen
open AST
open Camlcoq
open Datatypes
open Integers
(* Useful constants and helper functions *)
let _0 = Integers.Int.zero
let _0 = Int.zero
let _1 = Int.one
let _2 = coqint_of_camlint 2l
let _4 = coqint_of_camlint 4l
let _8 = coqint_of_camlint 8l
let stack_alignment () =
if Configuration.system = "macosx" then 16
else 8
(* SP adjustment to allocate or free a stack frame *)
let int32_align n a =
if n >= 0l
then Int32.logand (Int32.add n (Int32.of_int (a-1))) (Int32.of_int (-a))
else Int32.logand n (Int32.of_int (-a))
let sp_adjustment sz =
let sz = camlint_of_coqint sz in
(* Preserve proper alignment of the stack *)
let sz = int32_align sz (stack_alignment ()) in
(* The top 4 bytes have already been allocated by the "call" instruction. *)
let sz = Int32.sub sz 4l in
sz
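The SP arithmetic above is easy to check in isolation. A standalone OCaml sketch (illustration only, not part of the patch; plain ints instead of coqints, assuming the 16-byte MacOS X alignment):
(* align_up rounds a non-negative size up to a multiple of [a];
   the adjustment then drops the 4 bytes already pushed by "call". *)
let align_up n a = (n + a - 1) land (-a)
let sp_adjustment_demo sz = align_up sz 16 - 4
let () = assert (sp_adjustment_demo 20 = 28)   (* 20 -> 32 aligned -> 28 *)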
(* Built-ins. They come in two flavors:
- annotation statements: take their arguments in registers or stack
locations; generate no code;
- inlined by the compiler: take their arguments in arbitrary
registers; preserve all registers except ECX, EDX, XMM6 and XMM7. *)
(* Handling of annotations *)
let expand_annot_val txt targ args res =
emit (Pannot (EF_annot(txt,[targ]), List.map (fun r -> AA_base r) args));
match args, res with
| [IR src], [IR dst] ->
if dst <> src then emit (Pmov_rr (dst,src))
| [FR src], [FR dst] ->
if dst <> src then emit (Pmovsd_ff (dst,src))
| _, _ -> assert false
(* Handling of memcpy *)
(* Unaligned memory accesses are quite fast on IA32, so use large
memory accesses regardless of alignment. *)
let expand_builtin_memcpy_small sz al src dst =
assert (src = EDX && dst = EAX);
let rec copy ofs sz =
if sz >= 8 && !Clflags.option_ffpu then begin
emit (Pmovq_rm (XMM7,Addrmode (Some src, None, Coq_inl ofs)));
emit (Pmovq_mr (Addrmode (Some dst, None, Coq_inl ofs),XMM7));
copy (Int.add ofs _8) (sz - 8)
end else if sz >= 4 then begin
emit (Pmov_rm (ECX,Addrmode (Some src, None, Coq_inl ofs)));
emit (Pmov_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX));
copy (Int.add ofs _4) (sz - 4)
end else if sz >= 2 then begin
emit (Pmovw_rm (ECX,Addrmode (Some src, None, Coq_inl ofs)));
emit (Pmovw_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX));
copy (Int.add ofs _2) (sz - 2)
end else if sz >= 1 then begin
emit (Pmovb_rm (ECX,Addrmode (Some src, None, Coq_inl ofs)));
emit (Pmovb_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX));
copy (Int.add ofs _1) (sz - 1)
end in
copy _0 sz
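For small copies the recursion above simply peels off the widest access that still fits. A standalone sketch of the width schedule it produces (illustration only, assuming -ffpu so the 8-byte SSE moves are allowed):
let rec widths sz =
  if sz >= 8 then 8 :: widths (sz - 8)
  else if sz >= 4 then 4 :: widths (sz - 4)
  else if sz >= 2 then 2 :: widths (sz - 2)
  else if sz >= 1 then 1 :: widths (sz - 1)
  else []
let () = assert (widths 13 = [8; 4; 1])   (* one movq, one movl, one movb *)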
let expand_builtin_memcpy_big sz al src dst =
assert (src = ESI && dst = EDI);
emit (Pmov_ri (ECX,coqint_of_camlint (Int32.of_int (sz / 4))));
emit Prep_movsl;
if sz mod 4 >= 2 then emit Pmovsw;
if sz mod 2 >= 1 then emit Pmovsb
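The big case moves sz / 4 double-words with rep movsl and mops up the remainder with at most one movsw and one movsb. Illustration only, on plain ints:
let big_copy_plan sz =
  (sz / 4,           (* ECX count for rep movsl *)
   sz mod 4 >= 2,    (* trailing movsw? *)
   sz mod 2 >= 1)    (* trailing movsb? *)
let () = assert (big_copy_plan 35 = (8, true, true))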
let expand_builtin_memcpy sz al args =
let (dst, src) =
match args with [IR d; IR s] -> (d, s) | _ -> assert false in
if sz <= 32
then expand_builtin_memcpy_small sz al src dst
else expand_builtin_memcpy_big sz al src dst
(* Handling of volatile reads and writes *)
let offset_addressing (Addrmode(base, ofs, cst)) delta =
Addrmode(base, ofs,
match cst with
| Coq_inl n -> Coq_inl(Integers.Int.add n delta)
| Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta))
let expand_builtin_vload_common chunk addr res =
match chunk, res with
| Mint8unsigned, [IR res] ->
emit (Pmovzb_rm (res,addr))
| Mint8signed, [IR res] ->
emit (Pmovsb_rm (res,addr))
| Mint16unsigned, [IR res] ->
emit (Pmovzw_rm (res,addr))
| Mint16signed, [IR res] ->
emit (Pmovsw_rm (res,addr))
| Mint32, [IR res] ->
emit (Pmov_rm (res,addr))
| Mint64, [IR res1; IR res2] ->
let addr' = offset_addressing addr (coqint_of_camlint 4l) in
if not (Asmgen.addressing_mentions addr res2) then begin
emit (Pmov_rm (res2,addr));
emit (Pmov_rm (res1,addr'))
end else begin
emit (Pmov_rm (res1,addr'));
emit (Pmov_rm (res2,addr))
end
| Mfloat32, [FR res] ->
emit (Pmovss_fm (res,addr))
| Mfloat64, [FR res] ->
emit (Pmovsd_fm (res,addr))
| _ ->
assert false
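The only subtle case above is Mint64: the load into res2 must not come first when res2 is also a register of the addressing mode, otherwise the address would be clobbered before the second load. A sketch of that order choice (illustration only; loads are represented as strings):
let int64_load_order ~addr_uses_res2 =
  if not addr_uses_res2
  then [ "[addr] -> res2"; "[addr+4] -> res1" ]
  else [ "[addr+4] -> res1"; "[addr] -> res2" ]   (* write res2 last *)
let () = assert (int64_load_order ~addr_uses_res2:true
                 = [ "[addr+4] -> res1"; "[addr] -> res2" ])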
let expand_builtin_vload chunk args res =
match args with
| [IR addr] ->
expand_builtin_vload_common chunk (Addrmode (Some addr,None, Coq_inl _0)) res
| _ ->
assert false
let expand_builtin_vload_global chunk id ofs args res =
expand_builtin_vload_common chunk (Addrmode(None, None, Coq_inr(id,ofs))) res
let expand_builtin_vstore_common chunk addr src tmp =
match chunk, src with
| (Mint8signed | Mint8unsigned), [IR src] ->
if Asmgen.low_ireg src then
emit (Pmovb_mr (addr,src))
else begin
emit (Pmov_rr (tmp,src));
emit (Pmovb_mr (addr,tmp))
end
| (Mint16signed | Mint16unsigned), [IR src] ->
emit (Pmovw_mr (addr,src))
| Mint32, [IR src] ->
emit (Pmov_mr (addr,src))
| Mint64, [IR src1; IR src2] ->
let addr' = offset_addressing addr (coqint_of_camlint 4l) in
emit (Pmov_mr (addr,src2));
emit (Pmov_mr (addr',src1))
| Mfloat32, [FR src] ->
emit (Pmovss_mf (addr,src))
| Mfloat64, [FR src] ->
emit (Pmovsd_mf (addr,src))
| _ ->
assert false
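The byte-store case above needs the detour through tmp because only EAX, EBX, ECX and EDX have an 8-bit sub-register on ia32. A sketch of that predicate, mirroring what Asmgen.low_ireg checks (illustration only):
let has_byte_subreg = function
  | "EAX" | "EBX" | "ECX" | "EDX" -> true
  | _ -> false                      (* ESI, EDI, EBP: copy to tmp first *)
let () = assert (not (has_byte_subreg "ESI"))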
let expand_builtin_vstore chunk args =
match args with
| IR addr :: src ->
expand_builtin_vstore_common chunk
(Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) src
(if addr = EAX then ECX else EAX)
| _ -> assert false
let expand_builtin_vstore_global chunk id ofs args =
expand_builtin_vstore_common chunk
(Addrmode(None, None, Coq_inr(id,ofs))) args EAX
(* Handling of varargs *)
let expand_builtin_va_start r =
if not !current_function.fn_sig.sig_cc.cc_vararg then
invalid_arg "Fatal error: va_start used in non-vararg function";
let ofs = coqint_of_camlint
Int32.(add (add !PrintAsmaux.current_function_stacksize 4l)
(mul 4l (Z.to_int32 (Conventions1.size_arguments
!current_function.fn_sig)))) in
emit (Pmov_mr (Addrmode (Some r, None, Coq_inl _0),ESP));
emit (Paddl_mi (Addrmode (Some r,None,Coq_inl _0),ofs))
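expand_builtin_va_start stores ESP + framesize + 4 (return address) + 4 * size of the declared arguments into *r, i.e. the address just past the fixed arguments. A worked example with hypothetical sizes (illustration only):
let va_start_offset ~framesize ~arg_words =
  framesize + 4 + 4 * arg_words
let () = assert (va_start_offset ~framesize:28 ~arg_words:2 = 40)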
(* Handling of compiler-inlined builtins *)
let emit_builtin_inline oc name args res =
let expand_builtin_inline name args res =
match name, args, res with
(* Integer arithmetic *)
| ("__builtin_bswap"| "__builtin_bswap32"), [IR a1], [IR res] ->
......@@ -128,36 +310,70 @@ let emit_builtin_inline oc name args res =
assert (a = EAX && b = EDX && rh = EDX && rl = EAX);
emit (Pmul_r EDX)
(* Memory accesses *)
(* | "__builtin_read16_reversed", [IR a1], [IR res] ->
fprintf oc " movzwl 0(%a), %a\n" ireg a1 ireg res;
fprintf oc " rolw $8, %a\n" ireg16 res
| "__builtin_read16_reversed", [IR a1], [IR res] ->
let addr = Addrmode(Some a1,None,Coq_inl _0) in
emit (Pmovzw_rm (res,addr));
emit (Prolw_8 res)
| "__builtin_read32_reversed", [IR a1], [IR res] ->
fprintf oc " movl 0(%a), %a\n" ireg a1 ireg res;
fprintf oc " bswap %a\n" ireg res
let addr = Addrmode(Some a1,None,Coq_inl _0) in
emit (Pmov_rm (res,addr));
emit (Pbswap res)
| "__builtin_write16_reversed", [IR a1; IR a2], _ ->
let tmp = if a1 = ECX then EDX else ECX in
let addr = Addrmode(Some a1,None,Coq_inl _0) in
if a2 <> tmp then
emit (Pmov_rr (a2,tmp));
fprintf oc " xchg %a, %a\n" ireg8 tmp high_ireg8 tmp;
fprintf oc " movw %a, 0(%a)\n" ireg16 tmp ireg a1
| "__builtin_write32_reversed", [IR a1; IR a2], _ ->
let tmp = if a1 = ECX then EDX else ECX in
if a2 <> tmp then
emit (Pmov_rr (a2,tmp));
emit (Pbswap res);
fprintf oc " movl %a, 0(%a)\n" ireg tmp ireg a1
emit (Pxchg (tmp,tmp));
emit (Pmovw_mr (addr,tmp))
(* Vararg stuff *)
| "__builtin_va_start", [IR a], _ ->
expand_builtin_va_start a
(* Synchronization *)
| "__builtin_membar", [], _ ->
()
(* Vararg stuff *)
| "__builtin_va_start", [IR a], _ ->
print_builtin_va_start oc a
(* Catch-all *) *)
(* Catch-all *)
| _ ->
invalid_arg ("unrecognized builtin " ^ name)
let expand_instruction instr =
match instr with
| Pallocframe (sz, ofs_ra, ofs_link) ->
let sz = sp_adjustment sz in
let addr = Addrmode(Some ESP,None,Coq_inl (coqint_of_camlint (Int32.add sz 4l))) in
let addr' = Addrmode (Some ESP, None, Coq_inl ofs_link) in
let sz' = coqint_of_camlint sz in
emit (Psubl_ri (ESP,sz'));
emit (Pcfi_adjust sz');
emit (Plea (EDX,addr));
emit (Pmov_mr (addr',EDX));
PrintAsmaux.current_function_stacksize := sz
| Pfreeframe(sz, ofs_ra, ofs_link) ->
let sz = sp_adjustment sz in
emit (Paddl_ri (ESP,coqint_of_camlint sz))
| Pbuiltin (ef,args, res) ->
begin
match ef with
| EF_builtin(name, sg) ->
expand_builtin_inline (extern_atom name) args res
| EF_vload chunk ->
expand_builtin_vload chunk args res
| EF_vstore chunk ->
expand_builtin_vstore chunk args
| EF_vload_global(chunk, id, ofs) ->
expand_builtin_vload_global chunk id ofs args res
| EF_vstore_global(chunk, id, ofs) ->
expand_builtin_vstore_global chunk id ofs args
| EF_memcpy(sz, al) ->
expand_builtin_memcpy (Int32.to_int (camlint_of_coqint sz))
(Int32.to_int (camlint_of_coqint al)) args
| EF_annot_val(txt, targ) ->
expand_annot_val txt targ args res
| EF_inline_asm(txt, sg, clob) ->
emit instr
| _ -> assert false
end
| _ -> emit instr
let expand_program p = p
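Putting the Pallocframe expansion together for a concrete frame (illustration only; the 24-byte adjustment is hypothetical): it allocates with subl, records the CFI adjustment, and saves the caller's stack pointer (ESP + sz + 4) through EDX into the link slot; Pfreeframe becomes a single addl. A sketch of the emitted sequence as text:
let allocframe_plan sz =
  [ Printf.sprintf "subl $%d, %%esp" sz;
    Printf.sprintf ".cfi_adjust_cfa_offset %d" sz;
    Printf.sprintf "leal %d(%%esp), %%edx" (sz + 4);
    "movl %edx, <ofs_link>(%esp)" ]
let () = assert (List.nth (allocframe_plan 24) 2 = "leal 28(%esp), %edx")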
......
......@@ -348,156 +348,7 @@ module Target(System: SYSTEM):TARGET =
print_annot_stmt preg "%esp" oc txt targs args
end
let print_annot_val oc txt args res =
fprintf oc "%s annotation: " comment;
print_annot_val preg oc txt args;
match args, res with
| [IR src], [IR dst] ->
if dst <> src then fprintf oc " movl %a, %a\n" ireg src ireg dst
| [FR src], [FR dst] ->
if dst <> src then fprintf oc " movapd %a, %a\n" freg src freg dst
| _, _ -> assert false
(* Handling of memcpy *)
(* Unaligned memory accesses are quite fast on IA32, so use large
memory accesses regardless of alignment. *)
let print_builtin_memcpy_small oc sz al src dst =
assert (src = EDX && dst = EAX);
let rec copy ofs sz =
if sz >= 8 && !Clflags.option_ffpu then begin
fprintf oc " movq %d(%a), %a\n" ofs ireg src freg XMM7;
fprintf oc " movq %a, %d(%a)\n" freg XMM7 ofs ireg dst;
copy (ofs + 8) (sz - 8)
end else if sz >= 4 then begin
fprintf oc " movl %d(%a), %a\n" ofs ireg src ireg ECX;
fprintf oc " movl %a, %d(%a)\n" ireg ECX ofs ireg dst;
copy (ofs + 4) (sz - 4)
end else if sz >= 2 then begin
fprintf oc " movw %d(%a), %a\n" ofs ireg src ireg16 ECX;
fprintf oc " movw %a, %d(%a)\n" ireg16 ECX ofs ireg dst;
copy (ofs + 2) (sz - 2)
end else if sz >= 1 then begin
fprintf oc " movb %d(%a), %a\n" ofs ireg src ireg8 ECX;
fprintf oc " movb %a, %d(%a)\n" ireg8 ECX ofs ireg dst;
copy (ofs + 1) (sz - 1)
end in
copy 0 sz
let print_builtin_memcpy_big oc sz al src dst =
assert (src = ESI && dst = EDI);
fprintf oc " movl $%d, %a\n" (sz / 4) ireg ECX;
fprintf oc " rep movsl\n";
if sz mod 4 >= 2 then fprintf oc " movsw\n";
if sz mod 2 >= 1 then fprintf oc " movsb\n"
let print_builtin_memcpy oc sz al args =
let (dst, src) =
match args with [IR d; IR s] -> (d, s) | _ -> assert false in
fprintf oc "%s begin builtin __builtin_memcpy_aligned, size = %d, alignment = %d\n"
comment sz al;
if sz <= 32
then print_builtin_memcpy_small oc sz al src dst
else print_builtin_memcpy_big oc sz al src dst;
fprintf oc "%s end builtin __builtin_memcpy_aligned\n" comment
(* Handling of volatile reads and writes *)
let offset_addressing (Addrmode(base, ofs, cst)) delta =
Addrmode(base, ofs,
match cst with
| Coq_inl n -> Coq_inl(Integers.Int.add n delta)
| Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta))
let print_builtin_vload_common oc chunk addr res =
match chunk, res with
| Mint8unsigned, [IR res] ->
fprintf oc " movzbl %a, %a\n" addressing addr ireg res
| Mint8signed, [IR res] ->
fprintf oc " movsbl %a, %a\n" addressing addr ireg res
| Mint16unsigned, [IR res] ->
fprintf oc " movzwl %a, %a\n" addressing addr ireg res
| Mint16signed, [IR res] ->
fprintf oc " movswl %a, %a\n" addressing addr ireg res
| Mint32, [IR res] ->
fprintf oc " movl %a, %a\n" addressing addr ireg res
| Mint64, [IR res1; IR res2] ->
let addr' = offset_addressing addr (coqint_of_camlint 4l) in
if not (Asmgen.addressing_mentions addr res2) then begin
fprintf oc " movl %a, %a\n" addressing addr ireg res2;
fprintf oc " movl %a, %a\n" addressing addr' ireg res1
end else begin
fprintf oc " movl %a, %a\n" addressing addr' ireg res1;
fprintf oc " movl %a, %a\n" addressing addr ireg res2
end
| Mfloat32, [FR res] ->
fprintf oc " movss %a, %a\n" addressing addr freg res
| Mfloat64, [FR res] ->
fprintf oc " movsd %a, %a\n" addressing addr freg res
| _ ->
assert false
let print_builtin_vload oc chunk args res =
fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
begin match args with
| [IR addr] ->
print_builtin_vload_common oc chunk
(Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) res
| _ ->
assert false
end;
fprintf oc "%s end builtin __builtin_volatile_read\n" comment
let print_builtin_vload_global oc chunk id ofs args res =
fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
print_builtin_vload_common oc chunk
(Addrmode(None, None, Coq_inr(id,ofs))) res;
fprintf oc "%s end builtin __builtin_volatile_read\n" comment
let print_builtin_vstore_common oc chunk addr src tmp =
match chunk, src with
| (Mint8signed | Mint8unsigned), [IR src] ->
if Asmgen.low_ireg src then
fprintf oc " movb %a, %a\n" ireg8 src addressing addr
else begin
fprintf oc " movl %a, %a\n" ireg src ireg tmp;
fprintf oc " movb %a, %a\n" ireg8 tmp addressing addr
end
| (Mint16signed | Mint16unsigned), [IR src] ->
fprintf oc " movw %a, %a\n" ireg16 src addressing addr
| Mint32, [IR src] ->
fprintf oc " movl %a, %a\n" ireg src addressing addr
| Mint64, [IR src1; IR src2] ->
let addr' = offset_addressing addr (coqint_of_camlint 4l) in
fprintf oc " movl %a, %a\n" ireg src2 addressing addr;
fprintf oc " movl %a, %a\n" ireg src1 addressing addr'
| Mfloat32, [FR src] ->
fprintf oc " movss %a, %a\n" freg src addressing addr
| Mfloat64, [FR src] ->
fprintf oc " movsd %a, %a\n" freg src addressing addr
| _ ->
assert false
let print_builtin_vstore oc chunk args =
fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
begin match args with
| IR addr :: src ->
print_builtin_vstore_common oc chunk
(Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) src
(if addr = EAX then ECX else EAX)
| _ ->
assert false
end;
fprintf oc "%s end builtin __builtin_volatile_write\n" comment
let print_builtin_vstore_global oc chunk id ofs args =
fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
print_builtin_vstore_common oc chunk
(Addrmode(None, None, Coq_inr(id,ofs))) args EAX;
fprintf oc "%s end builtin __builtin_volatile_write\n" comment
(* Handling of varargs *)
(* Handling of varargs *)
let print_builtin_va_start oc r =
if not (!current_function_sig).sig_cc.cc_vararg then
......@@ -509,117 +360,6 @@ module Target(System: SYSTEM):TARGET =
fprintf oc " movl %%esp, 0(%a)\n" ireg r;
fprintf oc " addl $%ld, 0(%a)\n" ofs ireg r
(* Handling of compiler-inlined builtins *)
let print_builtin_inline oc name args res =
fprintf oc "%s begin builtin %s\n" comment name;
begin match name, args, res with
(* Integer arithmetic *)
| ("__builtin_bswap"| "__builtin_bswap32"), [IR a1], [IR res] ->
if a1 <> res then
fprintf oc " movl %a, %a\n" ireg a1 ireg res;
fprintf oc " bswap %a\n" ireg res
| "__builtin_bswap16", [IR a1], [IR res] ->
if a1 <> res then
fprintf oc " movl %a, %a\n" ireg a1 ireg res;
fprintf oc " rolw $8, %a\n" ireg16 res
| "__builtin_clz", [IR a1], [IR res] ->
fprintf oc " bsrl %a, %a\n" ireg a1 ireg res;
fprintf oc " xorl $31, %a\n" ireg res
| "__builtin_ctz", [IR a1], [IR res] ->
fprintf oc " bsfl %a, %a\n" ireg a1 ireg res
(* Float arithmetic *)
| "__builtin_fabs", [FR a1], [FR res] ->
need_masks := true;
if a1 <> res then
fprintf oc " movapd %a, %a\n" freg a1 freg res;
fprintf oc " andpd %a, %a\n" raw_symbol "__absd_mask" freg res
| "__builtin_fsqrt", [FR a1], [FR res] ->
fprintf oc " sqrtsd %a, %a\n" freg a1 freg res
| "__builtin_fmax", [FR a1; FR a2], [FR res] ->
if res = a1 then
fprintf oc " maxsd %a, %a\n" freg a2 freg res
else if res = a2 then
fprintf oc " maxsd %a, %a\n" freg a1 freg res
else begin
fprintf oc " movapd %a, %a\n" freg a1 freg res;
fprintf oc " maxsd %a, %a\n" freg a2 freg res
end
| "__builtin_fmin", [FR a1; FR a2], [FR res] ->
if res = a1 then
fprintf oc " minsd %a, %a\n" freg a2 freg res
else if res = a2 then
fprintf oc " minsd %a, %a\n" freg a1 freg res
else begin
fprintf oc " movapd %a, %a\n" freg a1 freg res;
fprintf oc " minsd %a, %a\n" freg a2 freg res
end
| ("__builtin_fmadd"|"__builtin_fmsub"|"__builtin_fnmadd"|"__builtin_fnmsub"),
[FR a1; FR a2; FR a3], [FR res] ->
let opcode =
match name with
| "__builtin_fmadd" -> "vfmadd"
| "__builtin_fmsub" -> "vfmsub"
| "__builtin_fnmadd" -> "vfnmadd"
| "__builtin_fnmsub" -> "vfnmsub"
| _ -> assert false in
if res = a1 then
fprintf oc " %s132sd %a, %a, %a\n" opcode freg a2 freg a3 freg res
else if res = a2 then
fprintf oc " %s213sd %a, %a, %a\n" opcode freg a3 freg a1 freg res
else if res = a3 then
fprintf oc " %s231sd %a, %a, %a\n" opcode freg a1 freg a2 freg res
else begin
fprintf oc " movapd %a, %a\n" freg a3 freg res;
fprintf oc " %s231sd %a, %a, %a\n" opcode freg a1 freg a2 freg res
end
(* 64-bit integer arithmetic *)
| "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
assert (ah = EDX && al = EAX && rh = EDX && rl = EAX);
fprintf oc " negl %a\n" ireg EAX;
fprintf oc " adcl $0, %a\n" ireg EDX;
fprintf oc " negl %a\n" ireg EDX