From a745efa7f07a10cec625b9c302eb0196b7bfaefb Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Thu, 26 Feb 2009 14:48:59 +0000
Subject: [PATCH] Reserve register GPR13 for compatibility with EABI.  Optimize
 operations 'x >= 0' and 'x < 0'.

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@999 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 common/Values.v              | 17 +++++++++++++
 lib/Integers.v               | 20 ++++++++++++++++
 powerpc/Asm.v                |  2 +-
 powerpc/Asmgen.v             | 46 +++++++++++++++++++++++++++++++-----
 powerpc/Asmgenproof.v        | 29 +++++++++++++++++++----
 powerpc/Asmgenproof1.v       | 34 ++++++++++++++++++++++----
 powerpc/Asmgenretaddr.v      |  5 +++-
 powerpc/Machregs.v           | 18 +++++++-------
 powerpc/eabi/Conventions.v   | 12 +++++-----
 powerpc/macosx/Conventions.v | 12 +++++-----
 10 files changed, 156 insertions(+), 39 deletions(-)

diff --git a/common/Values.v b/common/Values.v
index 27d4f50c0..8182d7a5c 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -918,6 +918,23 @@ Proof.
   elim H0; intro; subst v; reflexivity.
 Qed.
 
+Lemma rolm_lt_zero:
+  forall v, rolm v Int.one Int.one = cmp Clt v (Vint Int.zero).
+Proof.
+  intros. destruct v; simpl; auto.
+  transitivity (Vint (Int.shru i (Int.repr (Z_of_nat wordsize - 1)))).
+  decEq. symmetry. rewrite Int.shru_rolm. auto. auto. 
+  rewrite Int.shru_lt_zero. destruct (Int.lt i Int.zero); auto. 
+Qed.
+
+Lemma rolm_ge_zero:
+  forall v,
+  xor (rolm v Int.one Int.one) (Vint Int.one) = cmp Cge v (Vint Int.zero).
+Proof.
+  intros. rewrite rolm_lt_zero. destruct v; simpl; auto.
+  destruct (Int.lt i Int.zero); auto.
+Qed.
+
 (** The ``is less defined'' relation between values. 
     A value is less defined than itself, and [Vundef] is
     less defined than any value. *)
diff --git a/lib/Integers.v b/lib/Integers.v
index ff29d2a51..ceda85121 100644
--- a/lib/Integers.v
+++ b/lib/Integers.v
@@ -2545,4 +2545,24 @@ Proof.
   contradiction. auto.
 Qed.
 
+Theorem shru_lt_zero:
+  forall x,
+  shru x (repr (Z_of_nat wordsize - 1)) = if lt x zero then one else zero.
+Proof.
+  intros. rewrite shru_div_two_p. 
+  change (two_p (unsigned (repr (Z_of_nat wordsize - 1))))
+    with half_modulus.
+  generalize (unsigned_range x); intro. 
+  unfold lt. change (signed zero) with 0. unfold signed. 
+  destruct (zlt (unsigned x) half_modulus).
+  rewrite zlt_false.
+  replace (unsigned x / half_modulus) with 0. reflexivity. 
+  symmetry. apply Zdiv_unique with (unsigned x). ring. omega. omega.
+  rewrite zlt_true. 
+  replace (unsigned x / half_modulus) with 1. reflexivity.
+  symmetry. apply Zdiv_unique with (unsigned x - half_modulus). ring. 
+  replace modulus with (2 * half_modulus) in H. omega. reflexivity. 
+  omega. 
+Qed.
+
 End Int.
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index 69085aa99..63f0232ae 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -759,7 +759,7 @@ Definition ireg_of (r: mreg) : ireg :=
   match r with
   | R3 => GPR3  | R4 => GPR4  | R5 => GPR5  | R6 => GPR6
   | R7 => GPR7  | R8 => GPR8  | R9 => GPR9  | R10 => GPR10
-  | R13 => GPR13 | R14 => GPR14 | R15 => GPR15 | R16 => GPR16
+  | R14 => GPR14 | R15 => GPR15 | R16 => GPR16
   | R17 => GPR17 | R18 => GPR18 | R19 => GPR19 | R20 => GPR20
   | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24
   | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index 1dcc39904..0c3ac75dc 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -223,6 +223,32 @@ Definition crbit_for_cond (cond: condition) :=
   | Cmasknotzero n => (CRbit_2, false)
   end.
 
+(** Recognition of comparisons [>= 0] and [< 0]. *)
+
+Inductive condition_class: condition -> list mreg -> Set :=
+  | condition_ge0:
+      forall n r, n = Int.zero -> condition_class (Ccompimm Cge n) (r :: nil)
+  | condition_lt0:
+      forall n r, n = Int.zero -> condition_class (Ccompimm Clt n) (r :: nil)
+  | condition_default:
+      forall c rl, condition_class c rl.
+
+Definition classify_condition (c: condition) (args: list mreg): condition_class c args :=
+  match c as z1, args as z2 return condition_class z1 z2 with
+  | Ccompimm Cge n, r :: nil =>
+      match Int.eq_dec n Int.zero with
+      | left EQ => condition_ge0 n r EQ
+      | right _ => condition_default (Ccompimm Cge n) (r :: nil)
+      end
+  | Ccompimm Clt n, r :: nil =>
+      match Int.eq_dec n Int.zero with
+      | left EQ => condition_lt0 n r EQ
+      | right _ => condition_default (Ccompimm Clt n) (r :: nil)
+      end
+  | x, y =>
+      condition_default x y
+  end.
+
 (** Translation of the arithmetic operation [r <- op(args)].
   The corresponding instructions are prepended to [k]. *)
 
@@ -331,12 +357,20 @@ Definition transl_op
   | Ofloatofintu, a1 :: nil =>
       Piuctf (freg_of r) (ireg_of a1) :: k
   | Ocmp cmp, _ =>
-      let p := crbit_for_cond cmp in
-      transl_cond cmp args
-        (Pmfcrbit (ireg_of r) (fst p) ::
-         if snd p
-         then k
-         else Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k)
+      match classify_condition cmp args with
+      | condition_ge0 _ a _ =>
+          Prlwinm (ireg_of r) (ireg_of a) Int.one Int.one ::
+          Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k
+      | condition_lt0 _ a _ =>
+          Prlwinm (ireg_of r) (ireg_of a) Int.one Int.one :: k
+      | condition_default _ _ =>
+          let p := crbit_for_cond cmp in
+          transl_cond cmp args
+            (Pmfcrbit (ireg_of r) (fst p) ::
+             if snd p
+             then k
+             else Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k)
+      end
   | _, _ =>
       k (**r never happens for well-typed code *)
   end.
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index a96125a2c..40c593a34 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -432,6 +432,30 @@ Proof.
   apply andimm_label. apply andimm_label.
 Qed.
 Hint Rewrite transl_cond_label: labels.
+
+Remark transl_op_cmp_label:
+  forall c args r k,
+  find_label lbl
+    (match classify_condition c args with
+     | condition_ge0 _ a _ =>
+        Prlwinm (ireg_of r) (ireg_of a) Int.one Int.one
+        :: Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k
+     | condition_lt0 _ a _ =>
+        Prlwinm (ireg_of r) (ireg_of a) Int.one Int.one :: k
+     | condition_default _ _ =>
+        transl_cond c args
+          (Pmfcrbit (ireg_of r) (fst (crbit_for_cond c))
+           :: (if snd (crbit_for_cond c)
+               then k
+               else Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k))
+    end) = find_label lbl k.
+Proof.
+  intros. destruct (classify_condition c args); auto. 
+  autorewrite with labels. 
+  case (snd (crbit_for_cond c)); auto.
+Qed.
+Hint Rewrite transl_op_cmp_label: labels.
+
 Remark transl_op_label:
   forall op args r k, find_label lbl (transl_op op args r k) = find_label lbl k.
 Proof.
@@ -441,11 +465,6 @@ Proof.
   case (mreg_type m); reflexivity.
   case (Int.eq (high_s i) Int.zero); autorewrite with labels; reflexivity.
   case (Int.eq (high_s i) Int.zero); autorewrite with labels; reflexivity.
-  case (snd (crbit_for_cond c)); reflexivity.
-  case (snd (crbit_for_cond c)); reflexivity.
-  case (snd (crbit_for_cond c)); reflexivity.
-  case (snd (crbit_for_cond c)); reflexivity.
-  case (snd (crbit_for_cond c)); reflexivity.
 Qed.
 Hint Rewrite transl_op_label: labels.
 
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index fdb48bec1..5eda7adf3 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -1349,14 +1349,38 @@ Proof.
   repeat (rewrite (ireg_val ms sp rs); auto).
   reflexivity. auto 10 with ppcgen.
   (* Ocmp *)
-  set (bit := fst (crbit_for_cond c)).
-  set (isset := snd (crbit_for_cond c)).
+  generalize H2; case (classify_condition c args); intros.
+  (* Optimization: compimm Cge 0 *)
+  subst n. simpl in H4. simpl. 
+  set (rs1 := nextinstr (rs#(ireg_of res) <- (Val.rolm (ms r) Int.one Int.one))).
+  set (rs2 := nextinstr (rs1#(ireg_of res) <- (Val.xor (rs1#(ireg_of res)) (Vint Int.one)))).
+  exists rs2.
+  split. apply exec_straight_two with rs1 m. 
+  simpl. unfold rs1. rewrite (ireg_val ms sp rs); auto. congruence.
+  auto. auto. auto.
+  rewrite <- Val.rolm_ge_zero. 
+  unfold rs2. apply agree_nextinstr.
+  unfold rs1. apply agree_nextinstr_commut. fold rs1.
+  replace (rs1 (ireg_of res)) with (Val.rolm (ms r) Int.one Int.one).
+  apply agree_set_mireg_twice; auto.
+  unfold rs1. rewrite nextinstr_inv. rewrite Pregmap.gss. auto. 
+  auto with ppcgen. auto with ppcgen. 
+  (* Optimization: compimm Clt 0 *)
+  subst n. simpl in H4. simpl. 
+  exists (nextinstr (rs#(ireg_of res) <- (Val.rolm (ms r) Int.one Int.one))).
+  split. apply exec_straight_one. simpl. rewrite (ireg_val ms sp rs); auto. congruence.
+  auto. 
+  apply agree_nextinstr. apply agree_set_mireg.
+  rewrite Val.rolm_lt_zero. apply agree_set_mreg. auto. congruence. 
+  (* General case *)
+  set (bit := fst (crbit_for_cond c0)).
+  set (isset := snd (crbit_for_cond c0)).
   set (k1 :=
         Pmfcrbit (ireg_of res) bit ::
         (if isset
          then k
          else Pxori (ireg_of res) (ireg_of res) (Cint Int.one) :: k)).
-  generalize (transl_cond_correct_aux c args k1 ms sp rs m H2 H0).
+  generalize (transl_cond_correct_aux c0 rl k1 ms sp rs m H4 H0).
   fold bit; fold isset. 
   intros [rs1 [EX1 [RES1 AG1]]].
   set (rs2 := nextinstr (rs1#(ireg_of res) <- (rs1#(reg_of_crbit bit)))).
@@ -1366,12 +1390,12 @@ Proof.
   unfold k1. apply exec_straight_one. 
   reflexivity. reflexivity. 
   unfold rs2. rewrite RES1. auto with ppcgen. 
-  exists (nextinstr (rs2#(ireg_of res) <- (eval_condition_total c ms##args))).
+  exists (nextinstr (rs2#(ireg_of res) <- (eval_condition_total c0 ms##rl))).
   split. apply exec_straight_trans with k1 rs1 m. assumption.
   unfold k1. apply exec_straight_two with rs2 m.
   reflexivity. simpl. 
   replace (Val.xor (rs2 (ireg_of res)) (Vint Int.one))
-     with (eval_condition_total c ms##args).
+     with (eval_condition_total c0 ms##rl).
   reflexivity.
   unfold rs2. rewrite nextinstr_inv; auto with ppcgen. rewrite Pregmap.gss. 
   rewrite RES1. apply Val.notbool_xor. apply eval_condition_total_is_bool.
diff --git a/powerpc/Asmgenretaddr.v b/powerpc/Asmgenretaddr.v
index 23bd186e1..d414752c3 100644
--- a/powerpc/Asmgenretaddr.v
+++ b/powerpc/Asmgenretaddr.v
@@ -149,7 +149,10 @@ Hint Resolve transl_cond_tail: ppcretaddr.
 
 Lemma transl_op_tail:
   forall op args r k, is_tail k (transl_op op args r k).
-Proof. unfold transl_op; intros; destruct op; IsTail. Qed.
+Proof.
+  unfold transl_op; intros; destruct op; IsTail. 
+  destruct (classify_condition c args); IsTail.
+Qed.
 Hint Resolve transl_op_tail: ppcretaddr.
 
 Lemma transl_load_store_tail:
diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v
index 260a0e859..d0f7cf464 100644
--- a/powerpc/Machregs.v
+++ b/powerpc/Machregs.v
@@ -26,14 +26,14 @@ Require Import AST.
 - Two float registers, not allocatable, reserved as temporaries for
   spilling and reloading ([FT1, FT2]).
 
-  The type [mreg] does not include special-purpose machine registers
-  such as the stack pointer and the condition codes. *)
+  The type [mreg] does not include special-purpose or reserved
+  machine registers such as the stack pointer and the condition codes. *)
 
 Inductive mreg: Set :=
   (** Allocatable integer regs *)
   | R3: mreg  | R4: mreg  | R5: mreg  | R6: mreg
   | R7: mreg  | R8: mreg  | R9: mreg  | R10: mreg
-  | R13: mreg | R14: mreg | R15: mreg | R16: mreg
+  | R14: mreg | R15: mreg | R16: mreg
   | R17: mreg | R18: mreg | R19: mreg | R20: mreg
   | R21: mreg | R22: mreg | R23: mreg | R24: mreg
   | R25: mreg | R26: mreg | R27: mreg | R28: mreg
@@ -58,7 +58,7 @@ Definition mreg_type (r: mreg): typ :=
   match r with
   | R3 => Tint  | R4 => Tint  | R5 => Tint  | R6 => Tint
   | R7 => Tint  | R8 => Tint  | R9 => Tint  | R10 => Tint
-  | R13 => Tint | R14 => Tint | R15 => Tint | R16 => Tint
+  | R14 => Tint | R15 => Tint | R16 => Tint
   | R17 => Tint | R18 => Tint | R19 => Tint | R20 => Tint
   | R21 => Tint | R22 => Tint | R23 => Tint | R24 => Tint
   | R25 => Tint | R26 => Tint | R27 => Tint | R28 => Tint
@@ -83,11 +83,11 @@ Module IndexedMreg <: INDEXED_TYPE.
     match r with
     | R3 => 1  | R4 => 2  | R5 => 3  | R6 => 4
     | R7 => 5  | R8 => 6  | R9 => 7  | R10 => 8
-    | R13 => 9 | R14 => 10 | R15 => 11 | R16 => 12
-    | R17 => 13 | R18 => 14 | R19 => 15 | R20 => 16
-    | R21 => 17 | R22 => 18 | R23 => 19 | R24 => 20
-    | R25 => 21 | R26 => 22 | R27 => 23 | R28 => 24
-    | R29 => 25 | R30 => 26 | R31 => 27
+    | R14 => 9 | R15 => 10 | R16 => 11
+    | R17 => 12 | R18 => 13 | R19 => 14 | R20 => 15
+    | R21 => 16 | R22 => 17 | R23 => 18 | R24 => 19
+    | R25 => 20 | R26 => 21 | R27 => 22 | R28 => 23
+    | R29 => 24 | R30 => 25 | R31 => 26
     | F1 => 28  | F2 => 29  | F3 => 30  | F4 => 31
     | F5 => 32  | F6 => 33  | F7 => 34  | F8 => 35
     | F9 => 36  | F10 => 37 | F14 => 38 | F15 => 39
diff --git a/powerpc/eabi/Conventions.v b/powerpc/eabi/Conventions.v
index 90e14a0af..acad0129b 100644
--- a/powerpc/eabi/Conventions.v
+++ b/powerpc/eabi/Conventions.v
@@ -38,7 +38,7 @@ Definition float_caller_save_regs :=
   F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: nil.
 
 Definition int_callee_save_regs :=
-  R13 :: R14 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: 
+  R14 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: 
   R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 :: R30 :: R31 :: nil.
 
 Definition float_callee_save_regs :=
@@ -65,11 +65,11 @@ Definition temporaries :=
 
 Definition index_int_callee_save (r: mreg) :=
   match r with
-  | R13 => 0  | R14 => 1  | R15 => 2  | R16 => 3
-  | R17 => 4  | R18 => 5  | R19 => 6  | R20 => 7
-  | R21 => 8  | R22 => 9  | R23 => 10 | R24 => 11
-  | R25 => 12 | R26 => 13 | R27 => 14 | R28 => 15
-  | R29 => 16 | R30 => 17 | R31 => 18 | _ => -1
+  | R14 => 0  | R15 => 1  | R16 => 2  | R17 => 3
+  | R18 => 4  | R19 => 5  | R20 => 6  | R21 => 7
+  | R22 => 8  | R23 => 9  | R24 => 10 | R25 => 11
+  | R26 => 12 | R27 => 13 | R28 => 14 | R29 => 15
+  | R30 => 16 | R31 => 17 | _ => -1
   end.
 
 Definition index_float_callee_save (r: mreg) :=
diff --git a/powerpc/macosx/Conventions.v b/powerpc/macosx/Conventions.v
index 6d1ccb138..62723627b 100644
--- a/powerpc/macosx/Conventions.v
+++ b/powerpc/macosx/Conventions.v
@@ -38,7 +38,7 @@ Definition float_caller_save_regs :=
   F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: nil.
 
 Definition int_callee_save_regs :=
-  R13 :: R14 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: 
+  R14 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: 
   R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 :: R30 :: R31 :: nil.
 
 Definition float_callee_save_regs :=
@@ -65,11 +65,11 @@ Definition temporaries :=
 
 Definition index_int_callee_save (r: mreg) :=
   match r with
-  | R13 => 0  | R14 => 1  | R15 => 2  | R16 => 3
-  | R17 => 4  | R18 => 5  | R19 => 6  | R20 => 7
-  | R21 => 8  | R22 => 9  | R23 => 10 | R24 => 11
-  | R25 => 12 | R26 => 13 | R27 => 14 | R28 => 15
-  | R29 => 16 | R30 => 17 | R31 => 18 | _ => -1
+  | R14 => 0  | R15 => 1  | R16 => 2  | R17 => 3
+  | R18 => 4  | R19 => 5  | R20 => 6  | R21 => 7
+  | R22 => 8  | R23 => 9  | R24 => 10 | R25 => 11
+  | R26 => 12 | R27 => 13 | R28 => 14 | R29 => 15
+  | R30 => 16 | R31 => 17 | _ => -1
   end.
 
 Definition index_float_callee_save (r: mreg) :=
-- 
GitLab