Commit aa25ec27 authored by Cyril SIX's avatar Cyril SIX
Browse files

MPPA - Oshrximm + Mgetparam + FP is GPR10 + bug

Added Oshrximm and Mgetparam -> mmult.c divide & conqueer generates

FP is now GPR10 instead of being a mix of GPR30 and GPR32

Corrected a bug where Pgoto and Pj_l were given the same interpretation,
where in fact there's a fundamental difference : Pgoto is supposed to
have a function name (symbol), while Pj_l is supposed to have a label
name (print_label). This led to having undefinite labels in the code.
parent 41a048fa
......@@ -92,6 +92,7 @@ Module Pregmap := EMap(PregEq).
(** Conventional names for stack pointer ([SP]) and return address ([RA]). *)
Notation "'SP'" := GPR12 (only parsing) : asm.
Notation "'FP'" := GPR10 (only parsing) : asm.
Notation "'RTMP'" := GPR31 (only parsing) : asm.
Inductive btest: Type :=
......@@ -1008,7 +1009,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
let sp := (Vptr stk Ptrofs.zero) in
match Mem.storev Mptr m1 (Val.offset_ptr sp pos) rs#SP with
| None => Stuck
| Some m2 => Next (nextinstr (rs #GPR32 <- (rs SP) #SP <- sp #GPR31 <- Vundef)) m2
| Some m2 => Next (nextinstr (rs #FP <- (rs SP) #SP <- sp #GPR31 <- Vundef)) m2
end
| Pfreeframe sz pos =>
match Mem.loadv Mptr m (Val.offset_ptr rs#SP pos) with
......@@ -1082,7 +1083,7 @@ Definition preg_of (r: mreg) : preg :=
match r with
| R0 => GPR0 | R1 => GPR1 | R2 => GPR2 | R3 => GPR3 | R4 => GPR4
| R5 => GPR5 | R6 => GPR6 | R7 => GPR7 | R9 => GPR9
(*| R10 => GPR10 | R11 => GPR11 | R12 => GPR12 | R13 => GPR13 | R14 => GPR14 *)
| R10 => GPR10 (*| R11 => GPR11 | R12 => GPR12 | R13 => GPR13 | R14 => GPR14 *)
| R15 => GPR15 | R16 => GPR16 | R17 => GPR17 | R18 => GPR18 | R19 => GPR19
| R20 => GPR20 | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24
| R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 | R29 => GPR29
......
......@@ -346,7 +346,7 @@ Definition transl_op
| Oshruimm n, a1 :: nil =>
do rd <- ireg_of res; do rs <- ireg_of a1;
OK (Psrliw rd rs n :: k)
(*| Oshrximm n, a1 :: nil =>
| Oshrximm n, a1 :: nil =>
do rd <- ireg_of res; do rs <- ireg_of a1;
OK (if Int.eq n Int.zero then Pmv rd rs :: k else
Psraiw GPR31 rs (Int.repr 31) ::
......@@ -355,7 +355,7 @@ Definition transl_op
Psraiw rd GPR31 n :: k)
(* [Omakelong], [Ohighlong] should not occur *)
*)| Olowlong, a1 :: nil =>
| Olowlong, a1 :: nil =>
do rd <- ireg_of res; do rs <- ireg_of a1;
OK (Pcvtl2w rd rs :: k)
| Ocast32signed, a1 :: nil =>
......@@ -675,12 +675,12 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
loadind SP ofs ty dst k
| Msetstack src ofs ty =>
storeind src SP ofs ty k
(*| Mgetparam ofs ty dst =>
| Mgetparam ofs ty dst =>
(* load via the frame pointer if it is valid *)
do c <- loadind GPR30 ofs ty dst k;
do c <- loadind FP ofs ty dst k;
OK (if ep then c
else loadind_ptr SP f.(fn_link_ofs) GPR30 c)
*)| Mop op args res =>
else loadind_ptr SP f.(fn_link_ofs) FP c)
| Mop op args res =>
transl_op op args res k
| Mload chunk addr args dst =>
transl_load chunk addr args dst k
......@@ -716,8 +716,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
match i with
| Msetstack src ofs ty => before
| Mgetparam ofs ty dst => negb (mreg_eq dst R32)
| Mop op args res => before && negb (mreg_eq res R32)
| Mgetparam ofs ty dst => negb (mreg_eq dst R10)
| Mop op args res => before && negb (mreg_eq res R10)
| _ => false
end.
......
......@@ -323,6 +323,8 @@ Opaque Int.eq.
- apply opimm32_label; intros; exact I.
(* Oandimm32 *)
- apply opimm32_label; intros; exact I.
(* Oshrximm *)
- destruct (Int.eq n Int.zero); TailNoLabel.
(* Oaddimm64 *)
- apply opimm64_label; intros; exact I.
(* Oandimm64 *)
......@@ -415,6 +417,9 @@ Proof.
- eapply loadind_label; eauto.
(* storeind *)
- eapply storeind_label; eauto.
(* Mgetparam *)
- destruct ep. eapply loadind_label; eauto.
eapply tail_nolabel_trans. apply loadind_ptr_label. eapply loadind_label; eauto.
(* transl_op *)
- eapply transl_op_label; eauto.
(* transl_load *)
......@@ -429,8 +434,7 @@ Proof.
Qed.
(*
- destruct ep. eapply loadind_label; eauto.
eapply tail_nolabel_trans. apply loadind_ptr_label. eapply loadind_label; eauto.
- eapply transl_op_label; eauto.
- destruct m; monadInv H; eapply transl_memory_access_label; eauto; intros; exact I.
- destruct m; monadInv H; eapply transl_memory_access_label; eauto; intros; exact I.
......@@ -552,7 +556,7 @@ Inductive match_states: Mach.state -> Asm.state -> Prop :=
(MEXT: Mem.extends m m')
(AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
(AG: agree ms sp rs)
(DXP: ep = true -> rs#GPR32 = parent_sp s),
(DXP: ep = true -> rs#FP = parent_sp s),
match_states (Mach.State s fb sp c ms m)
(Asm.State rs m')
| match_states_call:
......@@ -583,7 +587,7 @@ Lemma exec_straight_steps:
exists rs2,
exec_straight tge tf c rs1 m1' k rs2 m2'
/\ agree ms2 sp rs2
/\ (it1_is_parent ep i = true -> rs2#GPR32 = parent_sp s)) ->
/\ (it1_is_parent ep i = true -> rs2#FP = parent_sp s)) ->
exists st',
plus step tge (State rs1 m1') E0 st' /\
match_states (Mach.State s fb sp c ms2 m2) st'.
......@@ -694,9 +698,9 @@ Definition measure (s: Mach.state) : nat :=
| Mach.Returnstate _ _ _ => 1%nat
end.
Remark preg_of_not_GPR32: forall r, negb (mreg_eq r R32) = true -> IR GPR32 <> preg_of r.
Remark preg_of_not_FP: forall r, negb (mreg_eq r R10) = true -> IR FP <> preg_of r.
Proof.
intros. change (IR GPR32) with (preg_of R32). red; intros.
intros. change (IR FP) with (preg_of R10). red; intros.
exploit preg_of_injective; eauto. intros; subst r; discriminate.
Qed.
......@@ -748,16 +752,15 @@ Proof.
intros [v' [C D]].
(* Opaque loadind. *)
left; eapply exec_straight_steps; eauto; intros. monadInv TR.
(*
destruct ep.
(* X30 contains parent *)
(* GPR31 contains parent *)
exploit loadind_correct. eexact EQ.
instantiate (2 := rs0). rewrite DXP; eauto. congruence.
intros [rs1 [P [Q R]]].
exists rs1; split. eauto.
split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
simpl; intros. rewrite R; auto with asmgen.
apply preg_of_not_X30; auto.
apply preg_of_not_FP; auto.
(* GPR11 does not contain parent *)
rewrite chunk_of_Tptr in A.
exploit loadind_ptr_correct. eexact A. congruence. intros [rs1 [P [Q R]]].
......@@ -765,13 +768,12 @@ Proof.
intros [rs2 [S [T U]]].
exists rs2; split. eapply exec_straight_trans; eauto.
split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
instantiate (1 := rs1#X30 <- (rs2#X30)). intros.
instantiate (1 := rs1#FP <- (rs2#FP)). intros.
rewrite Pregmap.gso; auto with asmgen.
congruence.
intros. unfold Pregmap.set. destruct (PregEq.eq r' X30). congruence. auto with asmgen.
intros. unfold Pregmap.set. destruct (PregEq.eq r' FP). congruence. auto with asmgen.
simpl; intros. rewrite U; auto with asmgen.
apply preg_of_not_X30; auto.
*)
apply preg_of_not_FP; auto.
- (* Mop *)
assert (eval_operation tge sp op (map rs args) m = Some v).
rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
......@@ -783,7 +785,7 @@ Proof.
apply agree_set_undef_mreg with rs0; auto.
apply Val.lessdef_trans with v'; auto.
simpl; intros. destruct (andb_prop _ _ H1); clear H1.
rewrite R; auto. apply preg_of_not_GPR32; auto.
rewrite R; auto. apply preg_of_not_FP; auto.
Local Transparent destroyed_by_op.
destruct op; simpl; auto; congruence.
......@@ -1021,7 +1023,7 @@ Local Transparent destroyed_by_op.
Pget GPR8 RA ::
storeind_ptr GPR8 SP (fn_retaddr_ofs f) x0) in *.
set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *.
set (rs2 := nextinstr (rs0#GPR32 <- (parent_sp s) #SP <- sp #GPR31 <- Vundef)).
set (rs2 := nextinstr (rs0#FP <- (parent_sp s) #SP <- sp #GPR31 <- Vundef)).
exploit (Pget_correct tge tf GPR8 RA (storeind_ptr GPR8 SP (fn_retaddr_ofs f) x0) rs2 m2'); auto.
intros (rs' & U' & V').
exploit (storeind_ptr_correct tge tf SP (fn_retaddr_ofs f) GPR8 x0 rs' m2').
......
......@@ -1214,6 +1214,18 @@ Opaque Int.eq.
- (* Oaddrstack *)
exploit addptrofs_correct. instantiate (1 := GPR12); auto with asmgen. intros (rs' & A & B & C).
exists rs'; split; eauto. auto with asmgen.
- (* Oshrximm *)
clear H. exploit Val.shrx_shr_2; eauto. intros E; subst v; clear EV.
destruct (Int.eq n Int.zero).
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
split; intros; Simpl.
+ change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
econstructor; split.
eapply exec_straight_step. simpl; reflexivity. auto.
eapply exec_straight_step. simpl; reflexivity. auto.
eapply exec_straight_step. simpl; reflexivity. auto.
apply exec_straight_one. simpl; reflexivity. auto.
split; intros; unfold getw; Simpl.
- (* Ocast32signed *)
exploit cast32signed_correct; eauto. intros (rs' & A & B & C).
exists rs'; split; eauto. split. apply B.
......@@ -1275,18 +1287,7 @@ Opaque Int.eq.
exploit (opimm32_correct Pxorw Pxoriw Val.xor); auto. instantiate (1 := x0); eauto with asmgen.
intros (rs' & A & B & C).
exists rs'; split; eauto. rewrite B; auto with asmgen.
- (* shrximm *)
clear H. exploit Val.shrx_shr_2; eauto. intros E; subst v; clear EV.
destruct (Int.eq n Int.zero).
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
split; intros; Simpl.
+ change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
econstructor; split.
eapply exec_straight_step. simpl; reflexivity. auto.
eapply exec_straight_step. simpl; reflexivity. auto.
eapply exec_straight_step. simpl; reflexivity. auto.
apply exec_straight_one. simpl; reflexivity. auto.
split; intros; Simpl.
- (* longofintu *)
econstructor; split.
eapply exec_straight_three. simpl; eauto. simpl; eauto. simpl; eauto. auto. auto. auto.
......
......@@ -42,7 +42,7 @@ Require Import Op.
Inductive mreg: Type :=
(* Allocatable General Purpose regs. *)
| R0 | R1 | R2 | R3 | R4 | R5 | R6 | R7 | R9
(* R10 to R14 are reserved *) | R15 | R16 | R17 | R18 | R19
| R10 (* R11 to R14 res *) | R15 | R16 | R17 | R18 | R19
| R20 | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | R29
| R30 | R32 | R33 | R34 | R35 | R36 | R37 | R38 | R39
| R40 | R41 | R42 | R43 | R44 | R45 | R46 | R47 | R48 | R49
......@@ -55,7 +55,7 @@ Global Opaque mreg_eq.
Definition all_mregs :=
R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: R9
:: R15 :: R16 :: R17 :: R18 :: R19
:: R10 :: R15 :: R16 :: R17 :: R18 :: R19
:: R20 :: R21 :: R22 :: R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29
:: R30 :: R32 :: R33 :: R34 :: R35 :: R36 :: R37 :: R38 :: R39
:: R40 :: R41 :: R42 :: R43 :: R44 :: R45 :: R46 :: R47 :: R48 :: R49
......@@ -87,6 +87,7 @@ Module IndexedMreg <: INDEXED_TYPE.
match r with
| R0 => 1 | R1 => 2 | R2 => 3 | R3 => 4 | R4 => 5
| R5 => 6 | R6 => 7 | R7 => 8 | R9 => 10
| R10 => 11
| R15 => 16 | R16 => 17 | R17 => 18 | R18 => 19 | R19 => 20
| R20 => 21 | R21 => 22 | R22 => 23 | R23 => 24 | R24 => 25
| R25 => 26 | R26 => 27 | R27 => 28 | R28 => 29 | R29 => 30
......@@ -115,6 +116,7 @@ Local Open Scope string_scope.
Definition register_names :=
("R0" , R0) :: ("R1" , R1) :: ("R2" , R2) :: ("R3" , R3) :: ("R4" , R4)
:: ("R5" , R5) :: ("R6" , R6) :: ("R7" , R7) :: ("R9" , R9)
:: ("R10", R10)
:: ("R15", R15) :: ("R16", R16) :: ("R17", R17) :: ("R18", R18) :: ("R19", R19)
:: ("R20", R20) :: ("R21", R21) :: ("R22", R22) :: ("R23", R23) :: ("R24", R24)
:: ("R25", R25) :: ("R26", R26) :: ("R27", R27) :: ("R28", R28) :: ("R29", R29)
......@@ -173,9 +175,9 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
Definition destroyed_by_setstack (ty: typ): list mreg := nil.
Definition destroyed_at_function_entry: list mreg := R32 :: nil.
Definition destroyed_at_function_entry: list mreg := R10 :: nil.
Definition temp_for_parent_frame: mreg := R9. (* FIXME - and R8 ?? *)
Definition temp_for_parent_frame: mreg := R10. (* FIXME - and R8 ?? *)
Definition destroyed_at_indirect_call: list mreg := nil.
(* R10 :: R11 :: R12 :: R13 :: R14 :: R15 :: R16 :: R17 :: nil. *)
......
......@@ -191,7 +191,9 @@ module Target : TARGET =
let print_instruction oc = function
| Pcall(s) ->
fprintf oc " call %a\n;;\n" symbol s
| Pgoto(s) | Pj_l(s) ->
| Pgoto(s) ->
fprintf oc " goto %a\n;;\n" symbol s
| Pj_l(s) ->
fprintf oc " goto %a\n;;\n" print_label s
| Pret ->
fprintf oc " ret\n;;\n"
......
......@@ -39,7 +39,7 @@ typedef struct mblock {
#define MAT_XY(mat, x, y) (mat)[(x)*SIZE + (y)]
#define MAT_IJ(block, i, j) MAT_XY((block)->mat, (block)->imin + (i), block->jmin + (j))
int strassen_mul(mblock *C, const mblock *A, const mblock *B){
void divac_mul(mblock *C, const mblock *A, const mblock *B){
const int size = C->imax - C->imin;
for (int i = 0 ; i < size ; i++)
......@@ -67,50 +67,50 @@ int strassen_mul(mblock *C, const mblock *A, const mblock *B){
newb.jmax = (block)->jmax;\
}
int strassen_part(mblock *C, const mblock *A, const mblock *B);
void divac_part(mblock *C, const mblock *A, const mblock *B);
void strassen_wrap(mblock *C , char IC, char JC,
void divac_wrap(mblock *C , char IC, char JC,
const mblock *A, char IA, char JA,
const mblock *B, char IB, char JB){
MAKE_MBLOCK(Cb, C, IC, JC);
MAKE_MBLOCK(Ab, A, IA, JA);
MAKE_MBLOCK(Bb, B, IB, JB);
strassen_part(&Cb, &Ab, &Bb);
divac_part(&Cb, &Ab, &Bb);
}
int strassen_part(mblock *C, const mblock *A, const mblock *B){
void divac_part(mblock *C, const mblock *A, const mblock *B){
const int size = C->imax - C->imin;
if (size % 2 == 1)
strassen_mul(C, A, B);
divac_mul(C, A, B);
else{
/* C_00 = A_00 B_00 + A_01 B_10 */
strassen_wrap(C, 0, 0, A, 0, 0, B, 0, 0);
strassen_wrap(C, 0, 0, A, 0, 1, B, 1, 0);
divac_wrap(C, 0, 0, A, 0, 0, B, 0, 0);
divac_wrap(C, 0, 0, A, 0, 1, B, 1, 0);
/* C_10 = A_10 B_00 + A_11 B_10 */
strassen_wrap(C, 1, 0, A, 1, 0, B, 0, 0);
strassen_wrap(C, 1, 0, A, 1, 1, B, 1, 0);
divac_wrap(C, 1, 0, A, 1, 0, B, 0, 0);
divac_wrap(C, 1, 0, A, 1, 1, B, 1, 0);
/* C_01 = A_00 B_01 + A_01 B_11 */
strassen_wrap(C, 0, 1, A, 0, 0, B, 0, 1);
strassen_wrap(C, 0, 1, A, 0, 1, B, 1, 1);
divac_wrap(C, 0, 1, A, 0, 0, B, 0, 1);
divac_wrap(C, 0, 1, A, 0, 1, B, 1, 1);
/* C_11 = A_10 B_01 + A_11 B_11 */
strassen_wrap(C, 1, 1, A, 1, 0, B, 0, 1);
strassen_wrap(C, 1, 1, A, 1, 1, B, 1, 1);
divac_wrap(C, 1, 1, A, 1, 0, B, 0, 1);
divac_wrap(C, 1, 1, A, 1, 1, B, 1, 1);
}
}
void mmult_strassen(uint64_t C[][SIZE], uint64_t A[][SIZE], uint64_t B[][SIZE]){
void mmult_divac(uint64_t C[][SIZE], uint64_t A[][SIZE], uint64_t B[][SIZE]){
mblock Cb = {.mat = (uint64_t *) C, .imin = 0, .imax = SIZE, .jmin = 0, .jmax = SIZE};
mblock Ab = {.mat = (uint64_t *) A , .imin = 0, .imax = SIZE, .jmin = 0, .jmax = SIZE};
mblock Bb = {.mat = (uint64_t *) B , .imin = 0, .imax = SIZE, .jmin = 0, .jmax = SIZE};
strassen_part(&Cb, &Ab, &Bb);
divac_part(&Cb, &Ab, &Bb);
}
#ifdef __UNIT_TEST_MMULT__
......@@ -128,7 +128,7 @@ int main(void){
mmult_row(C1, A, B);
mmult_col(C2, A, B);
mmult_strassen(C3, A, B);
mmult_divac(C3, A, B);
for (int i = 0 ; i < SIZE ; i++)
for (int j = 0 ; j < SIZE ; j++)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment