Skip to content
Snippets Groups Projects
Commit 00b605a1 authored by xleroy's avatar xleroy
Browse files

Added tail call optimization pass

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1015 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
parent c90abe06
No related branches found
No related tags found
No related merge requests found
......@@ -68,6 +68,8 @@ backend/RTL.vo: backend/RTL.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Intege
backend/Stackingproof.vo: backend/Stackingproof.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo common/Values.vo $(ARCH)/Op.vo common/Mem.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Locations.vo backend/Linear.vo backend/Lineartyping.vo backend/Mach.vo backend/Machabstr.vo backend/Bounds.vo $(ARCH)/$(VARIANT)/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Stacking.vo
backend/Stackingtyping.vo: backend/Stackingtyping.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo lib/Integers.vo common/AST.vo $(ARCH)/Op.vo backend/Locations.vo $(ARCH)/$(VARIANT)/Conventions.vo backend/Linear.vo backend/Lineartyping.vo backend/Mach.vo backend/Machtyping.vo backend/Bounds.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Stacking.vo backend/Stackingproof.vo
backend/Stacking.vo: backend/Stacking.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/RTL.vo backend/Locations.vo backend/Linear.vo backend/Bounds.vo backend/Mach.vo $(ARCH)/$(VARIANT)/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo
backend/Tailcallproof.vo: backend/Tailcallproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Mem.vo $(ARCH)/Op.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Registers.vo backend/RTL.vo $(ARCH)/$(VARIANT)/Conventions.vo backend/Tailcall.vo
backend/Tailcall.vo: backend/Tailcall.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Globalenvs.vo backend/Registers.vo $(ARCH)/Op.vo backend/RTL.vo $(ARCH)/$(VARIANT)/Conventions.vo
backend/Tunnelingproof.vo: backend/Tunnelingproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Values.vo common/Mem.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Tunneling.vo
backend/Tunnelingtyping.vo: backend/Tunnelingtyping.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Values.vo common/Mem.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/LTLtyping.vo backend/Tunneling.vo backend/Tunnelingproof.vo
backend/Tunneling.vo: backend/Tunneling.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Values.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo
......@@ -81,5 +83,5 @@ cfrontend/Cshmgenproof3.vo: cfrontend/Cshmgenproof3.v lib/Coqlib.vo common/Error
cfrontend/Cshmgen.vo: cfrontend/Cshmgen.v lib/Coqlib.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/AST.vo cfrontend/Csyntax.vo backend/Cminor.vo cfrontend/Csharpminor.vo
cfrontend/Csyntax.vo: cfrontend/Csyntax.v lib/Coqlib.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/AST.vo
cfrontend/Ctyping.vo: cfrontend/Ctyping.v lib/Coqlib.vo lib/Maps.vo common/AST.vo cfrontend/Csyntax.vo
driver/Compiler.vo: driver/Compiler.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo common/Values.vo common/Smallstep.vo cfrontend/Csyntax.vo cfrontend/Csem.vo cfrontend/Csharpminor.vo backend/Cminor.vo backend/CminorSel.vo backend/RTL.vo backend/LTL.vo backend/LTLin.vo backend/Linear.vo backend/Mach.vo $(ARCH)/Asm.vo cfrontend/Cshmgen.vo cfrontend/Cminorgen.vo $(ARCH)/Selection.vo backend/RTLgen.vo $(ARCH)/Constprop.vo backend/CSE.vo backend/Allocation.vo backend/Tunneling.vo backend/Linearize.vo backend/Reload.vo backend/Stacking.vo $(ARCH)/Asmgen.vo cfrontend/Ctyping.vo backend/RTLtyping.vo backend/LTLtyping.vo backend/LTLintyping.vo backend/Lineartyping.vo backend/Machtyping.vo cfrontend/Cshmgenproof3.vo cfrontend/Cminorgenproof.vo $(ARCH)/Selectionproof.vo backend/RTLgenproof.vo $(ARCH)/Constpropproof.vo backend/CSEproof.vo backend/Allocproof.vo backend/Alloctyping.vo backend/Tunnelingproof.vo backend/Tunnelingtyping.vo backend/Linearizeproof.vo backend/Linearizetyping.vo backend/Reloadproof.vo backend/Reloadtyping.vo backend/Stackingproof.vo backend/Stackingtyping.vo backend/Machabstr2concr.vo $(ARCH)/Asmgenproof.vo
driver/Compiler.vo: driver/Compiler.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo common/Values.vo common/Smallstep.vo cfrontend/Csyntax.vo cfrontend/Csem.vo cfrontend/Csharpminor.vo backend/Cminor.vo backend/CminorSel.vo backend/RTL.vo backend/LTL.vo backend/LTLin.vo backend/Linear.vo backend/Mach.vo $(ARCH)/Asm.vo cfrontend/Cshmgen.vo cfrontend/Cminorgen.vo $(ARCH)/Selection.vo backend/RTLgen.vo backend/Tailcall.vo $(ARCH)/Constprop.vo backend/CSE.vo backend/Allocation.vo backend/Tunneling.vo backend/Linearize.vo backend/Reload.vo backend/Stacking.vo $(ARCH)/Asmgen.vo cfrontend/Ctyping.vo backend/RTLtyping.vo backend/LTLtyping.vo backend/LTLintyping.vo backend/Lineartyping.vo backend/Machtyping.vo cfrontend/Cshmgenproof3.vo cfrontend/Cminorgenproof.vo $(ARCH)/Selectionproof.vo backend/RTLgenproof.vo backend/Tailcallproof.vo $(ARCH)/Constpropproof.vo backend/CSEproof.vo backend/Allocproof.vo backend/Alloctyping.vo backend/Tunnelingproof.vo backend/Tunnelingtyping.vo backend/Linearizeproof.vo backend/Linearizetyping.vo backend/Reloadproof.vo backend/Reloadtyping.vo backend/Stackingproof.vo backend/Stackingtyping.vo backend/Machabstr2concr.vo $(ARCH)/Asmgenproof.vo
driver/Complements.vo: driver/Complements.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo cfrontend/Csyntax.vo cfrontend/Csem.vo $(ARCH)/Asm.vo driver/Compiler.vo common/Errors.vo
- New optimization over RTL: turning calls into tail calls when possible.
Release 1.3, 2008-08-11
=======================
......
......@@ -48,6 +48,7 @@ BACKEND=\
Selection.v Selectionproof.v \
Registers.v RTL.v \
RTLgen.v RTLgenspec.v RTLgenproof.v \
Tailcall.v Tailcallproof.v \
RTLtyping.v \
Kildall.v \
Constprop.v Constpropproof.v \
......
......@@ -167,12 +167,6 @@ Section TYPECHECKING.
Variable funct: function.
Variable env: regenv.
Lemma typ_eq: forall (t1 t2: typ), {t1=t2} + {t1<>t2}.
Proof. decide equality. Qed.
Lemma opt_typ_eq: forall (t1 t2: option typ), {t1=t2} + {t1<>t2}.
Proof. decide equality. apply typ_eq. Qed.
Definition check_reg (r: reg) (ty: typ): bool :=
if typ_eq (env r) ty then true else false.
......
(* *********************************************************************)
(* *)
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, INRIA Paris-Rocquencourt *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
(** Recognition of tail calls. *)
Require Import Coqlib.
Require Import Maps.
Require Import AST.
Require Import Globalenvs.
Require Import Registers.
Require Import Op.
Require Import RTL.
Require Conventions.
(** An [Icall] instruction that stores its result in register [rreg]
can be turned into a tail call if:
- its successor is a [Ireturn None] or [Ireturn (Some rreg)] instruction;
- the stack block of the current function is empty: [stacksize = 0].
If the current function had a non-empty stack block, it could be that
the called function accesses it, for instance if a pointer into the
stack block is passed as an argument. In this case, it would be
semantically incorrect to deallocate the stack block before the call,
as [Itailcall] does. Therefore, the optimization can only be performed if
the stack block of the current function is empty, in which case it makes
no semantic difference to deallocate it before or after the call.
Another complication is that the [Ireturn] instruction does not, in general,
follow immediately the [Icall] instruction: the RTL code generator
can have inserted moves and no-ops between these instructions.
The general pattern we recognize is therefore:
<<
r1 <- call(....)
nop
r2 <- r1
r3 <- r2
return r3
>>
The [is_return] function below recognizes this pattern.
*)
Fixpoint is_return (n: nat) (f: function) (pc: node) (rret: reg)
{struct n}: bool :=
match n with
| O => false
| S n' =>
match f.(fn_code)!pc with
| Some(Ireturn None) => true
| Some(Ireturn (Some r)) => Reg.eq r rret
| Some(Inop s) => is_return n' f s rret
| Some(Iop op args dst s) =>
match is_move_operation op args with
| None => false
| Some src =>
if Reg.eq rret src
then is_return n' f s dst
else false
end
| _ => false
end
end.
(** To ensure termination, we bound arbitrarily the number of iterations
of the [is_return] function: we look ahead for a nop/move/return of length
at most [niter] instructions. *)
Definition niter := 5%nat.
(** The code transformation is straightforward: call instructions
followed by an appropriate nop/move/return sequence become
tail calls; other instructions are unchanged.
To ensure that the resulting RTL code is well typed, we
restrict the transformation to the cases where a tail call is
allowed by the calling conventions, and where the result signatures
match. *)
Definition transf_instr (f: function) (pc: node) (instr: instruction) :=
match instr with
| Icall sig ros args res s =>
if is_return niter f s res
&& Conventions.tailcall_is_possible sig
&& opt_typ_eq sig.(sig_res) f.(fn_sig).(sig_res)
then Itailcall sig ros args
else instr
| _ => instr
end.
(** A function is transformed only if its stack block is empty,
as explained above. *)
Definition transf_function (f: function) : function :=
if zeq f.(fn_stacksize) 0
then RTL.transf_function (transf_instr f) f
else f.
Definition transf_fundef (fd: fundef) : fundef :=
AST.transf_fundef transf_function fd.
Definition transf_program (p: program) : program :=
transform_program transf_fundef p.
This diff is collapsed.
......@@ -46,6 +46,12 @@ Definition typesize (ty: typ) : Z :=
Lemma typesize_pos: forall ty, typesize ty > 0.
Proof. destruct ty; simpl; omega. Qed.
Lemma typ_eq: forall (t1 t2: typ), {t1=t2} + {t1<>t2}.
Proof. decide equality. Qed.
Lemma opt_typ_eq: forall (t1 t2: option typ), {t1=t2} + {t1<>t2}.
Proof. decide equality. apply typ_eq. Qed.
(** Additionally, function definitions and function calls are annotated
by function signatures indicating the number and types of arguments,
as well as the type of the returned value if any. These signatures
......
......@@ -1796,6 +1796,26 @@ Proof.
unfold inject_id; intros. congruence.
Qed.
Lemma free_left_lessdef:
forall m1 m2 b,
lessdef m1 m2 -> lessdef (free m1 b) m2.
Proof.
intros. destruct H. split.
rewrite <- H. auto.
apply free_left_inj; auto.
Qed.
Lemma free_right_lessdef:
forall m1 m2 b,
lessdef m1 m2 -> low_bound m1 b >= high_bound m1 b ->
lessdef m1 (free m2 b).
Proof.
intros. destruct H. unfold lessdef. split.
rewrite H. auto.
apply free_right_inj; auto. intros. unfold inject_id in H2. inv H2.
red; intro. inv H2. generalize (size_chunk_pos chunk); intro. omega.
Qed.
Lemma valid_block_lessdef:
forall m1 m2 b, lessdef m1 m2 -> valid_block m1 b -> valid_block m2 b.
Proof.
......
......@@ -36,6 +36,7 @@ Require Cshmgen.
Require Cminorgen.
Require Selection.
Require RTLgen.
Require Tailcall.
Require Constprop.
Require CSE.
Require Allocation.
......@@ -56,6 +57,7 @@ Require Cshmgenproof3.
Require Cminorgenproof.
Require Selectionproof.
Require RTLgenproof.
Require Tailcallproof.
Require Constpropproof.
Require CSEproof.
Require Allocproof.
......@@ -108,6 +110,7 @@ Notation "a @@ b" :=
Definition transf_rtl_fundef (f: RTL.fundef) : res Asm.fundef :=
OK f
@@ Tailcall.transf_fundef
@@ Constprop.transf_fundef
@@ CSE.transf_fundef
@@@ Allocation.transf_fundef
......@@ -245,7 +248,9 @@ Proof.
clear H2.
destruct (transform_program_compose _ _ _ _ _ _ _ _ H1) as [p0 [H00 P0]].
clear H1.
generalize (transform_partial_program_identity _ _ _ _ H00). clear H00. intro. subst p0.
destruct (transform_program_compose _ _ _ _ _ _ _ _ H00) as [p00 [H000 P00]].
clear H00.
generalize (transform_partial_program_identity _ _ _ _ H000). clear H000. intro. subst p00.
assert (WT3 : LTLtyping.wt_program p3).
apply Alloctyping.program_typing_preserved with p2. auto.
......@@ -266,7 +271,9 @@ Proof.
subst p4; apply Tunnelingproof.transf_program_correct.
apply Allocproof.transf_program_correct with p2; auto.
subst p2; apply CSEproof.transf_program_correct.
subst p1; apply Constpropproof.transf_program_correct. auto.
subst p1; apply Constpropproof.transf_program_correct.
subst p0; apply Tailcallproof.transf_program_correct.
auto.
Qed.
Theorem transf_cminor_program_correct:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment