Commit 094ac30c authored by Xavier Leroy's avatar Xavier Leroy Committed by Xavier Leroy
Browse files

RISC-V implementation of __builtin_clz* and __builtin_ctz*

Using binary search loops expanded at point of use.
parent 4cf2fc41
......@@ -394,6 +394,90 @@ let expand_bswap64 d s =
emit (Psrlil(X31, X s, coqint_of_camlint 56l));
emit (Porl(d, X X1, X X31))
(* Count leading zeros. Algorithm 5-7 from Hacker's Delight,
re-rolled as a loop to produce more compact code. *)
let expand_clz ~sixtyfour ~splitlong =
(* Input: X in X5 or (X5, X6) if splitlong
Result: N in X7
Temporaries: S in X8, Y in X9 *)
let lbl1 = new_label() in
let lbl2 = new_label() in
(* N := bitsize of X's type (32 or 64) *)
expand_loadimm32 X7 (coqint_of_camlint
(if sixtyfour || splitlong then 64l else 32l));
(* S := initial shift amount (16 or 32) *)
expand_loadimm32 X8 (coqint_of_camlint (if sixtyfour then 32l else 16l));
if splitlong then begin
(* if (Xhigh == 0) goto lbl1 *)
emit (Pbeqw(X X6, X0, lbl1));
(* N := 32 *)
expand_loadimm32 X7 (coqint_of_camlint 32l);
(* X := Xhigh *)
emit (Pmv(X5, X6))
end;
(* lbl1: *)
emit (Plabel lbl1);
(* Y := X >> S *)
emit (if sixtyfour then Psrll(X9, X X5, X X8) else Psrlw(X9, X X5, X X8));
(* if (Y == 0) goto lbl2 *)
emit (if sixtyfour then Pbeql(X X9, X0, lbl2) else Pbeqw(X X9, X0, lbl2));
(* N := N - S *)
emit (Psubw(X7, X X7, X X8));
(* X := Y *)
emit (Pmv(X5, X9));
(* lbl2: *)
emit (Plabel lbl2);
(* S := S / 2 *)
emit (Psrliw(X8, X X8, _1));
(* if (S != 0) goto lbl1; *)
emit (Pbnew(X X8, X0, lbl1));
(* N := N - X *)
emit (Psubw(X7, X X7, X X5))
(* Count trailing zeros. Algorithm 5-14 from Hacker's Delight,
re-rolled as a loop to produce more compact code. *)
let expand_ctz ~sixtyfour ~splitlong =
(* Input: X in X6 or (X5, X6) if splitlong
Result: N in X7
Temporaries: S in X8, Y in X9 *)
let lbl1 = new_label() in
let lbl2 = new_label() in
(* N := bitsize of X's type (32 or 64) *)
expand_loadimm32 X7 (coqint_of_camlint
(if sixtyfour || splitlong then 64l else 32l));
(* S := initial shift amount (16 or 32) *)
expand_loadimm32 X8 (coqint_of_camlint (if sixtyfour then 32l else 16l));
if splitlong then begin
(* if (Xlow == 0) goto lbl1 *)
emit (Pbeqw(X X5, X0, lbl1));
(* N := 32 *)
expand_loadimm32 X7 (coqint_of_camlint 32l);
(* X := Xlow *)
emit (Pmv(X6, X5))
end;
(* lbl1: *)
emit (Plabel lbl1);
(* Y := X >> S *)
emit (if sixtyfour then Pslll(X9, X X6, X X8) else Psllw(X9, X X6, X X8));
(* if (Y == 0) goto lbl2 *)
emit (if sixtyfour then Pbeql(X X9, X0, lbl2) else Pbeqw(X X9, X0, lbl2));
(* N := N - S *)
emit (Psubw(X7, X X7, X X8));
(* X := Y *)
emit (Pmv(X6, X9));
(* lbl2: *)
emit (Plabel lbl2);
(* S := S / 2 *)
emit (Psrliw(X8, X X8, _1));
(* if (S != 0) goto lbl1; *)
emit (Pbnew(X X8, X0, lbl1));
(* N := N - most significant bit of X *)
emit (if sixtyfour then Psrlil(X6, X X6, coqint_of_camlint 63l)
else Psrliw(X6, X X6, coqint_of_camlint 31l));
emit (Psubw(X7, X X7, X X6))
(* Handling of compiler-inlined builtins *)
let expand_builtin_inline name args res =
......@@ -418,6 +502,31 @@ let expand_builtin_inline name args res =
assert (ah = X6 && al = X5 && rh = X5 && rl = X6);
expand_bswap32 X5 X5;
expand_bswap32 X6 X6
(* Count zeros *)
| "__builtin_clz", [BA(IR a)], BR(IR res) ->
assert (a = X5 && res = X7);
expand_clz ~sixtyfour:false ~splitlong:false
| "__builtin_clzl", [BA(IR a)], BR(IR res) ->
assert (a = X5 && res = X7);
expand_clz ~sixtyfour:Archi.ptr64 ~splitlong:false
| "__builtin_clzll", [BA(IR a)], BR(IR res) ->
assert (a = X5 && res = X7);
expand_clz ~sixtyfour:true ~splitlong:false
| "__builtin_clzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) ->
assert (al = X5 && ah = X6 && res = X7);
expand_clz ~sixtyfour:false ~splitlong:true
| "__builtin_ctz", [BA(IR a)], BR(IR res) ->
assert (a = X6 && res = X7);
expand_ctz ~sixtyfour:false ~splitlong:false
| "__builtin_ctzl", [BA(IR a)], BR(IR res) ->
assert (a = X6 && res = X7);
expand_ctz ~sixtyfour:Archi.ptr64 ~splitlong:false
| "__builtin_ctzll", [BA(IR a)], BR(IR res) ->
assert (a = X6 && res = X7);
expand_ctz ~sixtyfour:true ~splitlong:false
| "__builtin_ctzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) ->
assert (al = X5 && ah = X6 && res = X7);
expand_ctz ~sixtyfour:false ~splitlong:true
(* Float arithmetic *)
| "__builtin_fsqrt", [BA(FR a1)], BR(FR res) ->
emit (Pfsqrtd(res, a1))
......
......@@ -194,6 +194,17 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
match ef with
| EF_inline_asm txt sg clob => destroyed_by_clobber clob
| EF_memcpy sz al => R5 :: R6 :: R7 :: F0 :: nil
| EF_builtin name sg =>
if string_dec name "__builtin_clz"
|| string_dec name "__builtin_clzl"
|| string_dec name "__builtin_clzll" then
R5 :: R8 :: R9 :: nil
else if string_dec name "__builtin_ctz"
|| string_dec name "__builtin_ctzl"
|| string_dec name "__builtin_ctzll" then
R6 :: R8 :: R9 :: nil
else
nil
| _ => nil
end.
......@@ -213,6 +224,20 @@ Definition mregs_for_builtin (ef: external_function): list (option mreg) * list(
| EF_builtin name sg =>
if (negb Archi.ptr64) && string_dec name "__builtin_bswap64" then
(Some R6 :: Some R5 :: nil, Some R5 :: Some R6 :: nil)
else if string_dec name "__builtin_clz"
|| string_dec name "__builtin_clzl" then
(Some R5 :: nil, Some R7 :: nil)
else if string_dec name "__builtin_clzll" then
if Archi.ptr64
then (Some R5 :: nil, Some R7 :: nil)
else (Some R6 :: Some R5 :: nil, Some R7 :: nil)
else if string_dec name "__builtin_ctz"
|| string_dec name "__builtin_ctzl" then
(Some R6 :: nil, Some R7 :: nil)
else if string_dec name "__builtin_ctzll" then
if Archi.ptr64
then (Some R6 :: nil, Some R7 :: nil)
else (Some R6 :: Some R5 :: nil, Some R7 :: nil)
else
(nil, nil)
| _ =>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment