compiler-rt: Better selection of __clzsi2 implementation
To be honest, all this detection logic is starting to become a real PITA; the ARM32 version could possibly be removed, as the generic version optimizes pretty well...
This commit is contained in:
parent
389d1177a5
commit
4bf093f1a0
|
@ -26,6 +26,8 @@ fn __clzsi2_generic(a: i32) callconv(.C) i32 {
|
|||
}
|
||||
|
||||
fn __clzsi2_thumb1() callconv(.Naked) void {
|
||||
@setRuntimeSafety(false);
|
||||
|
||||
// Similar to the generic version with the last two rounds replaced by a LUT
|
||||
asm volatile (
|
||||
\\ movs r1, #32
|
||||
|
@ -58,6 +60,8 @@ fn __clzsi2_thumb1() callconv(.Naked) void {
|
|||
}
|
||||
|
||||
fn __clzsi2_arm32() callconv(.Naked) void {
|
||||
@setRuntimeSafety(false);
|
||||
|
||||
asm volatile (
|
||||
\\ // Assumption: n != 0
|
||||
\\ // r0: n
|
||||
|
@ -104,13 +108,22 @@ fn __clzsi2_arm32() callconv(.Naked) void {
|
|||
unreachable;
|
||||
}
|
||||
|
||||
pub const __clzsi2 = switch (std.Target.current.cpu.arch) {
|
||||
.arm, .armeb => if (std.Target.arm.featureSetHas(std.Target.current.cpu.features, .noarm))
|
||||
__clzsi2_thumb1
|
||||
else
|
||||
__clzsi2_arm32,
|
||||
.thumb, .thumbeb => __clzsi2_thumb1,
|
||||
else => __clzsi2_generic,
|
||||
/// Compile-time selection of the `__clzsi2` implementation for the current target.
///
/// The labeled block `impl` yields exactly one of `__clzsi2_thumb1`,
/// `__clzsi2_arm32` or `__clzsi2_generic`:
///   * ARM-family targets (`.arm`, `.armeb`, `.thumb`, `.thumbeb`) that are
///     Thumb (or carry the `.noarm` feature) and lack `.thumb2` get the
///     Thumb-1 routine;
///   * remaining ARM-family targets for which `isThumb()` is false get the
///     ARM32 routine;
///   * everything else, including Thumb targets with `.thumb2`, gets the
///     generic routine.
pub const __clzsi2 = impl: {
|
||||
switch (std.Target.current.cpu.arch) {
|
||||
.arm, .armeb, .thumb, .thumbeb => {
|
||||
// Thumb-1 is required when the target is a Thumb arch or has the
// `.noarm` feature, and in either case does not have `.thumb2`.
const use_thumb1 =
|
||||
(std.Target.current.cpu.arch.isThumb() or
|
||||
std.Target.arm.featureSetHas(std.Target.current.cpu.features, .noarm)) and
|
||||
!std.Target.arm.featureSetHas(std.Target.current.cpu.features, .thumb2);
|
||||
|
||||
if (use_thumb1) break :impl __clzsi2_thumb1
|
||||
// From here on we're either targeting Thumb2 or ARM.
|
||||
// NOTE(review): an `.arm`/`.armeb` target that has both `.noarm` and
// `.thumb2` also reaches this branch and selects the ARM32 routine,
// presumably because `isThumb()` is false for those archs; confirm
// this is intended.
else if (!std.Target.current.cpu.arch.isThumb()) break :impl __clzsi2_arm32
|
||||
// Use the generic implementation otherwise.
|
||||
else break :impl __clzsi2_generic;
|
||||
},
|
||||
// Every non-ARM-family architecture uses the generic implementation.
else => break :impl __clzsi2_generic,
|
||||
}
|
||||
};
|
||||
|
||||
test "test clzsi2" {
|
||||
|
|
|
@ -7,6 +7,8 @@ const clzsi2 = @import("clzsi2.zig");
|
|||
const testing = @import("std").testing;
|
||||
|
||||
fn test__clzsi2(a: u32, expected: i32) void {
|
||||
// XXX At high optimization levels this test may be horribly miscompiled if
|
||||
// one of the naked implementations is selected.
|
||||
var nakedClzsi2 = clzsi2.__clzsi2;
|
||||
var actualClzsi2 = @ptrCast(fn (a: i32) callconv(.C) i32, nakedClzsi2);
|
||||
var x = @bitCast(i32, a);
|
||||
|
|
Loading…
Reference in New Issue
Block a user