compiler-rt: Better selection of __clzsi2 implementation

To be honest, all this detection logic is starting to become a real PITA;
the ARM32 version could possibly be removed, as the generic version
optimizes pretty well...
This commit is contained in:
LemonBoy 2021-05-04 18:45:52 +02:00
parent 389d1177a5
commit 4bf093f1a0
2 changed files with 22 additions and 7 deletions

View File

@ -26,6 +26,8 @@ fn __clzsi2_generic(a: i32) callconv(.C) i32 {
}
fn __clzsi2_thumb1() callconv(.Naked) void {
@setRuntimeSafety(false);
// Similar to the generic version with the last two rounds replaced by a LUT
asm volatile (
\\ movs r1, #32
@ -58,6 +60,8 @@ fn __clzsi2_thumb1() callconv(.Naked) void {
}
fn __clzsi2_arm32() callconv(.Naked) void {
@setRuntimeSafety(false);
asm volatile (
\\ // Assumption: n != 0
\\ // r0: n
@ -104,13 +108,22 @@ fn __clzsi2_arm32() callconv(.Naked) void {
unreachable;
}
pub const __clzsi2 = switch (std.Target.current.cpu.arch) {
.arm, .armeb => if (std.Target.arm.featureSetHas(std.Target.current.cpu.features, .noarm))
__clzsi2_thumb1
else
__clzsi2_arm32,
.thumb, .thumbeb => __clzsi2_thumb1,
else => __clzsi2_generic,
/// Count-leading-zeros routine exported for the current compilation target.
///
/// The choice is made entirely at compile time from the target architecture
/// and its CPU feature set:
///   * Thumb (or `noarm`) targets without the `thumb2` feature get
///     `__clzsi2_thumb1`;
///   * remaining non-Thumb ARM targets get `__clzsi2_arm32`;
///   * everything else, including Thumb targets with `thumb2`, gets
///     `__clzsi2_generic`.
pub const __clzsi2 = impl: {
    const cpu = std.Target.current.cpu;
    switch (cpu.arch) {
        .arm, .armeb, .thumb, .thumbeb => {
            const is_thumb = cpu.arch.isThumb();
            const has_noarm = std.Target.arm.featureSetHas(cpu.features, .noarm);
            const has_thumb2 = std.Target.arm.featureSetHas(cpu.features, .thumb2);

            // Thumb-only instruction encoding without the Thumb2 extension.
            if ((is_thumb or has_noarm) and !has_thumb2) break :impl __clzsi2_thumb1;

            // Past this point the target is either Thumb2 or ARM.
            if (!is_thumb) break :impl __clzsi2_arm32;

            // Thumb2: fall back to the generic implementation.
            break :impl __clzsi2_generic;
        },
        else => break :impl __clzsi2_generic,
    }
};
test "test clzsi2" {

View File

@ -7,6 +7,8 @@ const clzsi2 = @import("clzsi2.zig");
const testing = @import("std").testing;
fn test__clzsi2(a: u32, expected: i32) void {
// XXX At high optimization levels this test may be horribly miscompiled if
// one of the naked implementations is selected.
var nakedClzsi2 = clzsi2.__clzsi2;
var actualClzsi2 = @ptrCast(fn (a: i32) callconv(.C) i32, nakedClzsi2);
var x = @bitCast(i32, a);