zig/lib/compiler_rt/sqrt.zig
Andrew Kelley 0556a2ba53 compiler-rt: finish cleanups
Finishes cleanups that I started in other commits in this branch.

 * Use common.linkage for all exports instead of redoing the logic in
   each file.
 * Remove pointless `@setRuntimeSafety` calls.
 * Avoid redundantly exporting multiple versions of functions. For
   example, if PPC wants `ceilf128` then don't also export `ceilq`;
   similarly if ARM wants `__aeabi_ddiv` then don't also export
   `__divdf3`.
 * Use `inline` for helper functions instead of making inline calls at
   callsites.
2022-06-17 18:10:00 -07:00

311 lines
7.8 KiB
Zig

const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const math = std.math;
const common = @import("common.zig");
pub const panic = common.panic;
comptime {
@export(__sqrth, .{ .name = "__sqrth", .linkage = common.linkage });
@export(sqrtf, .{ .name = "sqrtf", .linkage = common.linkage });
@export(sqrt, .{ .name = "sqrt", .linkage = common.linkage });
@export(__sqrtx, .{ .name = "__sqrtx", .linkage = common.linkage });
const sqrtq_sym_name = if (common.want_ppc_abi) "sqrtf128" else "sqrtq";
@export(sqrtq, .{ .name = sqrtq_sym_name, .linkage = common.linkage });
@export(sqrtl, .{ .name = "sqrtl", .linkage = common.linkage });
}
pub fn __sqrth(x: f16) callconv(.C) f16 {
// TODO: more efficient implementation
return @floatCast(f16, sqrtf(x));
}
pub fn sqrtf(x: f32) callconv(.C) f32 {
const tiny: f32 = 1.0e-30;
const sign: i32 = @bitCast(i32, @as(u32, 0x80000000));
var ix: i32 = @bitCast(i32, x);
if ((ix & 0x7F800000) == 0x7F800000) {
return x * x + x; // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = snan
}
// zero
if (ix <= 0) {
if (ix & ~sign == 0) {
return x; // sqrt (+-0) = +-0
}
if (ix < 0) {
return math.snan(f32);
}
}
// normalize
var m = ix >> 23;
if (m == 0) {
// subnormal
var i: i32 = 0;
while (ix & 0x00800000 == 0) : (i += 1) {
ix <<= 1;
}
m -= i - 1;
}
m -= 127; // unbias exponent
ix = (ix & 0x007FFFFF) | 0x00800000;
if (m & 1 != 0) { // odd m, double x to even
ix += ix;
}
m >>= 1; // m = [m / 2]
// sqrt(x) bit by bit
ix += ix;
var q: i32 = 0; // q = sqrt(x)
var s: i32 = 0;
var r: i32 = 0x01000000; // r = moving bit right -> left
while (r != 0) {
const t = s + r;
if (t <= ix) {
s = t + r;
ix -= t;
q += r;
}
ix += ix;
r >>= 1;
}
// floating add to find rounding direction
if (ix != 0) {
var z = 1.0 - tiny; // inexact
if (z >= 1.0) {
z = 1.0 + tiny;
if (z > 1.0) {
q += 2;
} else {
if (q & 1 != 0) {
q += 1;
}
}
}
}
ix = (q >> 1) + 0x3f000000;
ix += m << 23;
return @bitCast(f32, ix);
}
/// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound
/// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are
/// potentially some edge cases remaining that are not handled in the same way.
pub fn sqrt(x: f64) callconv(.C) f64 {
const tiny: f64 = 1.0e-300;
const sign: u32 = 0x80000000;
const u = @bitCast(u64, x);
var ix0 = @intCast(u32, u >> 32);
var ix1 = @intCast(u32, u & 0xFFFFFFFF);
// sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan
if (ix0 & 0x7FF00000 == 0x7FF00000) {
return x * x + x;
}
// sqrt(+-0) = +-0
if (x == 0.0) {
return x;
}
// sqrt(-ve) = snan
if (ix0 & sign != 0) {
return math.snan(f64);
}
// normalize x
var m = @intCast(i32, ix0 >> 20);
if (m == 0) {
// subnormal
while (ix0 == 0) {
m -= 21;
ix0 |= ix1 >> 11;
ix1 <<= 21;
}
// subnormal
var i: u32 = 0;
while (ix0 & 0x00100000 == 0) : (i += 1) {
ix0 <<= 1;
}
m -= @intCast(i32, i) - 1;
ix0 |= ix1 >> @intCast(u5, 32 - i);
ix1 <<= @intCast(u5, i);
}
// unbias exponent
m -= 1023;
ix0 = (ix0 & 0x000FFFFF) | 0x00100000;
if (m & 1 != 0) {
ix0 += ix0 + (ix1 >> 31);
ix1 = ix1 +% ix1;
}
m >>= 1;
// sqrt(x) bit by bit
ix0 += ix0 + (ix1 >> 31);
ix1 = ix1 +% ix1;
var q: u32 = 0;
var q1: u32 = 0;
var s0: u32 = 0;
var s1: u32 = 0;
var r: u32 = 0x00200000;
var t: u32 = undefined;
var t1: u32 = undefined;
while (r != 0) {
t = s0 +% r;
if (t <= ix0) {
s0 = t + r;
ix0 -= t;
q += r;
}
ix0 = ix0 +% ix0 +% (ix1 >> 31);
ix1 = ix1 +% ix1;
r >>= 1;
}
r = sign;
while (r != 0) {
t1 = s1 +% r;
t = s0;
if (t < ix0 or (t == ix0 and t1 <= ix1)) {
s1 = t1 +% r;
if (t1 & sign == sign and s1 & sign == 0) {
s0 += 1;
}
ix0 -= t;
if (ix1 < t1) {
ix0 -= 1;
}
ix1 = ix1 -% t1;
q1 += r;
}
ix0 = ix0 +% ix0 +% (ix1 >> 31);
ix1 = ix1 +% ix1;
r >>= 1;
}
// rounding direction
if (ix0 | ix1 != 0) {
var z = 1.0 - tiny; // raise inexact
if (z >= 1.0) {
z = 1.0 + tiny;
if (q1 == 0xFFFFFFFF) {
q1 = 0;
q += 1;
} else if (z > 1.0) {
if (q1 == 0xFFFFFFFE) {
q += 1;
}
q1 += 2;
} else {
q1 += q1 & 1;
}
}
}
ix0 = (q >> 1) + 0x3FE00000;
ix1 = q1 >> 1;
if (q & 1 != 0) {
ix1 |= 0x80000000;
}
// NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same
// behaviour at least.
var iix0 = @intCast(i32, ix0);
iix0 = iix0 +% (m << 20);
const uz = (@intCast(u64, iix0) << 32) | ix1;
return @bitCast(f64, uz);
}
pub fn __sqrtx(x: f80) callconv(.C) f80 {
// TODO: more efficient implementation
return @floatCast(f80, sqrtq(x));
}
pub fn sqrtq(x: f128) callconv(.C) f128 {
// TODO: more correct implementation
return sqrt(@floatCast(f64, x));
}
pub fn sqrtl(x: c_longdouble) callconv(.C) c_longdouble {
switch (@typeInfo(c_longdouble).Float.bits) {
16 => return __sqrth(x),
32 => return sqrtf(x),
64 => return sqrt(x),
80 => return __sqrtx(x),
128 => return sqrtq(x),
else => @compileError("unreachable"),
}
}
test "sqrtf" {
const V = [_]f32{
0.0,
4.089288054930154,
7.538757127071935,
8.97780793672623,
5.304443821913729,
5.682408965311888,
0.5846878579110049,
3.650338664297043,
0.3178091951800732,
7.1505232436382835,
3.6589165881946464,
};
// Note that @sqrt will either generate the sqrt opcode (if supported by the
// target ISA) or a call to `sqrtf` otherwise.
for (V) |val|
try std.testing.expectEqual(@sqrt(val), sqrtf(val));
}
test "sqrtf special" {
try std.testing.expect(math.isPositiveInf(sqrtf(math.inf(f32))));
try std.testing.expect(sqrtf(0.0) == 0.0);
try std.testing.expect(sqrtf(-0.0) == -0.0);
try std.testing.expect(math.isNan(sqrtf(-1.0)));
try std.testing.expect(math.isNan(sqrtf(math.nan(f32))));
}
test "sqrt" {
const V = [_]f64{
0.0,
4.089288054930154,
7.538757127071935,
8.97780793672623,
5.304443821913729,
5.682408965311888,
0.5846878579110049,
3.650338664297043,
0.3178091951800732,
7.1505232436382835,
3.6589165881946464,
};
// Note that @sqrt will either generate the sqrt opcode (if supported by the
// target ISA) or a call to `sqrtf` otherwise.
for (V) |val|
try std.testing.expectEqual(@sqrt(val), sqrt(val));
}
test "sqrt special" {
try std.testing.expect(math.isPositiveInf(sqrt(math.inf(f64))));
try std.testing.expect(sqrt(0.0) == 0.0);
try std.testing.expect(sqrt(-0.0) == -0.0);
try std.testing.expect(math.isNan(sqrt(-1.0)));
try std.testing.expect(math.isNan(sqrt(math.nan(f64))));
}