diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig
index 638a9bb60..902f2da2f 100644
--- a/lib/std/special/compiler_rt.zig
+++ b/lib/std/special/compiler_rt.zig
@@ -248,8 +248,17 @@ comptime {
 
         switch (builtin.arch) {
             .i386 => {
+                @export("_alldiv", @import("compiler_rt/aulldiv.zig")._alldiv, strong_linkage);
                 @export("_aulldiv", @import("compiler_rt/aulldiv.zig")._aulldiv, strong_linkage);
+                @export("_allrem", @import("compiler_rt/aullrem.zig")._allrem, strong_linkage);
                 @export("_aullrem", @import("compiler_rt/aullrem.zig")._aullrem, strong_linkage);
+
+                @export("__divti3", @import("compiler_rt/divti3.zig").__divti3, linkage);
+                @export("__modti3", @import("compiler_rt/modti3.zig").__modti3, linkage);
+                @export("__multi3", @import("compiler_rt/multi3.zig").__multi3, linkage);
+                @export("__udivti3", @import("compiler_rt/udivti3.zig").__udivti3, linkage);
+                @export("__udivmodti4", @import("compiler_rt/udivmodti4.zig").__udivmodti4, linkage);
+                @export("__umodti3", @import("compiler_rt/umodti3.zig").__umodti3, linkage);
             },
             .x86_64 => {
                 // The "ti" functions must use @Vector(2, u64) parameter types to adhere to the ABI
diff --git a/lib/std/special/compiler_rt/aulldiv.zig b/lib/std/special/compiler_rt/aulldiv.zig
index d99bc94ff..dfca4f4c4 100644
--- a/lib/std/special/compiler_rt/aulldiv.zig
+++ b/lib/std/special/compiler_rt/aulldiv.zig
@@ -1,55 +1,79 @@
+const builtin = @import("builtin");
+
+/// Signed 64-bit division, exported as `_alldiv` for i386 targets.
+/// Both operands are reduced to their magnitudes and divided as unsigned
+/// values; the quotient is negated when the operand signs differ.
+pub extern stdcallcc fn _alldiv(a: i64, b: i64) i64 {
+    @setRuntimeSafety(builtin.is_test);
+    const s_a = a >> (i64.bit_count - 1);
+    const s_b = b >> (i64.bit_count - 1);
+
+    const an = (a ^ s_a) -% s_a;
+    const bn = (b ^ s_b) -% s_b;
+
+    const r = @bitCast(u64, an) / @bitCast(u64, bn);
+    const s = s_a ^ s_b;
+    return (@bitCast(i64, r) ^ s) -% s;
+}
+
 pub nakedcc fn _aulldiv() void {
     @setRuntimeSafety(false);
+
+    // The stack layout is:
+    // ESP+16 divisor (hi)
+    // ESP+12 divisor (low)
+    // ESP+8 dividend (hi)
+    // ESP+4 dividend (low)
+    // ESP return address
+
     asm volatile (
-        \\.intel_syntax noprefix
-        \\
-        \\ push ebx
-        \\ push esi
-        \\ mov eax,dword ptr [esp+18h]
-        \\ or eax,eax
-        \\ jne L1
-        \\ mov ecx,dword ptr [esp+14h]
-        \\ mov eax,dword ptr [esp+10h]
-        \\ xor edx,edx
-        \\ div ecx
-        \\ mov ebx,eax
-        \\ mov eax,dword ptr [esp+0Ch]
-        \\ div ecx
-        \\ mov edx,ebx
-        \\ jmp L2
-        \\ L1:
-        \\ mov ecx,eax
-        \\ mov ebx,dword ptr [esp+14h]
-        \\ mov edx,dword ptr [esp+10h]
-        \\ mov eax,dword ptr [esp+0Ch]
-        \\ L3:
-        \\ shr ecx,1
-        \\ rcr ebx,1
-        \\ shr edx,1
-        \\ rcr eax,1
-        \\ or ecx,ecx
-        \\ jne L3
-        \\ div ebx
-        \\ mov esi,eax
-        \\ mul dword ptr [esp+18h]
-        \\ mov ecx,eax
-        \\ mov eax,dword ptr [esp+14h]
-        \\ mul esi
-        \\ add edx,ecx
-        \\ jb L4
-        \\ cmp edx,dword ptr [esp+10h]
-        \\ ja L4
-        \\ jb L5
-        \\ cmp eax,dword ptr [esp+0Ch]
-        \\ jbe L5
-        \\ L4:
-        \\ dec esi
-        \\ L5:
-        \\ xor edx,edx
-        \\ mov eax,esi
-        \\ L2:
-        \\ pop esi
-        \\ pop ebx
-        \\ ret 10h
+        \\ push %%ebx
+        \\ push %%esi
+        \\ mov 0x18(%%esp),%%eax
+        \\ or %%eax,%%eax
+        \\ jne 1f
+        \\ mov 0x14(%%esp),%%ecx
+        \\ mov 0x10(%%esp),%%eax
+        \\ xor %%edx,%%edx
+        \\ div %%ecx
+        \\ mov %%eax,%%ebx
+        \\ mov 0xc(%%esp),%%eax
+        \\ div %%ecx
+        \\ mov %%ebx,%%edx
+        \\ jmp 5f
+        \\ 1:
+        \\ mov %%eax,%%ecx
+        \\ mov 0x14(%%esp),%%ebx
+        \\ mov 0x10(%%esp),%%edx
+        \\ mov 0xc(%%esp),%%eax
+        \\ 2:
+        \\ shr %%ecx
+        \\ rcr %%ebx
+        \\ shr %%edx
+        \\ rcr %%eax
+        \\ or %%ecx,%%ecx
+        \\ jne 2b
+        \\ div %%ebx
+        \\ mov %%eax,%%esi
+        \\ mull 0x18(%%esp)
+        \\ mov %%eax,%%ecx
+        \\ mov 0x14(%%esp),%%eax
+        \\ mul %%esi
+        \\ add %%ecx,%%edx
+        \\ jb 3f
+        \\ cmp 0x10(%%esp),%%edx
+        \\ ja 3f
+        \\ jb 4f
+        \\ cmp 0xc(%%esp),%%eax
+        \\ jbe 4f
+        \\ 3:
+        \\ dec %%esi
+        \\ 4:
+        \\ xor %%edx,%%edx
+        \\ mov %%esi,%%eax
+        \\ 5:
+        \\ pop %%esi
+        \\ pop %%ebx
+        \\ ret $0x10
     );
 }
diff --git a/lib/std/special/compiler_rt/aullrem.zig b/lib/std/special/compiler_rt/aullrem.zig
index 51c4eebe3..c1fee7203 100644
--- a/lib/std/special/compiler_rt/aullrem.zig
+++ b/lib/std/special/compiler_rt/aullrem.zig
@@ -1,56 +1,81 @@
+const builtin = @import("builtin");
+
+/// Signed 64-bit remainder, exported as `_allrem` for i386 targets.
+/// Both operands are reduced to their magnitudes and divided as unsigned
+/// values; the result then takes the sign of the dividend `a`, so that
+/// `a == (a / b) * b + (a % b)` holds for truncated division.
+pub extern stdcallcc fn _allrem(a: i64, b: i64) i64 {
+    @setRuntimeSafety(builtin.is_test);
+    const s_a = a >> (i64.bit_count - 1);
+    const s_b = b >> (i64.bit_count - 1);
+
+    const an = (a ^ s_a) -% s_a;
+    const bn = (b ^ s_b) -% s_b;
+
+    const r = @bitCast(u64, an) % @bitCast(u64, bn);
+    // The sign of the divisor must not influence the sign of the remainder.
+    return (@bitCast(i64, r) ^ s_a) -% s_a;
+}
+
 pub nakedcc fn _aullrem() void {
     @setRuntimeSafety(false);
+
+    // The stack layout is:
+    // ESP+16 divisor (hi)
+    // ESP+12 divisor (low)
+    // ESP+8 dividend (hi)
+    // ESP+4 dividend (low)
+    // ESP return address
+
     asm volatile (
-        \\.intel_syntax noprefix
-        \\
-        \\ push ebx
-        \\ mov eax,dword ptr [esp+14h]
-        \\ or eax,eax
-        \\ jne L1a
-        \\ mov ecx,dword ptr [esp+10h]
-        \\ mov eax,dword ptr [esp+0Ch]
-        \\ xor edx,edx
-        \\ div ecx
-        \\ mov eax,dword ptr [esp+8]
-        \\ div ecx
-        \\ mov eax,edx
-        \\ xor edx,edx
-        \\ jmp L2a
-        \\ L1a:
-        \\ mov ecx,eax
-        \\ mov ebx,dword ptr [esp+10h]
-        \\ mov edx,dword ptr [esp+0Ch]
-        \\ mov eax,dword ptr [esp+8]
-        \\ L3a:
-        \\ shr ecx,1
-        \\ rcr ebx,1
-        \\ shr edx,1
-        \\ rcr eax,1
-        \\ or ecx,ecx
-        \\ jne L3a
-        \\ div ebx
-        \\ mov ecx,eax
-        \\ mul dword ptr [esp+14h]
-        \\ xchg eax,ecx
-        \\ mul dword ptr [esp+10h]
-        \\ add edx,ecx
-        \\ jb L4a
-        \\ cmp edx,dword ptr [esp+0Ch]
-        \\ ja L4a
-        \\ jb L5a
-        \\ cmp eax,dword ptr [esp+8]
-        \\ jbe L5a
-        \\ L4a:
-        \\ sub eax,dword ptr [esp+10h]
-        \\ sbb edx,dword ptr [esp+14h]
-        \\ L5a:
-        \\ sub eax,dword ptr [esp+8]
-        \\ sbb edx,dword ptr [esp+0Ch]
-        \\ neg edx
-        \\ neg eax
-        \\ sbb edx,0
-        \\ L2a:
-        \\ pop ebx
-        \\ ret 10h
+        \\ push %%ebx
+        \\ mov 0x14(%%esp),%%eax
+        \\ or %%eax,%%eax
+        \\ jne 1f
+        \\ mov 0x10(%%esp),%%ecx
+        \\ mov 0xc(%%esp),%%eax
+        \\ xor %%edx,%%edx
+        \\ div %%ecx
+        \\ mov 0x8(%%esp),%%eax
+        \\ div %%ecx
+        \\ mov %%edx,%%eax
+        \\ xor %%edx,%%edx
+        \\ jmp 6f
+        \\ 1:
+        \\ mov %%eax,%%ecx
+        \\ mov 0x10(%%esp),%%ebx
+        \\ mov 0xc(%%esp),%%edx
+        \\ mov 0x8(%%esp),%%eax
+        \\ 2:
+        \\ shr %%ecx
+        \\ rcr %%ebx
+        \\ shr %%edx
+        \\ rcr %%eax
+        \\ or %%ecx,%%ecx
+        \\ jne 2b
+        \\ div %%ebx
+        \\ mov %%eax,%%ecx
+        \\ mull 0x14(%%esp)
+        \\ xchg %%eax,%%ecx
+        \\ mull 0x10(%%esp)
+        \\ add %%ecx,%%edx
+        \\ jb 3f
+        \\ cmp 0xc(%%esp),%%edx
+        \\ ja 3f
+        \\ jb 4f
+        \\ cmp 0x8(%%esp),%%eax
+        \\ jbe 4f
+        \\ 3:
+        \\ sub 0x10(%%esp),%%eax
+        \\ sbb 0x14(%%esp),%%edx
+        \\ 4:
+        \\ sub 0x8(%%esp),%%eax
+        \\ sbb 0xc(%%esp),%%edx
+        \\ neg %%edx
+        \\ neg %%eax
+        \\ sbb $0x0,%%edx
+        \\ 6:
+        \\ pop %%ebx
+        \\ ret $0x10
     );
 }
diff --git a/lib/std/special/compiler_rt/extendXfYf2_test.zig b/lib/std/special/compiler_rt/extendXfYf2_test.zig
index 050a79982..6f8111c8f 100644
--- a/lib/std/special/compiler_rt/extendXfYf2_test.zig
+++ b/lib/std/special/compiler_rt/extendXfYf2_test.zig
@@ -1,3 +1,4 @@
+const builtin = @import("builtin");
 const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2;
 const __extendhfsf2 = @import("extendXfYf2.zig").__extendhfsf2;
 const __extendsftf2 = @import("extendXfYf2.zig").__extendsftf2;
@@ -87,7 +88,10 @@ test "extenddftf2" {
 test "extendhfsf2" {
     test__extendhfsf2(0x7e00, 0x7fc00000); // qNaN
     test__extendhfsf2(0x7f00, 0x7fe00000); // sNaN
-    test__extendhfsf2(0x7c01, 0x7f802000); // sNaN
+    // On x86 the NaN becomes quiet because the return is pushed on the x87
+    // stack due to ABI requirements, so only check it on the other targets.
+    if (builtin.arch != .i386)
+        test__extendhfsf2(0x7c01, 0x7f802000); // sNaN
 
     test__extendhfsf2(0, 0); // 0
     test__extendhfsf2(0x8000, 0x80000000); // -0
diff --git a/lib/std/special/start_windows_tls.zig b/lib/std/special/start_windows_tls.zig
index 71165d355..bfd0e4412 100644
--- a/lib/std/special/start_windows_tls.zig
+++ b/lib/std/special/start_windows_tls.zig
@@ -1,4 +1,5 @@
 const std = @import("std");
+const builtin = @import("builtin");
 
 export var _tls_index: u32 = std.os.windows.TLS_OUT_OF_INDEXES;
 export var _tls_start: u8 linksection(".tls") = 0;
@@ -6,6 +7,17 @@ export var _tls_end: u8 linksection(".tls$ZZZ") = 0;
 export var __xl_a: std.os.windows.PIMAGE_TLS_CALLBACK linksection(".CRT$XLA") = null;
 export var __xl_z: std.os.windows.PIMAGE_TLS_CALLBACK linksection(".CRT$XLZ") = null;
 
+comptime {
+    if (builtin.arch == .i386) {
+        // __tls_array is the offset of the ThreadLocalStoragePointer field
+        // in the TEB block, whose base address is held in the %fs segment.
+        asm (
+            \\ .global __tls_array
+            \\ __tls_array = 0x2C
+        );
+    }
+}
+
 // TODO this is how I would like it to be expressed
 // TODO also note, ReactOS has a +1 on StartAddressOfRawData and AddressOfCallBacks. Investigate
 // why they do that.
diff --git a/src/analyze.cpp b/src/analyze.cpp
index cb3028580..82b09175d 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -913,7 +913,10 @@ bool want_first_arg_sret(CodeGen *g, FnTypeId *fn_type_id) {
     if (type_is_c_abi_int(g, fn_type_id->return_type)) {
         return false;
     }
-    if (g->zig_target->arch == ZigLLVM_x86_64) {
+    if (g->zig_target->arch == ZigLLVM_x86) {
+        X64CABIClass abi_class = type_c_abi_x86_64_class(g, fn_type_id->return_type);
+        return abi_class == X64CABIClass_MEMORY;
+    } else if (g->zig_target->arch == ZigLLVM_x86_64) {
         X64CABIClass abi_class = type_c_abi_x86_64_class(g, fn_type_id->return_type);
         return abi_class == X64CABIClass_MEMORY;
     } else if (target_is_arm(g->zig_target) || target_is_riscv(g->zig_target)) {
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 420877c5a..3dbd0b853 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -8727,6 +8727,9 @@ static void init(CodeGen *g) {
         // Be aware of https://github.com/ziglang/zig/issues/3275
         target_specific_cpu_args = "";
         target_specific_features = riscv_default_features;
+    } else if (g->zig_target->arch == ZigLLVM_x86) {
+        target_specific_cpu_args = "pentium4";
+        target_specific_features = "";
     } else {
         target_specific_cpu_args = "";
         target_specific_features = "";