Merge pull request #3389 from LemonBoy/win32

Win32
2019-10-09 13:22:16 -04:00 · 2019-10-09 13:22:16 -04:00 · f929a58d5f
commit f929a58d5f
parent 0fc97015e2 6a687bda76
7 changed files with 176 additions and 103 deletions
--- a/lib/std/special/compiler_rt.zig
+++ b/lib/std/special/compiler_rt.zig
@ -248,8 +248,17 @@ comptime {

        switch (builtin.arch) {
            .i386 => {
+                @export("_alldiv", @import("compiler_rt/aulldiv.zig")._alldiv, strong_linkage);
                @export("_aulldiv", @import("compiler_rt/aulldiv.zig")._aulldiv, strong_linkage);
+                @export("_allrem", @import("compiler_rt/aullrem.zig")._allrem, strong_linkage);
                @export("_aullrem", @import("compiler_rt/aullrem.zig")._aullrem, strong_linkage);
+
+                @export("__divti3", @import("compiler_rt/divti3.zig").__divti3, linkage);
+                @export("__modti3", @import("compiler_rt/modti3.zig").__modti3, linkage);
+                @export("__multi3", @import("compiler_rt/multi3.zig").__multi3, linkage);
+                @export("__udivti3", @import("compiler_rt/udivti3.zig").__udivti3, linkage);
+                @export("__udivmodti4", @import("compiler_rt/udivmodti4.zig").__udivmodti4, linkage);
+                @export("__umodti3", @import("compiler_rt/umodti3.zig").__umodti3, linkage);
            },
            .x86_64 => {
                // The "ti" functions must use @Vector(2, u64) parameter types to adhere to the ABI
--- a/lib/std/special/compiler_rt/aulldiv.zig
+++ b/lib/std/special/compiler_rt/aulldiv.zig
@ -1,55 +1,76 @@
+const builtin = @import("builtin");
+
+/// Signed 64-bit division, truncating toward zero.
+/// Both operands are reduced to their magnitudes, divided as unsigned
+/// integers, and the sign is re-applied to the quotient afterwards.
+pub extern stdcallcc fn _alldiv(a: i64, b: i64) i64 {
+    @setRuntimeSafety(builtin.is_test);
+    const sign_shift = i64.bit_count - 1;
+
+    // Arithmetic shift yields an all-ones mask for negative inputs, else 0.
+    const a_mask = a >> sign_shift;
+    const b_mask = b >> sign_shift;
+    // The quotient is negative exactly when the operand signs differ.
+    const quotient_mask = a_mask ^ b_mask;
+
+    // (x ^ mask) -% mask negates x when mask is all-ones (wrapping abs).
+    const a_abs = @bitCast(u64, (a ^ a_mask) -% a_mask);
+    const b_abs = @bitCast(u64, (b ^ b_mask) -% b_mask);
+
+    const quotient = @bitCast(i64, a_abs / b_abs);
+    return (quotient ^ quotient_mask) -% quotient_mask;
+}
+
 pub nakedcc fn _aulldiv() void {
    @setRuntimeSafety(false);
+
+    // Computes the unsigned 64-bit quotient dividend / divisor and leaves the
+    // result in EDX:EAX. The arguments are read straight from the stack and
+    // are popped by this function itself (`ret $0x10`).
+    //
+    // The stack layout on entry is:
+    // ESP+16 divisor (hi)
+    // ESP+12 divisor (low)
+    // ESP+8 dividend (hi)
+    // ESP+4 dividend (low)
+    // ESP   return address
+    //
+    // EBX and ESI are pushed first, so inside the assembly every argument
+    // offset is 8 bytes larger than listed above (e.g. the high half of the
+    // divisor is read from 0x18(%esp)).
+
    asm volatile (
-        \\.intel_syntax noprefix
-        \\
-        \\         push        ebx
-        \\         push        esi
-        \\         mov         eax,dword ptr [esp+18h]
-        \\         or          eax,eax
-        \\         jne         L1
-        \\         mov         ecx,dword ptr [esp+14h]
-        \\         mov         eax,dword ptr [esp+10h]
-        \\         xor         edx,edx
-        \\         div         ecx
-        \\         mov         ebx,eax
-        \\         mov         eax,dword ptr [esp+0Ch]
-        \\         div         ecx
-        \\         mov         edx,ebx
-        \\         jmp         L2
-        \\ L1:
-        \\         mov         ecx,eax
-        \\         mov         ebx,dword ptr [esp+14h]
-        \\         mov         edx,dword ptr [esp+10h]
-        \\         mov         eax,dword ptr [esp+0Ch]
-        \\ L3:
-        \\         shr         ecx,1
-        \\         rcr         ebx,1
-        \\         shr         edx,1
-        \\         rcr         eax,1
-        \\         or          ecx,ecx
-        \\         jne         L3
-        \\         div         ebx
-        \\         mov         esi,eax
-        \\         mul         dword ptr [esp+18h]
-        \\         mov         ecx,eax
-        \\         mov         eax,dword ptr [esp+14h]
-        \\         mul         esi
-        \\         add         edx,ecx
-        \\         jb          L4
-        \\         cmp         edx,dword ptr [esp+10h]
-        \\         ja          L4
-        \\         jb          L5
-        \\         cmp         eax,dword ptr [esp+0Ch]
-        \\         jbe         L5
-        \\ L4:
-        \\         dec         esi
-        \\ L5:
-        \\         xor         edx,edx
-        \\         mov         eax,esi
-        \\ L2:
-        \\         pop         esi
-        \\         pop         ebx
-        \\         ret         10h
+        \\  push   %%ebx
+        \\  push   %%esi
+        \\  mov    0x18(%%esp),%%eax
+        \\  or     %%eax,%%eax
+        \\  jne    1f
+        \\  mov    0x14(%%esp),%%ecx
+        \\  mov    0x10(%%esp),%%eax
+        \\  xor    %%edx,%%edx
+        \\  div    %%ecx
+        \\  mov    %%eax,%%ebx
+        \\  mov    0xc(%%esp),%%eax
+        \\  div    %%ecx
+        \\  mov    %%ebx,%%edx
+        \\  jmp    5f
+        \\ 1:
+        \\  mov    %%eax,%%ecx
+        \\  mov    0x14(%%esp),%%ebx
+        \\  mov    0x10(%%esp),%%edx
+        \\  mov    0xc(%%esp),%%eax
+        \\ 2:
+        \\  shr    %%ecx
+        \\  rcr    %%ebx
+        \\  shr    %%edx
+        \\  rcr    %%eax
+        \\  or     %%ecx,%%ecx
+        \\  jne    2b
+        \\  div    %%ebx
+        \\  mov    %%eax,%%esi
+        \\  mull   0x18(%%esp)
+        \\  mov    %%eax,%%ecx
+        \\  mov    0x14(%%esp),%%eax
+        \\  mul    %%esi
+        \\  add    %%ecx,%%edx
+        \\  jb     3f
+        \\  cmp    0x10(%%esp),%%edx
+        \\  ja     3f
+        \\  jb     4f
+        \\  cmp    0xc(%%esp),%%eax
+        \\  jbe    4f
+        \\ 3:
+        \\  dec    %%esi
+        \\ 4:
+        \\  xor    %%edx,%%edx
+        \\  mov    %%esi,%%eax
+        \\ 5:
+        \\  pop    %%esi
+        \\  pop    %%ebx
+        \\  ret    $0x10
    );
 }
--- a/lib/std/special/compiler_rt/aullrem.zig
+++ b/lib/std/special/compiler_rt/aullrem.zig
@ -1,56 +1,77 @@
+const builtin = @import("builtin");
+
+/// Signed 64-bit remainder of truncating division (`a - (a / b) * b`).
+/// Both operands are reduced to their magnitudes, the unsigned remainder is
+/// taken, and the sign of the dividend `a` is re-applied to the result.
+pub extern stdcallcc fn _allrem(a: i64, b: i64) i64 {
+    @setRuntimeSafety(builtin.is_test);
+    // Arithmetic shift yields an all-ones mask for negative inputs, else 0.
+    const s_a = a >> (i64.bit_count - 1);
+    const s_b = b >> (i64.bit_count - 1);
+
+    // (x ^ s) -% s negates x when s is all-ones (wrapping absolute value).
+    const an = (a ^ s_a) -% s_a;
+    const bn = (b ^ s_b) -% s_b;
+
+    const r = @bitCast(u64, an) % @bitCast(u64, bn);
+    // The remainder takes the sign of the dividend only; the divisor's sign
+    // must not take part (that rule applies to the quotient, not the
+    // remainder). E.g. 7 % -2 == 1 and -7 % -2 == -1.
+    return (@bitCast(i64, r) ^ s_a) -% s_a;
+}
+
 pub nakedcc fn _aullrem() void {
    @setRuntimeSafety(false);
+
+    // Computes the unsigned 64-bit remainder dividend % divisor and leaves
+    // the result in EDX:EAX. The arguments are read straight from the stack
+    // and are popped by this function itself (`ret $0x10`).
+    //
+    // The stack layout on entry is:
+    // ESP+16 divisor (hi)
+    // ESP+12 divisor (low)
+    // ESP+8 dividend (hi)
+    // ESP+4 dividend (low)
+    // ESP   return address
+    //
+    // EBX is pushed first, so inside the assembly every argument offset is
+    // 4 bytes larger than listed above (e.g. the high half of the divisor is
+    // read from 0x14(%esp)).
+
    asm volatile (
-        \\.intel_syntax noprefix
-        \\
-        \\         push        ebx
-        \\         mov         eax,dword ptr [esp+14h]
-        \\         or          eax,eax
-        \\         jne         L1a
-        \\         mov         ecx,dword ptr [esp+10h]
-        \\         mov         eax,dword ptr [esp+0Ch]
-        \\         xor         edx,edx
-        \\         div         ecx
-        \\         mov         eax,dword ptr [esp+8]
-        \\         div         ecx
-        \\         mov         eax,edx
-        \\         xor         edx,edx
-        \\         jmp         L2a
-        \\ L1a:
-        \\         mov         ecx,eax
-        \\         mov         ebx,dword ptr [esp+10h]
-        \\         mov         edx,dword ptr [esp+0Ch]
-        \\         mov         eax,dword ptr [esp+8]
-        \\ L3a:
-        \\         shr         ecx,1
-        \\         rcr         ebx,1
-        \\         shr         edx,1
-        \\         rcr         eax,1
-        \\         or          ecx,ecx
-        \\         jne         L3a
-        \\         div         ebx
-        \\         mov         ecx,eax
-        \\         mul         dword ptr [esp+14h]
-        \\         xchg        eax,ecx
-        \\         mul         dword ptr [esp+10h]
-        \\         add         edx,ecx
-        \\         jb          L4a
-        \\         cmp         edx,dword ptr [esp+0Ch]
-        \\         ja          L4a
-        \\         jb          L5a
-        \\         cmp         eax,dword ptr [esp+8]
-        \\         jbe         L5a
-        \\ L4a:
-        \\         sub         eax,dword ptr [esp+10h]
-        \\         sbb         edx,dword ptr [esp+14h]
-        \\ L5a:
-        \\         sub         eax,dword ptr [esp+8]
-        \\         sbb         edx,dword ptr [esp+0Ch]
-        \\         neg         edx
-        \\         neg         eax
-        \\         sbb         edx,0
-        \\ L2a:
-        \\         pop         ebx
-        \\         ret         10h
+        \\  push   %%ebx
+        \\  mov    0x14(%%esp),%%eax
+        \\  or     %%eax,%%eax
+        \\  jne    1f
+        \\  mov    0x10(%%esp),%%ecx
+        \\  mov    0xc(%%esp),%%eax
+        \\  xor    %%edx,%%edx
+        \\  div    %%ecx
+        \\  mov    0x8(%%esp),%%eax
+        \\  div    %%ecx
+        \\  mov    %%edx,%%eax
+        \\  xor    %%edx,%%edx
+        \\  jmp    6f
+        \\ 1:
+        \\  mov    %%eax,%%ecx
+        \\  mov    0x10(%%esp),%%ebx
+        \\  mov    0xc(%%esp),%%edx
+        \\  mov    0x8(%%esp),%%eax
+        \\ 2:
+        \\  shr    %%ecx
+        \\  rcr    %%ebx
+        \\  shr    %%edx
+        \\  rcr    %%eax
+        \\  or     %%ecx,%%ecx
+        \\  jne    2b
+        \\  div    %%ebx
+        \\  mov    %%eax,%%ecx
+        \\  mull   0x14(%%esp)
+        \\  xchg   %%eax,%%ecx
+        \\  mull   0x10(%%esp)
+        \\  add    %%ecx,%%edx
+        \\  jb     3f
+        \\  cmp    0xc(%%esp),%%edx
+        \\  ja     3f
+        \\  jb     4f
+        \\  cmp    0x8(%%esp),%%eax
+        \\  jbe    4f
+        \\ 3:
+        \\  sub    0x10(%%esp),%%eax
+        \\  sbb    0x14(%%esp),%%edx
+        \\ 4:
+        \\  sub    0x8(%%esp),%%eax
+        \\  sbb    0xc(%%esp),%%edx
+        \\  neg    %%edx
+        \\  neg    %%eax
+        \\  sbb    $0x0,%%edx
+        \\ 6:
+        \\  pop    %%ebx
+        \\  ret    $0x10
    );
 }
--- a/lib/std/special/compiler_rt/extendXfYf2_test.zig
+++ b/lib/std/special/compiler_rt/extendXfYf2_test.zig
@ -1,3 +1,4 @@
+const builtin = @import("builtin");
 const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2;
 const __extendhfsf2 = @import("extendXfYf2.zig").__extendhfsf2;
 const __extendsftf2 = @import("extendXfYf2.zig").__extendsftf2;
@ -87,7 +88,10 @@ test "extenddftf2" {
 test "extendhfsf2" {
    test__extendhfsf2(0x7e00, 0x7fc00000); // qNaN
    test__extendhfsf2(0x7f00, 0x7fe00000); // sNaN
-    test__extendhfsf2(0x7c01, 0x7f802000); // sNaN
+    // On i386 the signaling NaN becomes quiet because the return value is
+    // pushed on the x87 stack due to ABI requirements, so this case is only
+    // checked on the other architectures (regardless of the OS).
+    if (builtin.arch != .i386)
+        test__extendhfsf2(0x7c01, 0x7f802000); // sNaN

    test__extendhfsf2(0, 0); // 0
    test__extendhfsf2(0x8000, 0x80000000); // -0
--- a/lib/std/special/start_windows_tls.zig
+++ b/lib/std/special/start_windows_tls.zig
@ -1,4 +1,5 @@
 const std = @import("std");
+const builtin = @import("builtin");

 export var _tls_index: u32 = std.os.windows.TLS_OUT_OF_INDEXES;
 export var _tls_start: u8 linksection(".tls") = 0;
@ -6,6 +7,17 @@ export var _tls_end: u8 linksection(".tls$ZZZ") = 0;
 export var __xl_a: std.os.windows.PIMAGE_TLS_CALLBACK linksection(".CRT$XLA") = null;
 export var __xl_z: std.os.windows.PIMAGE_TLS_CALLBACK linksection(".CRT$XLZ") = null;

+comptime {
+    // Only 32-bit x86 gets the symbol definition below; on every other
+    // architecture this block emits nothing.
+    if (builtin.arch == .i386) {
+        // `__tls_array` is the offset (0x2C) of the ThreadLocalStoragePointer
+        // field in the TEB block, whose base address is held in the %fs
+        // segment register. It is defined here as a global absolute symbol
+        // through module-level assembly.
+        asm (
+            \\ .global __tls_array
+            \\ __tls_array = 0x2C
+        );
+    }
+}
+
 // TODO this is how I would like it to be expressed
 // TODO also note, ReactOS has a +1 on StartAddressOfRawData and AddressOfCallBacks. Investigate
 // why they do that.
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@ -913,7 +913,10 @@ bool want_first_arg_sret(CodeGen *g, FnTypeId *fn_type_id) {
    if (type_is_c_abi_int(g, fn_type_id->return_type)) {
        return false;
    }
-    if (g->zig_target->arch == ZigLLVM_x86_64) {
+    if (g->zig_target->arch == ZigLLVM_x86) {
+        X64CABIClass abi_class = type_c_abi_x86_64_class(g, fn_type_id->return_type);
+        return abi_class == X64CABIClass_MEMORY;
+    } else if (g->zig_target->arch == ZigLLVM_x86_64) {
        X64CABIClass abi_class = type_c_abi_x86_64_class(g, fn_type_id->return_type);
        return abi_class == X64CABIClass_MEMORY;
    } else if (target_is_arm(g->zig_target) || target_is_riscv(g->zig_target)) {
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@ -8727,6 +8727,9 @@ static void init(CodeGen *g) {
        // Be aware of https://github.com/ziglang/zig/issues/3275
        target_specific_cpu_args = "";
        target_specific_features = riscv_default_features;
+    } else if (g->zig_target->arch == ZigLLVM_x86) {
+        target_specific_cpu_args = "pentium4";
+        target_specific_features = "";
    } else {
        target_specific_cpu_args = "";
        target_specific_features = "";