Merge pull request #3389 from LemonBoy/win32

Win32
This commit is contained in:
Andrew Kelley 2019-10-09 13:22:16 -04:00 committed by GitHub
commit f929a58d5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 176 additions and 103 deletions

View File

@ -248,8 +248,17 @@ comptime {
switch (builtin.arch) {
.i386 => {
@export("_alldiv", @import("compiler_rt/aulldiv.zig")._alldiv, strong_linkage);
@export("_aulldiv", @import("compiler_rt/aulldiv.zig")._aulldiv, strong_linkage);
@export("_allrem", @import("compiler_rt/aullrem.zig")._allrem, strong_linkage);
@export("_aullrem", @import("compiler_rt/aullrem.zig")._aullrem, strong_linkage);
@export("__divti3", @import("compiler_rt/divti3.zig").__divti3, linkage);
@export("__modti3", @import("compiler_rt/modti3.zig").__modti3, linkage);
@export("__multi3", @import("compiler_rt/multi3.zig").__multi3, linkage);
@export("__udivti3", @import("compiler_rt/udivti3.zig").__udivti3, linkage);
@export("__udivmodti4", @import("compiler_rt/udivmodti4.zig").__udivmodti4, linkage);
@export("__umodti3", @import("compiler_rt/umodti3.zig").__umodti3, linkage);
},
.x86_64 => {
// The "ti" functions must use @Vector(2, u64) parameter types to adhere to the ABI

View File

@ -1,55 +1,76 @@
const builtin = @import("builtin");
/// Signed 64-bit division with the stdcall calling convention.
///
/// Both operands are reduced to their magnitudes, divided as unsigned
/// integers, and the quotient is negated when the operand signs differ.
/// `b` is not checked for zero here.
pub extern stdcallcc fn _alldiv(a: i64, b: i64) i64 {
    @setRuntimeSafety(builtin.is_test);

    // Arithmetic shift by 63 yields 0 for non-negative inputs, -1 otherwise.
    const sign_shift = i64.bit_count - 1;
    const dividend_sign = a >> sign_shift;
    const divisor_sign = b >> sign_shift;

    // (x ^ sign) -% sign is a branch-free absolute value (wrapping on minInt).
    const dividend_mag = @bitCast(u64, (a ^ dividend_sign) -% dividend_sign);
    const divisor_mag = @bitCast(u64, (b ^ divisor_sign) -% divisor_sign);

    const quotient = @bitCast(i64, dividend_mag / divisor_mag);

    // The quotient is negative exactly when the operand signs differ.
    const quotient_sign = dividend_sign ^ divisor_sign;
    return (quotient ^ quotient_sign) -% quotient_sign;
}
/// Naked routine implemented entirely in x86 assembly. Judging by the name it
/// is the unsigned 64-bit division helper (presumably the MSVC `_aulldiv`
/// intrinsic — confirm against the export site).
///
/// Both 64-bit operands are read from the stack, the quotient is left in
/// EDX:EAX, and the final `ret 10h` / `ret $0x10` pops the 16 bytes of
/// arguments.
pub nakedcc fn _aulldiv() void {
@setRuntimeSafety(false);
// The stack layout on entry is shown below. The routine pushes EBX and ESI
// first, so inside the assembly every argument offset is 8 bytes larger
// (e.g. divisor (hi) is read from ESP+18h).
// ESP+16 divisor (hi)
// ESP+12 divisor (low)
// ESP+8 dividend (hi)
// ESP+4 dividend (low)
// ESP return address
//
// Outline of the algorithm:
//  * divisor (hi) == 0: two chained 32-bit `div`s, high dword first; the
//    high quotient is kept in EBX and moved to EDX at the end.
//  * otherwise: shift divisor and dividend right together until the
//    divisor fits in 32 bits, divide to get a quotient estimate in ESI,
//    multiply the estimate by the original divisor and compare with the
//    original dividend; if the product overflowed or is larger, decrement
//    the estimate. The high dword of the quotient is zero in this case.
//
// NOTE(review): the assembly text below contains the routine twice, first in
// Intel syntax (labels L1..L5) and then in AT&T syntax (numeric local
// labels). This looks like the removed and added lines of a diff shown
// together — confirm that only one copy is meant to be assembled.
asm volatile (
\\.intel_syntax noprefix
\\
\\ push ebx
\\ push esi
\\ mov eax,dword ptr [esp+18h]
\\ or eax,eax
\\ jne L1
\\ mov ecx,dword ptr [esp+14h]
\\ mov eax,dword ptr [esp+10h]
\\ xor edx,edx
\\ div ecx
\\ mov ebx,eax
\\ mov eax,dword ptr [esp+0Ch]
\\ div ecx
\\ mov edx,ebx
\\ jmp L2
\\ L1:
\\ mov ecx,eax
\\ mov ebx,dword ptr [esp+14h]
\\ mov edx,dword ptr [esp+10h]
\\ mov eax,dword ptr [esp+0Ch]
\\ L3:
\\ shr ecx,1
\\ rcr ebx,1
\\ shr edx,1
\\ rcr eax,1
\\ or ecx,ecx
\\ jne L3
\\ div ebx
\\ mov esi,eax
\\ mul dword ptr [esp+18h]
\\ mov ecx,eax
\\ mov eax,dword ptr [esp+14h]
\\ mul esi
\\ add edx,ecx
\\ jb L4
\\ cmp edx,dword ptr [esp+10h]
\\ ja L4
\\ jb L5
\\ cmp eax,dword ptr [esp+0Ch]
\\ jbe L5
\\ L4:
\\ dec esi
\\ L5:
\\ xor edx,edx
\\ mov eax,esi
\\ L2:
\\ pop esi
\\ pop ebx
\\ ret 10h
\\ push %%ebx
\\ push %%esi
\\ mov 0x18(%%esp),%%eax
\\ or %%eax,%%eax
\\ jne 1f
\\ mov 0x14(%%esp),%%ecx
\\ mov 0x10(%%esp),%%eax
\\ xor %%edx,%%edx
\\ div %%ecx
\\ mov %%eax,%%ebx
\\ mov 0xc(%%esp),%%eax
\\ div %%ecx
\\ mov %%ebx,%%edx
\\ jmp 5f
\\ 1:
\\ mov %%eax,%%ecx
\\ mov 0x14(%%esp),%%ebx
\\ mov 0x10(%%esp),%%edx
\\ mov 0xc(%%esp),%%eax
\\ 2:
\\ shr %%ecx
\\ rcr %%ebx
\\ shr %%edx
\\ rcr %%eax
\\ or %%ecx,%%ecx
\\ jne 2b
\\ div %%ebx
\\ mov %%eax,%%esi
\\ mull 0x18(%%esp)
\\ mov %%eax,%%ecx
\\ mov 0x14(%%esp),%%eax
\\ mul %%esi
\\ add %%ecx,%%edx
\\ jb 3f
\\ cmp 0x10(%%esp),%%edx
\\ ja 3f
\\ jb 4f
\\ cmp 0xc(%%esp),%%eax
\\ jbe 4f
\\ 3:
\\ dec %%esi
\\ 4:
\\ xor %%edx,%%edx
\\ mov %%esi,%%eax
\\ 5:
\\ pop %%esi
\\ pop %%ebx
\\ ret $0x10
);
}

View File

@ -1,56 +1,77 @@
const builtin = @import("builtin");
/// Signed 64-bit remainder with the stdcall calling convention.
///
/// Both operands are reduced to their magnitudes, the remainder is computed
/// on the unsigned values, and the result is given the sign of the dividend
/// `a` (truncated-division semantics, matching C's `%`):
///   _allrem( 7,  2) ==  1      _allrem( 7, -2) ==  1
///   _allrem(-7,  2) == -1      _allrem(-7, -2) == -1
/// `b` is not checked for zero here.
pub extern stdcallcc fn _allrem(a: i64, b: i64) i64 {
    @setRuntimeSafety(builtin.is_test);

    // Arithmetic shift by 63 yields 0 for non-negative inputs, -1 otherwise.
    const s_a = a >> (i64.bit_count - 1);
    const s_b = b >> (i64.bit_count - 1);

    // (x ^ sign) -% sign is a branch-free absolute value (wrapping on minInt).
    const an = (a ^ s_a) -% s_a;
    const bn = (b ^ s_b) -% s_b;

    const r = @bitCast(u64, an) % @bitCast(u64, bn);

    // The remainder follows the sign of the dividend only. Using
    // `s_a ^ s_b` (the quotient rule) would flip the result whenever the
    // divisor is negative.
    return (@bitCast(i64, r) ^ s_a) -% s_a;
}
/// Naked routine implemented entirely in x86 assembly. Judging by the name it
/// is the unsigned 64-bit remainder helper (presumably the MSVC `_aullrem`
/// intrinsic — confirm against the export site).
///
/// Both 64-bit operands are read from the stack, the remainder is left in
/// EDX:EAX, and the final `ret 10h` / `ret $0x10` pops the 16 bytes of
/// arguments.
pub nakedcc fn _aullrem() void {
@setRuntimeSafety(false);
// The stack layout on entry is shown below. The routine pushes EBX first, so
// inside the assembly every argument offset is 4 bytes larger (e.g. divisor
// (hi) is read from ESP+14h).
// ESP+16 divisor (hi)
// ESP+12 divisor (low)
// ESP+8 dividend (hi)
// ESP+4 dividend (low)
// ESP return address
//
// Outline of the algorithm:
//  * divisor (hi) == 0: two chained 32-bit `div`s, high dword first; the
//    final remainder in EDX is moved to EAX and EDX is cleared.
//  * otherwise: shift divisor and dividend right together until the
//    divisor fits in 32 bits, divide to get a quotient estimate, multiply
//    the estimate by the original divisor, and if the product overflowed
//    or exceeds the original dividend subtract the divisor once. The
//    remainder is then obtained as -(product - dividend).
//
// NOTE(review): the assembly text below contains the routine twice, first in
// Intel syntax (labels L1a..L5a) and then in AT&T syntax (numeric local
// labels). This looks like the removed and added lines of a diff shown
// together — confirm that only one copy is meant to be assembled.
asm volatile (
\\.intel_syntax noprefix
\\
\\ push ebx
\\ mov eax,dword ptr [esp+14h]
\\ or eax,eax
\\ jne L1a
\\ mov ecx,dword ptr [esp+10h]
\\ mov eax,dword ptr [esp+0Ch]
\\ xor edx,edx
\\ div ecx
\\ mov eax,dword ptr [esp+8]
\\ div ecx
\\ mov eax,edx
\\ xor edx,edx
\\ jmp L2a
\\ L1a:
\\ mov ecx,eax
\\ mov ebx,dword ptr [esp+10h]
\\ mov edx,dword ptr [esp+0Ch]
\\ mov eax,dword ptr [esp+8]
\\ L3a:
\\ shr ecx,1
\\ rcr ebx,1
\\ shr edx,1
\\ rcr eax,1
\\ or ecx,ecx
\\ jne L3a
\\ div ebx
\\ mov ecx,eax
\\ mul dword ptr [esp+14h]
\\ xchg eax,ecx
\\ mul dword ptr [esp+10h]
\\ add edx,ecx
\\ jb L4a
\\ cmp edx,dword ptr [esp+0Ch]
\\ ja L4a
\\ jb L5a
\\ cmp eax,dword ptr [esp+8]
\\ jbe L5a
\\ L4a:
\\ sub eax,dword ptr [esp+10h]
\\ sbb edx,dword ptr [esp+14h]
\\ L5a:
\\ sub eax,dword ptr [esp+8]
\\ sbb edx,dword ptr [esp+0Ch]
\\ neg edx
\\ neg eax
\\ sbb edx,0
\\ L2a:
\\ pop ebx
\\ ret 10h
\\ push %%ebx
\\ mov 0x14(%%esp),%%eax
\\ or %%eax,%%eax
\\ jne 1f
\\ mov 0x10(%%esp),%%ecx
\\ mov 0xc(%%esp),%%eax
\\ xor %%edx,%%edx
\\ div %%ecx
\\ mov 0x8(%%esp),%%eax
\\ div %%ecx
\\ mov %%edx,%%eax
\\ xor %%edx,%%edx
\\ jmp 6f
\\ 1:
\\ mov %%eax,%%ecx
\\ mov 0x10(%%esp),%%ebx
\\ mov 0xc(%%esp),%%edx
\\ mov 0x8(%%esp),%%eax
\\ 2:
\\ shr %%ecx
\\ rcr %%ebx
\\ shr %%edx
\\ rcr %%eax
\\ or %%ecx,%%ecx
\\ jne 2b
\\ div %%ebx
\\ mov %%eax,%%ecx
\\ mull 0x14(%%esp)
\\ xchg %%eax,%%ecx
\\ mull 0x10(%%esp)
\\ add %%ecx,%%edx
\\ jb 3f
\\ cmp 0xc(%%esp),%%edx
\\ ja 3f
\\ jb 4f
\\ cmp 0x8(%%esp),%%eax
\\ jbe 4f
\\ 3:
\\ sub 0x10(%%esp),%%eax
\\ sbb 0x14(%%esp),%%edx
\\ 4:
\\ sub 0x8(%%esp),%%eax
\\ sbb 0xc(%%esp),%%edx
\\ neg %%edx
\\ neg %%eax
\\ sbb $0x0,%%edx
\\ 6:
\\ pop %%ebx
\\ ret $0x10
);
}

View File

@ -1,3 +1,4 @@
const builtin = @import("builtin");
const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2;
const __extendhfsf2 = @import("extendXfYf2.zig").__extendhfsf2;
const __extendsftf2 = @import("extendXfYf2.zig").__extendsftf2;
@ -87,7 +88,10 @@ test "extenddftf2" {
test "extendhfsf2" {
test__extendhfsf2(0x7e00, 0x7fc00000); // qNaN
test__extendhfsf2(0x7f00, 0x7fe00000); // sNaN
test__extendhfsf2(0x7c01, 0x7f802000); // sNaN
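// On 32-bit x86 the signaling NaN becomes quiet because the return value is
// pushed on the x87 stack due to ABI requirements, so the exact bit pattern
// cannot be checked there.
// NOTE(review): the guard below runs this case only on non-i386 Windows
// targets; if the intent is to skip it just on i386, the `builtin.os` term
// looks wrong — confirm.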
if (builtin.arch != .i386 and builtin.os == .windows)
test__extendhfsf2(0x7c01, 0x7f802000); // sNaN
test__extendhfsf2(0, 0); // 0
test__extendhfsf2(0x8000, 0x80000000); // -0

View File

@ -1,4 +1,5 @@
const std = @import("std");
const builtin = @import("builtin");
// Exported TLS index, initialised to `std.os.windows.TLS_OUT_OF_INDEXES`.
// NOTE(review): presumably overwritten by the Windows loader at startup — confirm.
export var _tls_index: u32 = std.os.windows.TLS_OUT_OF_INDEXES;
// Single zero byte placed in the `.tls` section.
// NOTE(review): presumably marks the start of the TLS data template — confirm.
export var _tls_start: u8 linksection(".tls") = 0;
@ -6,6 +7,17 @@ export var _tls_end: u8 linksection(".tls$ZZZ") = 0;
// Null TLS callback pointers placed in the `.CRT$XLA` and `.CRT$XLZ` sections.
// NOTE(review): presumably these bracket the TLS callback array (first and
// last entries once the linker orders the `.CRT$XL*` sections) — confirm.
export var __xl_a: std.os.windows.PIMAGE_TLS_CALLBACK linksection(".CRT$XLA") = null;
export var __xl_z: std.os.windows.PIMAGE_TLS_CALLBACK linksection(".CRT$XLZ") = null;
// On i386 only, emit a global assembly snippet that defines the absolute
// symbol `__tls_array` and makes it globally visible.
comptime {
if (builtin.arch == .i386) {
// `__tls_array` is the offset (0x2C) of the ThreadLocalStoragePointer field
// in the TEB block, whose base address is held in the %fs segment.
asm (
\\ .global __tls_array
\\ __tls_array = 0x2C
);
}
}
// TODO this is how I would like it to be expressed
// TODO also note, ReactOS has a +1 on StartAddressOfRawData and AddressOfCallBacks. Investigate
// why they do that.

View File

@ -913,7 +913,10 @@ bool want_first_arg_sret(CodeGen *g, FnTypeId *fn_type_id) {
if (type_is_c_abi_int(g, fn_type_id->return_type)) {
return false;
}
if (g->zig_target->arch == ZigLLVM_x86_64) {
if (g->zig_target->arch == ZigLLVM_x86) {
X64CABIClass abi_class = type_c_abi_x86_64_class(g, fn_type_id->return_type);
return abi_class == X64CABIClass_MEMORY;
} else if (g->zig_target->arch == ZigLLVM_x86_64) {
X64CABIClass abi_class = type_c_abi_x86_64_class(g, fn_type_id->return_type);
return abi_class == X64CABIClass_MEMORY;
} else if (target_is_arm(g->zig_target) || target_is_riscv(g->zig_target)) {

View File

@ -8727,6 +8727,9 @@ static void init(CodeGen *g) {
// Be aware of https://github.com/ziglang/zig/issues/3275
target_specific_cpu_args = "";
target_specific_features = riscv_default_features;
} else if (g->zig_target->arch == ZigLLVM_x86) {
target_specific_cpu_args = "pentium4";
target_specific_features = "";
} else {
target_specific_cpu_args = "";
target_specific_features = "";