51fdbf7f8c
Some performance comparisons to C. We take the fastest time measured across multiple runs. The block hashing functions use the same md5/sha1 methods.

```
Cpu: Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz
Gcc: 7.2.1 20171224
Clang: 5.0.1
Zig: 0.1.1.304f6f1d
```

See https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly:

```
gcc -O2                                        661 Mb/s
clang -O2                                      490 Mb/s
zig --release-fast and zig --release-safe      570 Mb/s
zig                                             50 Mb/s
```

See https://www.nayuki.io/page/fast-sha1-hash-implementation-in-x86-assembly:

```
gcc -O2                                        588 Mb/s
clang -O2                                      563 Mb/s
zig --release-fast and zig --release-safe      610 Mb/s
zig                                             21 Mb/s
```

In short, zig provides pretty useful tools for writing this sort of code. In release mode we are ahead of clang (which uses the same LLVM backend) for both hashes, and slower than GCC only for md5.
281 lines
8.8 KiB
Zig
281 lines
8.8 KiB
Zig
const mem = @import("../mem.zig");
|
|
const math = @import("../math/index.zig");
|
|
const endian = @import("../endian.zig");
|
|
const debug = @import("../debug/index.zig");
|
|
|
|
// Unsigned 160-bit integer type; used as the return type of Sha1.hash and
// Sha1.final.
pub const u160 = @IntType(false, 160);
|
|
|
|
// Parameters for one step of the compression function in Sha1.round.
// `a`..`e` are indices into the five-word working state, and `i` is the
// step number (also used to index the message schedule).
const RoundParam = struct {
    // Index of the word that is rotated left by 5 and added in.
    a: u32,
    // Index of the word that is rotated left by 30 after the step.
    b: u32,
    // Index of the second input to the step's boolean function.
    c: u32,
    // Index of the third input to the step's boolean function.
    d: u32,
    // Index of the word that accumulates the step's result.
    e: u32,
    // Step number.
    i: u32,
};
|
|
|
|
// Positional shorthand for building a RoundParam, so that the step tables in
// Sha1.round can be written one compact entry per line.
fn Rp(a: u32, b: u32, c: u32, d: u32, e: u32, i: u32) -> RoundParam {
    return RoundParam {
        .a = a,
        .b = b,
        .c = c,
        .d = d,
        .e = e,
        .i = i,
    };
}
|
|
|
|
// SHA-1 hasher.
//
// Streaming use: `init()` (or `reset()`), any number of `update()` calls,
// then `final()`. One-shot use: `hash()`. The digest is returned as a u160
// whose most significant 32 bits are the first state word.
pub const Sha1 = struct {
    const Self = this;

    // Running hash state (five 32-bit words).
    s: [5]u32,
    // Streaming cache: input bytes that do not yet fill a 64-byte block.
    buf: [64]u8,
    // Number of valid bytes at the start of `buf`. update() always leaves
    // this strictly below 64.
    buf_len: u8,
    // Total number of message bytes passed to update() since the last reset().
    total_len: u64,

    // Returns a hasher that is ready to accept input.
    pub fn init() -> Self {
        var d: Self = undefined;
        d.reset();
        return d;
    }

    // Restores the initial hash state and discards any buffered input, so the
    // same instance can be used for a new message.
    pub fn reset(d: &Self) {
        d.s[0] = 0x67452301;
        d.s[1] = 0xEFCDAB89;
        d.s[2] = 0x98BADCFE;
        d.s[3] = 0x10325476;
        d.s[4] = 0xC3D2E1F0;
        d.buf_len = 0;
        d.total_len = 0;
    }

    // Hashes the whole of `b` in one call and returns the digest.
    pub fn hash(b: []const u8) -> u160 {
        var d = Sha1.init();
        d.update(b);
        return d.final();
    }

    // Feeds `b` into the hasher. May be called repeatedly; the result is the
    // same as hashing the concatenation of all inputs.
    pub fn update(d: &Self, b: []const u8) {
        var off: usize = 0;

        // Partial buffer exists from previous update. If the new data is
        // enough to complete the block (including the case where it fills it
        // exactly), top the buffer up and hash it.
        if (d.buf_len != 0 and d.buf_len + b.len >= 64) {
            off += 64 - d.buf_len;
            mem.copy(u8, d.buf[d.buf_len..], b[0..off]);

            d.round(d.buf[0..]);
            d.buf_len = 0;
        }

        // Full middle blocks, hashed straight from the input. `<=` so that a
        // block ending exactly at the end of `b` is processed here instead
        // of being left in the buffer.
        while (off + 64 <= b.len) : (off += 64) {
            d.round(b[off..off + 64]);
        }

        // Copy any remainder for next pass. At this point fewer than 64
        // bytes remain in total (buffered + new), so `buf` cannot overflow
        // and `buf_len` stays below 64.
        mem.copy(u8, d.buf[d.buf_len..], b[off..]);
        d.buf_len += u8(b[off..].len);

        d.total_len += b.len;
    }

    // Pads the message, processes the last block(s) and returns the digest.
    //
    // This overwrites the buffer and advances the hash state, so call
    // reset() before using the same instance for another message.
    pub fn final(d: &Self) -> u160 {
        // update() keeps buf_len < 64, so there is always room for the
        // 0x80 marker below. Clear the unused tail first.
        mem.set(u8, d.buf[d.buf_len..], 0);

        // Append padding bits.
        d.buf[d.buf_len] = 0x80;
        d.buf_len += 1;

        // > 448 mod 512 so need to add an extra round to wrap around: the
        // 8-byte length field no longer fits in this block.
        if (64 - d.buf_len < 8) {
            d.round(d.buf[0..]);
            mem.set(u8, d.buf[0..], 0);
        }

        // Append message length in bits (total_len * 8), big-endian, in the
        // last 8 bytes. The lowest byte holds the low 5 bits of total_len
        // shifted up by 3; the other seven bytes come from total_len >> 5.
        var i: usize = 1;
        var len = d.total_len >> 5;
        d.buf[63] = u8(d.total_len & 0x1f) << 3;
        while (i < 8) : (i += 1) {
            d.buf[63 - i] = u8(len & 0xff);
            len >>= 8;
        }

        d.round(d.buf[0..]);

        // Concatenate the five state words, s[0] most significant.
        const r =
            (u160(d.s[0]) << 128) |
            (u160(d.s[1]) << 96) |
            (u160(d.s[2]) << 64) |
            (u160(d.s[3]) << 32) |
            (u160(d.s[4]) << 0);

        // NOTE(review): `r` is assembled arithmetically above; confirm that
        // applying swapIfBe is really intended on big-endian targets.
        return endian.swapIfBe(u160, r);
    }

    // Compression function: mixes one 64-byte block `b` into the hash state.
    //
    // The 80 steps are unrolled at compile time with `inline for` over
    // tables of RoundParam. Each entry names which working words play the
    // a/b/c/d/e roles for that step, so the words never need to be shuffled.
    // The message schedule is kept in a 16-word circular buffer `s`,
    // indexed modulo 16 (`& 0xf`).
    fn round(d: &Self, b: []const u8) {
        debug.assert(b.len == 64);

        // Message schedule window; every entry is written in steps 0..15
        // before it is read.
        var s: [16]u32 = undefined;

        // Working copy of the hash state.
        var v: [5]u32 = []u32 {
            d.s[0], d.s[1], d.s[2], d.s[3], d.s[4],
        };

        // Steps 0..15: schedule words are loaded big-endian from the block.
        const round0a = comptime []RoundParam {
            Rp(0, 1, 2, 3, 4, 0),
            Rp(4, 0, 1, 2, 3, 1),
            Rp(3, 4, 0, 1, 2, 2),
            Rp(2, 3, 4, 0, 1, 3),
            Rp(1, 2, 3, 4, 0, 4),
            Rp(0, 1, 2, 3, 4, 5),
            Rp(4, 0, 1, 2, 3, 6),
            Rp(3, 4, 0, 1, 2, 7),
            Rp(2, 3, 4, 0, 1, 8),
            Rp(1, 2, 3, 4, 0, 9),
            Rp(0, 1, 2, 3, 4, 10),
            Rp(4, 0, 1, 2, 3, 11),
            Rp(3, 4, 0, 1, 2, 12),
            Rp(2, 3, 4, 0, 1, 13),
            Rp(1, 2, 3, 4, 0, 14),
            Rp(0, 1, 2, 3, 4, 15),
        };
        inline for (round0a) |r| {
            s[r.i] = (u32(b[r.i * 4 + 0]) << 24) |
                     (u32(b[r.i * 4 + 1]) << 16) |
                     (u32(b[r.i * 4 + 2]) <<  8) |
                     (u32(b[r.i * 4 + 3]) <<  0);

            // Boolean function for steps 0..19: b selects between c and d.
            v[r.e] = v[r.e] +% math.rotl(u32, v[r.a], u32(5)) +% 0x5A827999 +% s[r.i & 0xf]
                     +% ((v[r.b] & v[r.c]) | (~v[r.b] & v[r.d]));
            v[r.b] = math.rotl(u32, v[r.b], u32(30));
        }

        // Steps 16..19: same boolean function and constant as above, but the
        // schedule word is now derived from earlier schedule words.
        const round0b = comptime []RoundParam {
            Rp(4, 0, 1, 2, 3, 16),
            Rp(3, 4, 0, 1, 2, 17),
            Rp(2, 3, 4, 0, 1, 18),
            Rp(1, 2, 3, 4, 0, 19),
        };
        inline for (round0b) |r| {
            const t = s[(r.i-3) & 0xf] ^ s[(r.i-8) & 0xf] ^ s[(r.i-14) & 0xf] ^ s[(r.i-16) & 0xf];
            s[r.i & 0xf] = math.rotl(u32, t, u32(1));

            v[r.e] = v[r.e] +% math.rotl(u32, v[r.a], u32(5)) +% 0x5A827999 +% s[r.i & 0xf]
                     +% ((v[r.b] & v[r.c]) | (~v[r.b] & v[r.d]));
            v[r.b] = math.rotl(u32, v[r.b], u32(30));
        }

        // Steps 20..39: boolean function is the XOR of b, c and d.
        const round1 = comptime []RoundParam {
            Rp(0, 1, 2, 3, 4, 20),
            Rp(4, 0, 1, 2, 3, 21),
            Rp(3, 4, 0, 1, 2, 22),
            Rp(2, 3, 4, 0, 1, 23),
            Rp(1, 2, 3, 4, 0, 24),
            Rp(0, 1, 2, 3, 4, 25),
            Rp(4, 0, 1, 2, 3, 26),
            Rp(3, 4, 0, 1, 2, 27),
            Rp(2, 3, 4, 0, 1, 28),
            Rp(1, 2, 3, 4, 0, 29),
            Rp(0, 1, 2, 3, 4, 30),
            Rp(4, 0, 1, 2, 3, 31),
            Rp(3, 4, 0, 1, 2, 32),
            Rp(2, 3, 4, 0, 1, 33),
            Rp(1, 2, 3, 4, 0, 34),
            Rp(0, 1, 2, 3, 4, 35),
            Rp(4, 0, 1, 2, 3, 36),
            Rp(3, 4, 0, 1, 2, 37),
            Rp(2, 3, 4, 0, 1, 38),
            Rp(1, 2, 3, 4, 0, 39),
        };
        inline for (round1) |r| {
            const t = s[(r.i-3) & 0xf] ^ s[(r.i-8) & 0xf] ^ s[(r.i-14) & 0xf] ^ s[(r.i-16) & 0xf];
            s[r.i & 0xf] = math.rotl(u32, t, u32(1));

            v[r.e] = v[r.e] +% math.rotl(u32, v[r.a], u32(5)) +% 0x6ED9EBA1 +% s[r.i & 0xf]
                     +% (v[r.b] ^ v[r.c] ^ v[r.d]);
            v[r.b] = math.rotl(u32, v[r.b], u32(30));
        }

        // Steps 40..59: boolean function is the bitwise majority of b, c, d.
        const round2 = comptime []RoundParam {
            Rp(0, 1, 2, 3, 4, 40),
            Rp(4, 0, 1, 2, 3, 41),
            Rp(3, 4, 0, 1, 2, 42),
            Rp(2, 3, 4, 0, 1, 43),
            Rp(1, 2, 3, 4, 0, 44),
            Rp(0, 1, 2, 3, 4, 45),
            Rp(4, 0, 1, 2, 3, 46),
            Rp(3, 4, 0, 1, 2, 47),
            Rp(2, 3, 4, 0, 1, 48),
            Rp(1, 2, 3, 4, 0, 49),
            Rp(0, 1, 2, 3, 4, 50),
            Rp(4, 0, 1, 2, 3, 51),
            Rp(3, 4, 0, 1, 2, 52),
            Rp(2, 3, 4, 0, 1, 53),
            Rp(1, 2, 3, 4, 0, 54),
            Rp(0, 1, 2, 3, 4, 55),
            Rp(4, 0, 1, 2, 3, 56),
            Rp(3, 4, 0, 1, 2, 57),
            Rp(2, 3, 4, 0, 1, 58),
            Rp(1, 2, 3, 4, 0, 59),
        };
        inline for (round2) |r| {
            const t = s[(r.i-3) & 0xf] ^ s[(r.i-8) & 0xf] ^ s[(r.i-14) & 0xf] ^ s[(r.i-16) & 0xf];
            s[r.i & 0xf] = math.rotl(u32, t, u32(1));

            v[r.e] = v[r.e] +% math.rotl(u32, v[r.a], u32(5)) +% 0x8F1BBCDC +% s[r.i & 0xf]
                     +% ((v[r.b] & v[r.c]) ^ (v[r.b] & v[r.d]) ^ (v[r.c] & v[r.d]));
            v[r.b] = math.rotl(u32, v[r.b], u32(30));
        }

        // Steps 60..79: XOR of b, c and d again, with the last constant.
        const round3 = comptime []RoundParam {
            Rp(0, 1, 2, 3, 4, 60),
            Rp(4, 0, 1, 2, 3, 61),
            Rp(3, 4, 0, 1, 2, 62),
            Rp(2, 3, 4, 0, 1, 63),
            Rp(1, 2, 3, 4, 0, 64),
            Rp(0, 1, 2, 3, 4, 65),
            Rp(4, 0, 1, 2, 3, 66),
            Rp(3, 4, 0, 1, 2, 67),
            Rp(2, 3, 4, 0, 1, 68),
            Rp(1, 2, 3, 4, 0, 69),
            Rp(0, 1, 2, 3, 4, 70),
            Rp(4, 0, 1, 2, 3, 71),
            Rp(3, 4, 0, 1, 2, 72),
            Rp(2, 3, 4, 0, 1, 73),
            Rp(1, 2, 3, 4, 0, 74),
            Rp(0, 1, 2, 3, 4, 75),
            Rp(4, 0, 1, 2, 3, 76),
            Rp(3, 4, 0, 1, 2, 77),
            Rp(2, 3, 4, 0, 1, 78),
            Rp(1, 2, 3, 4, 0, 79),
        };
        inline for (round3) |r| {
            const t = s[(r.i-3) & 0xf] ^ s[(r.i-8) & 0xf] ^ s[(r.i-14) & 0xf] ^ s[(r.i-16) & 0xf];
            s[r.i & 0xf] = math.rotl(u32, t, u32(1));

            v[r.e] = v[r.e] +% math.rotl(u32, v[r.a], u32(5)) +% 0xCA62C1D6 +% s[r.i & 0xf]
                     +% (v[r.b] ^ v[r.c] ^ v[r.d]);
            v[r.b] = math.rotl(u32, v[r.b], u32(30));
        }

        // Fold the working words back into the running state (wrapping add).
        d.s[0] +%= v[0];
        d.s[1] +%= v[1];
        d.s[2] +%= v[2];
        d.s[3] +%= v[3];
        d.s[4] +%= v[4];
    }
};
|
|
|
|
// One-shot hashing against known SHA-1 digests: the empty message, a short
// message, and a 112-byte message that spans more than one block.
test "sha1 single" {
    const empty_digest = Sha1.hash("");
    debug.assert(empty_digest == 0xda39a3ee5e6b4b0d3255bfef95601890afd80709);

    const abc_digest = Sha1.hash("abc");
    debug.assert(abc_digest == 0xa9993e364706816aba3e25717850c26c9cd0d89d);

    const long_digest = Sha1.hash("abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu");
    debug.assert(long_digest == 0xa49b2446a02c645bf419f995b67091253a04a259);
}
|
|
|
|
// Streaming interface: the digest must not depend on how the input is split
// across update() calls, and reset() must make the instance reusable.
test "sha1 streaming" {
    const expected_empty = 0xda39a3ee5e6b4b0d3255bfef95601890afd80709;
    const expected_abc = 0xa9993e364706816aba3e25717850c26c9cd0d89d;

    var h = Sha1.init();

    // No input at all.
    debug.assert(h.final() == expected_empty);

    // Whole message in a single update.
    h.reset();
    h.update("abc");
    debug.assert(h.final() == expected_abc);

    // Same message, one byte per update.
    h.reset();
    h.update("a");
    h.update("b");
    h.update("c");
    debug.assert(h.final() == expected_abc);
}
|