init
This commit is contained in:
commit
f7508ba226
288
DiffMatchPatch.zig
Normal file
288
DiffMatchPatch.zig
Normal file
|
@ -0,0 +1,288 @@
|
||||||
|
const DiffMatchPatch = @This();
|
||||||
|
|
||||||
|
const std = @import("std");
|
||||||
|
const ArrayListUnmanaged = std.ArrayListUnmanaged;
|
||||||
|
|
||||||
|
/// A `DiffMatchPatch` value in which every option keeps its declared default.
pub const default: DiffMatchPatch = .{};
|
||||||
|
|
||||||
|
/// One entry of a diff: an operation together with the text it covers.
pub const Diff = struct {
    /// What kind of edit this entry represents.
    operation: Operation,
    /// The bytes the operation applies to.
    text: []const u8,

    /// The kinds of edit a `Diff` entry can describe.
    pub const Operation = enum { insert, delete, equal };
};
|
||||||
|
|
||||||
|
/// How many microseconds a diff may be mapped for before giving up
/// (0 for infinity).
diff_timeout: i64 = std.time.us_per_s,
/// Cost of an empty edit operation, expressed in edit characters.
diff_edit_cost: u16 = 4,

/// The point at which no match is declared
/// (0.0 = perfection, 1.0 = very loose).
match_threshold: f32 = 0.5,
/// How far to search for a match (0 = exact location, 1000+ = broad match).
/// A match this many characters away from the expected location adds 1.0
/// to the score (0.0 is a perfect match).
match_distance: u32 = 1000,
/// The number of bits in an int.
match_max_bits: u16 = 32,

/// When deleting a large block of text (over ~64 characters), how closely
/// the contents have to match the expected contents
/// (0.0 = perfection, 1.0 = very loose). Note that `match_threshold`
/// controls how closely the end points of a delete need to match.
patch_delete_threshold: f32 = 0.5,
/// Chunk size for context length.
patch_margin: u16 = 4,
|
||||||
|
|
||||||
|
/// Errors the diff routines can return: allocation failure only.
pub const DiffError = std.mem.Allocator.Error;
|
||||||
|
|
||||||
|
/// Computes the list of differences that turns `before` into `after`.
///
/// The work is bounded by a deadline derived from `dmp.diff_timeout`
/// (microseconds); a timeout of zero or less means "no deadline".
/// Returns `error.OutOfMemory` if an allocation fails.
///
/// It is recommended that you use an Arena for this operation.
pub fn diff(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    /// If false, then don't run a line-level diff first
    /// to identify the changed areas. If true, then run
    /// a faster slightly less optimal diff.
    check_lines: bool,
) DiffError!ArrayListUnmanaged(Diff) {
    // `diffInternal` takes an unsigned deadline while the clock and the
    // configured timeout are signed, so convert explicitly. Anything that
    // cannot be represented (no timeout, overflow, negative clock) falls
    // back to the largest deadline, i.e. effectively no time limit.
    const no_deadline: u64 = std.math.maxInt(u64);
    const deadline: u64 = blk: {
        if (dmp.diff_timeout <= 0) break :blk no_deadline;
        const limit = std.math.add(i64, std.time.microTimestamp(), dmp.diff_timeout) catch
            break :blk no_deadline;
        break :blk std.math.cast(u64, limit) orelse no_deadline;
    };
    return dmp.diffInternal(allocator, before, after, check_lines, deadline);
}
|
||||||
|
|
||||||
|
/// Diffs `before` against `after` using an explicit `deadline`.
///
/// Identical inputs short-circuit to a single `.equal` entry (or an empty
/// list when both are empty). Otherwise the shared prefix and suffix are
/// stripped, the remaining middle is handed to `diffCompute`, and the
/// stripped text is re-attached as `.equal` entries around that result.
///
/// `check_lines` and `deadline` are forwarded unchanged to `diffCompute`.
/// Returns `error.OutOfMemory` if an allocation fails.
fn diffInternal(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!ArrayListUnmanaged(Diff) {
    // Check for equality (speedup).
    if (std.mem.eql(u8, before, after)) {
        var diffs = ArrayListUnmanaged(Diff){};
        errdefer diffs.deinit(allocator);
        if (before.len != 0) {
            try diffs.append(allocator, Diff{ .operation = .equal, .text = before });
        }
        return diffs;
    }

    // Trim off common prefix (speedup).
    const prefix_len = diffCommonPrefix(before, after);
    const common_prefix = before[0..prefix_len];
    const rest_before = before[prefix_len..];
    const rest_after = after[prefix_len..];

    // Trim off common suffix (speedup). It is measured on the texts that
    // already had the prefix removed, so the suffix can never overlap the
    // prefix and the slice bounds below stay within the trimmed texts.
    const suffix_len = diffCommonSuffix(rest_before, rest_after);
    const common_suffix = rest_before[rest_before.len - suffix_len ..];
    const middle_before = rest_before[0 .. rest_before.len - suffix_len];
    const middle_after = rest_after[0 .. rest_after.len - suffix_len];

    // Compute the diff on the middle block.
    var diffs = try dmp.diffCompute(allocator, middle_before, middle_after, check_lines, deadline);
    errdefer diffs.deinit(allocator);

    // Restore the prefix and suffix.
    if (common_prefix.len != 0) {
        try diffs.insert(allocator, 0, Diff{ .operation = .equal, .text = common_prefix });
    }
    if (common_suffix.len != 0) {
        try diffs.append(allocator, Diff{ .operation = .equal, .text = common_suffix });
    }

    // NOTE(review): `diffCleanupMerge` is not defined in the part of the file
    // visible here; the call is kept as written. Confirm its signature (it is
    // handed the list by value) once it exists.
    diffCleanupMerge(diffs);
    return diffs;
}
|
||||||
|
|
||||||
|
/// Returns the number of leading bytes that `before` and `after` have in
/// common (0 when they differ at the first byte or either is empty).
fn diffCommonPrefix(before: []const u8, after: []const u8) usize {
    // `indexOfDiff` reports the first index at which the slices differ, or
    // the shorter length when one is a prefix of the other. `null` means the
    // slices are identical, in which case the whole text is shared.
    return std.mem.indexOfDiff(u8, before, after) orelse before.len;
}
|
||||||
|
|
||||||
|
/// Returns the number of trailing bytes that `before` and `after` have in
/// common (0 when they differ at the last byte or either is empty).
fn diffCommonSuffix(before: []const u8, after: []const u8) usize {
    // The shared suffix can be no longer than the shorter input.
    const limit = @min(before.len, after.len);
    var shared: usize = 0;
    // Walk backwards from the end of both texts while the bytes agree.
    while (shared < limit and
        before[before.len - 1 - shared] == after[after.len - 1 - shared])
    {
        shared += 1;
    }
    return shared;
}
|
||||||
|
|
||||||
|
/// Finds the differences between two texts; `diffInternal` calls this on
/// what remains after the common prefix and suffix have been stripped.
///
/// Cheap special cases are tried first: one side empty, the shorter text
/// contained in the longer one, and a single-byte shorter text. After that a
/// half-match (see `diffHalfMatch`) splits the problem into two independent
/// diffs. Only when none of these apply is the work handed to the line-mode
/// or bisect algorithm.
///
/// Returns `error.OutOfMemory` if an allocation fails.
fn diffCompute(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!ArrayListUnmanaged(Diff) {
    var diffs = ArrayListUnmanaged(Diff){};
    // Runs on any error return below and frees whatever `diffs` holds at
    // that point (including the list adopted in the half-match branch).
    errdefer diffs.deinit(allocator);

    if (before.len == 0) {
        // Just add some text (speedup).
        try diffs.append(allocator, Diff{ .operation = .insert, .text = after });
        return diffs;
    }

    if (after.len == 0) {
        // Just delete some text (speedup).
        try diffs.append(allocator, Diff{ .operation = .delete, .text = before });
        return diffs;
    }

    const before_is_longer = before.len > after.len;
    const long_text = if (before_is_longer) before else after;
    const short_text = if (before_is_longer) after else before;

    if (std.mem.indexOf(u8, long_text, short_text)) |index| {
        // Shorter text is inside the longer text (speedup).
        // The surrounding pieces are removed when `before` is the longer
        // text and added otherwise; either piece may be empty.
        const op: Diff.Operation = if (before_is_longer) .delete else .insert;
        try diffs.append(allocator, Diff{ .operation = op, .text = long_text[0..index] });
        try diffs.append(allocator, Diff{ .operation = .equal, .text = short_text });
        try diffs.append(allocator, Diff{ .operation = op, .text = long_text[index + short_text.len ..] });
        return diffs;
    }

    if (short_text.len == 1) {
        // Single character string.
        // After the previous speedup, the character can't be an equality.
        try diffs.append(allocator, Diff{ .operation = .delete, .text = before });
        try diffs.append(allocator, Diff{ .operation = .insert, .text = after });
        return diffs;
    }

    // Check to see if the problem can be split in two.
    if (try dmp.diffHalfMatch(allocator, before, after)) |half_match| {
        // A half-match was found. Send both pairs off for separate processing.
        diffs = try dmp.diffInternal(allocator, half_match.prefix_before, half_match.prefix_after, check_lines, deadline);
        var diffs_b = try dmp.diffInternal(allocator, half_match.suffix_before, half_match.suffix_after, check_lines, deadline);
        // The entries of `diffs_b` are copied into `diffs` below, so its
        // backing storage is released on every exit from this branch.
        defer diffs_b.deinit(allocator);

        // Merge the results.
        try diffs.append(allocator, Diff{ .operation = .equal, .text = half_match.common_middle });
        try diffs.appendSlice(allocator, diffs_b.items);
        return diffs;
    }

    // NOTE(review): `diffLineMode` and `diffBisect` are not defined in the
    // part of the file visible here; the calls keep their original shape with
    // the undeclared `text1`/`text2` replaced by `before`/`after`. Confirm
    // their signatures once they exist.
    if (check_lines and before.len > 100 and after.len > 100) {
        return diffLineMode(before, after, deadline);
    }

    return diffBisect(before, after, deadline);
}
|
||||||
|
|
||||||
|
/// Outcome of a half-match search: `null` when no usable common substring
/// was found, otherwise the two texts split around that substring.
const HalfMatchResult = ?struct {
    /// Part of the first text that precedes the common middle.
    prefix_before: []const u8,
    /// Part of the first text that follows the common middle.
    suffix_before: []const u8,
    /// Part of the second text that precedes the common middle.
    prefix_after: []const u8,
    /// Part of the second text that follows the common middle.
    suffix_after: []const u8,
    /// The substring shared by both texts.
    common_middle: []const u8,
};

/// Checks whether `before` and `after` share a substring that is at least
/// half the length of the longer text, which lets the diff be split in two.
///
/// Returns `null` when `dmp.diff_timeout` is zero or less (the split may give
/// a non-optimal diff), when the texts are too short or too different in
/// length for a half-match to exist, or when no such substring is found.
/// Otherwise the result is expressed in terms of `before` and `after`.
fn diffHalfMatch(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
) DiffError!HalfMatchResult {
    if (dmp.diff_timeout <= 0) {
        // Don't risk returning a non-optimal diff if we have unlimited time.
        return null;
    }

    const before_is_longer = before.len > after.len;
    const long_text = if (before_is_longer) before else after;
    const short_text = if (before_is_longer) after else before;

    if (long_text.len < 4 or short_text.len * 2 < long_text.len) {
        return null; // Pointless.
    }

    // First check if the second quarter is the seed for a half-match.
    const first = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 3) / 4);
    // Check again based on the third quarter.
    const second = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 1) / 2);

    const best = blk: {
        const a = first orelse break :blk (second orelse return null);
        const b = second orelse break :blk a;
        // Both matched. Select the longest.
        break :blk if (a.common_middle.len > b.common_middle.len) a else b;
    };

    // The helper was called with (long_text, short_text), so its "before"
    // fields describe the longer text. That is already correct when `before`
    // is the longer one.
    if (before_is_longer) return best;

    // Otherwise `after` was the longer text: swap the two sides back.
    var swapped = best;
    swapped.prefix_before = best.prefix_after;
    swapped.suffix_before = best.suffix_after;
    swapped.prefix_after = best.prefix_before;
    swapped.suffix_after = best.suffix_before;
    return swapped;
}
|
||||||
|
|
||||||
|
/// Looks for a substring shared by `long_text` and `short_text` that is at
/// least half as long as `long_text`, seeded by the quarter-length slice of
/// `long_text` that starts at index `i`.
///
/// `i + long_text.len / 4` must not exceed `long_text.len`.
///
/// The result is expressed relative to the arguments: the `*_before` fields
/// describe `long_text` and the `*_after` fields describe `short_text`.
/// Every returned slice points into one of the two inputs. Returns `null`
/// when the best shared substring is shorter than half of `long_text`.
fn diffHalfMatchInternal(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    long_text: []const u8,
    short_text: []const u8,
    i: usize,
) DiffError!HalfMatchResult {
    // The result only slices the inputs, so neither the configuration nor
    // the allocator is needed; they stay in the signature for the caller.
    _ = dmp;
    _ = allocator;

    // The payload type of the optional `HalfMatchResult`.
    const Match = std.meta.Child(HalfMatchResult);

    // Start with a 1/4 length substring at position i as a seed.
    const seed = long_text[i .. i + long_text.len / 4];

    var best = Match{
        .prefix_before = "",
        .suffix_before = "",
        .prefix_after = "",
        .suffix_after = "",
        .common_middle = "",
    };

    // Visit every occurrence of the seed in the shorter text and grow it in
    // both directions for as long as the two texts keep agreeing.
    var search_from: usize = 0;
    while (search_from <= short_text.len) {
        const j = std.mem.indexOfPos(u8, short_text, search_from, seed) orelse break;
        const prefix_len = diffCommonPrefix(long_text[i..], short_text[j..]);
        const suffix_len = diffCommonSuffix(long_text[0..i], short_text[0..j]);
        if (best.common_middle.len < suffix_len + prefix_len) {
            best = Match{
                .prefix_before = long_text[0 .. i - suffix_len],
                .suffix_before = long_text[i + prefix_len ..],
                .prefix_after = short_text[0 .. j - suffix_len],
                .suffix_after = short_text[j + prefix_len ..],
                // The shared text is contiguous in `short_text`, so it can
                // be returned as a slice without copying.
                .common_middle = short_text[j - suffix_len .. j + prefix_len],
            };
        }
        search_from = j + 1;
    }

    if (best.common_middle.len * 2 >= long_text.len) return best;
    return null;
}
|
21
LICENSE
Normal file
21
LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2023 diffz authors
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
5
README.md
Normal file
5
README.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# diffz
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This library is based on https://github.com/google/diff-match-patch, which is licensed under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0). This library itself is licensed under the MIT License; see `LICENSE`.
|
Loading…
Reference in New Issue
Block a user