This commit is contained in:
Auguste Rame 2023-02-08 23:44:16 -05:00
commit f7508ba226
No known key found for this signature in database
GPG Key ID: 3A5E3F90DF2AAEFE
3 changed files with 314 additions and 0 deletions

288
DiffMatchPatch.zig Normal file
View File

@ -0,0 +1,288 @@
const DiffMatchPatch = @This();
const std = @import("std");
const ArrayListUnmanaged = std.ArrayListUnmanaged;
/// A `DiffMatchPatch` whose configuration fields all keep their declared
/// default values.
pub const default = DiffMatchPatch{};
/// A single edit: an operation paired with the text it applies to.
pub const Diff = struct {
/// The kind of edit a `Diff` describes.
pub const Operation = enum {
insert,
delete,
equal,
};
/// Which kind of edit this is.
operation: Operation,
/// The bytes the operation applies to.
text: []const u8,
};
/// Number of microseconds to map a diff before giving up (0 for infinity).
diff_timeout: i64 = 1 * std.time.us_per_s,
/// Cost of an empty edit operation in terms of edit characters.
diff_edit_cost: u16 = 4,
/// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
match_threshold: f32 = 0.5,
/// How far to search for a match (0 = exact location, 1000+ = broad match).
/// A match this many characters away from the expected location will add
/// 1.0 to the score (0.0 is a perfect match).
match_distance: u32 = 1000,
/// The number of bits in an int.
match_max_bits: u16 = 32,
/// When deleting a large block of text (over ~64 characters), how close
/// do the contents have to be to match the expected contents. (0.0 =
/// perfection, 1.0 = very loose). Note that `match_threshold` controls
/// how closely the end points of a delete need to match.
patch_delete_threshold: f32 = 0.5,
/// Chunk size for context length.
patch_margin: u16 = 4,
/// Error set returned by the diff functions in this file; allocation failure
/// is its only member.
pub const DiffError = error{OutOfMemory};
/// Computes the list of edits that turns `before` into `after`.
///
/// The time budget comes from `dmp.diff_timeout` (microseconds). A timeout of
/// zero or less means "no deadline", matching the field's documentation; any
/// other value is added to the current time to form an absolute deadline.
///
/// The returned list is allocated with `allocator`.
/// It is recommended that you use an Arena for this operation.
pub fn diff(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    /// If false, then don't run a line-level diff first
    /// to identify the changed areas. If true, then run
    /// a faster slightly less optimal diff.
    check_lines: bool,
) DiffError!ArrayListUnmanaged(Diff) {
    const no_deadline: u64 = std.math.maxInt(u64);
    const deadline: u64 = if (dmp.diff_timeout <= 0)
        no_deadline
    else blk: {
        // Saturate rather than overflow for very large timeouts.
        const end = std.time.microTimestamp() +| dmp.diff_timeout;
        // `diffInternal` takes an unsigned deadline; if the signed sum cannot
        // be represented (clock before the epoch), fall back to no deadline.
        break :blk std.math.cast(u64, end) orelse no_deadline;
    };
    return dmp.diffInternal(allocator, before, after, check_lines, deadline);
}
/// Diffs `before` against `after` with an absolute `deadline`.
///
/// Equal inputs short-circuit to at most one `.equal` entry. Otherwise the
/// common prefix and suffix are stripped, the remaining middle is handed to
/// `diffCompute`, and the prefix/suffix are re-attached as `.equal` entries.
/// Every `text` produced here is a slice of `before`; nothing is copied.
///
/// Returns `error.OutOfMemory` if the list cannot grow; the partially built
/// list is released in that case.
fn diffInternal(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!ArrayListUnmanaged(Diff) {
    // Check for equality (speedup).
    if (std.mem.eql(u8, before, after)) {
        var equal_diffs = ArrayListUnmanaged(Diff){};
        if (before.len != 0) {
            try equal_diffs.append(allocator, Diff{ .operation = .equal, .text = before });
        }
        return equal_diffs;
    }

    // Trim off common prefix (speedup).
    const prefix_length = diffCommonPrefix(before, after);
    const common_prefix = before[0..prefix_length];

    // Trim off common suffix (speedup). The suffix is measured on what is left
    // after removing the prefix so the two can never overlap (e.g. "a" vs
    // "aa" has a 1-byte prefix and must then have a 0-byte suffix).
    const suffix_length = diffCommonSuffix(before[prefix_length..], after[prefix_length..]);
    const common_suffix = before[before.len - suffix_length ..];

    const middle_before = before[prefix_length .. before.len - suffix_length];
    const middle_after = after[prefix_length .. after.len - suffix_length];

    // Compute the diff on the middle block.
    var diffs = try dmp.diffCompute(allocator, middle_before, middle_after, check_lines, deadline);
    errdefer diffs.deinit(allocator);

    // Restore the prefix and suffix.
    if (common_prefix.len != 0) {
        try diffs.insert(allocator, 0, Diff{ .operation = .equal, .text = common_prefix });
    }
    if (common_suffix.len != 0) {
        try diffs.append(allocator, Diff{ .operation = .equal, .text = common_suffix });
    }

    // NOTE(review): `diffCleanupMerge` is not defined in the visible part of
    // this file, so the call is kept exactly as written. If it is meant to
    // edit the list in place it will need `&diffs` (and likely the allocator).
    diffCleanupMerge(diffs);
    return diffs;
}
/// Returns the number of leading bytes that `before` and `after` share.
///
/// The result is in `0..=min(before.len, after.len)`; it is 0 when either
/// slice is empty or the first bytes differ.
fn diffCommonPrefix(before: []const u8, after: []const u8) usize {
    // `indexOfDiff` reports the first index at which the slices differ (the
    // shorter length when one is a strict prefix of the other) and `null`
    // only when they are identical, in which case everything is shared.
    return std.mem.indexOfDiff(u8, before, after) orelse before.len;
}
/// Returns the number of trailing bytes that `before` and `after` share.
///
/// The result is in `0..=min(before.len, after.len)`; it is 0 when either
/// slice is empty or the last bytes differ.
fn diffCommonSuffix(before: []const u8, after: []const u8) usize {
    // Computed inline so the function does not depend on `std.math.min`,
    // which is no longer part of the standard library in newer Zig releases.
    const limit = if (before.len < after.len) before.len else after.len;

    // Walk backwards from the end of both slices until a mismatch.
    var matched: usize = 0;
    while (matched < limit) : (matched += 1) {
        if (before[before.len - 1 - matched] != after[after.len - 1 - matched]) break;
    }
    return matched;
}
/// Diffs two texts that the caller has already stripped of any common prefix
/// and suffix.
///
/// Tries a series of cheap special cases before falling back to the general
/// algorithms:
///   1. one side empty -> a single insert or delete;
///   2. the shorter text occurs inside the longer one -> edit / equal / edit;
///   3. the shorter text is a single byte -> delete + insert;
///   4. a half-match exists -> diff the two halves independently and join
///      them around the common middle;
///   5. otherwise a line-mode diff (when `check_lines` is set and both texts
///      exceed 100 bytes) or a bisecting diff.
///
/// Every `text` produced here is a slice of `before` or `after`. On error the
/// partially built list is released.
fn diffCompute(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!ArrayListUnmanaged(Diff) {
    var diffs = ArrayListUnmanaged(Diff){};
    // Evaluated on error exit with whatever `diffs` holds at that point, so it
    // also covers the list taken over from `diffInternal` further down.
    errdefer diffs.deinit(allocator);

    if (before.len == 0) {
        // Just add some text (speedup).
        try diffs.append(allocator, Diff{ .operation = .insert, .text = after });
        return diffs;
    }

    if (after.len == 0) {
        // Just delete some text (speedup).
        try diffs.append(allocator, Diff{ .operation = .delete, .text = before });
        return diffs;
    }

    const before_is_longer = before.len > after.len;
    const long_text = if (before_is_longer) before else after;
    const short_text = if (before_is_longer) after else before;

    if (std.mem.indexOf(u8, long_text, short_text)) |index| {
        // Shorter text is inside the longer text (speedup).
        const op: Diff.Operation = if (before_is_longer) .delete else .insert;
        try diffs.append(allocator, Diff{ .operation = op, .text = long_text[0..index] });
        try diffs.append(allocator, Diff{ .operation = .equal, .text = short_text });
        try diffs.append(allocator, Diff{ .operation = op, .text = long_text[index + short_text.len ..] });
        return diffs;
    }

    if (short_text.len == 1) {
        // Single character string.
        // After the previous speedup, the character can't be an equality.
        try diffs.append(allocator, Diff{ .operation = .delete, .text = before });
        try diffs.append(allocator, Diff{ .operation = .insert, .text = after });
        return diffs;
    }

    // Check to see if the problem can be split in two.
    if (try dmp.diffHalfMatch(allocator, before, after)) |half_match| {
        // A half-match was found. Send both pairs off for separate processing.
        // `diffs` is still empty here, so taking over the first result does
        // not drop any allocation.
        diffs = try dmp.diffInternal(
            allocator,
            half_match.prefix_before,
            half_match.prefix_after,
            check_lines,
            deadline,
        );
        var diffs_b = try dmp.diffInternal(
            allocator,
            half_match.suffix_before,
            half_match.suffix_after,
            check_lines,
            deadline,
        );
        // The entries are copied into `diffs` below; only the backing array
        // of the second list needs freeing.
        defer diffs_b.deinit(allocator);

        // Merge the results.
        try diffs.append(allocator, Diff{ .operation = .equal, .text = half_match.common_middle });
        try diffs.appendSlice(allocator, diffs_b.items);
        return diffs;
    }

    // NOTE(review): `diffLineMode` and `diffBisect` are not defined in the
    // visible part of this file. The call shape is kept as written, with the
    // undefined `text1`/`text2` replaced by this function's own parameters.
    if (check_lines and before.len > 100 and after.len > 100) {
        return diffLineMode(before, after, deadline);
    }

    return diffBisect(before, after, deadline);
}
/// The pieces of a successful half-match: what precedes and follows the
/// shared middle in each of the two texts, plus the shared middle itself.
/// All fields are slices of the texts that were searched.
const HalfMatch = struct {
    prefix_before: []const u8,
    suffix_before: []const u8,
    prefix_after: []const u8,
    suffix_after: []const u8,
    common_middle: []const u8,
};

/// A `HalfMatch`, or `null` when no half-match was found.
const HalfMatchResult = ?HalfMatch;

/// Looks for a substring shared by `before` and `after` that is at least half
/// as long as the longer of the two, so the diff can be split around it.
///
/// Returns `null` when:
///   - `dmp.diff_timeout` is zero or less (unlimited time, so a possibly
///     non-optimal split is not worth it);
///   - the longer text has fewer than 4 bytes, or the shorter text is less
///     than half the length of the longer one;
///   - neither seed position yields a long enough common substring.
///
/// Otherwise the result is expressed in terms of `before`/`after`, regardless
/// of which of them is the longer text.
fn diffHalfMatch(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
) DiffError!HalfMatchResult {
    if (dmp.diff_timeout <= 0) {
        // Don't risk returning a non-optimal diff if we have unlimited time.
        return null;
    }

    const before_is_longer = before.len > after.len;
    const long_text = if (before_is_longer) before else after;
    const short_text = if (before_is_longer) after else before;

    if (long_text.len < 4 or short_text.len * 2 < long_text.len) {
        return null; // Pointless.
    }

    // First check if the second quarter is the seed for a half-match.
    const half_match_1 = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 3) / 4);
    // Check again based on the third quarter.
    const half_match_2 = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 1) / 2);

    const best: HalfMatch = blk: {
        if (half_match_1) |first| {
            if (half_match_2) |second| {
                // Both matched. Select the longest.
                break :blk if (first.common_middle.len > second.common_middle.len) first else second;
            }
            break :blk first;
        }
        break :blk half_match_2 orelse return null;
    };

    // A half-match was found, sort out the return data.
    // `diffHalfMatchInternal` reports the long text's pieces in the `*_before`
    // fields and the short text's pieces in the `*_after` fields, so they only
    // line up with the caller's view when `before` is the longer text.
    if (before_is_longer) {
        return best;
    }
    return HalfMatch{
        .prefix_before = best.prefix_after,
        .suffix_before = best.suffix_after,
        .prefix_after = best.prefix_before,
        .suffix_after = best.suffix_before,
        .common_middle = best.common_middle,
    };
}

/// Checks whether a substring of `short_text` occurs in `long_text` and is at
/// least half as long as `long_text`, using the quarter-length window of
/// `long_text` starting at index `i` as the search seed.
///
/// On success the long text's pieces are returned in `prefix_before` /
/// `suffix_before` and the short text's pieces in `prefix_after` /
/// `suffix_after`. Returns `null` when the best common substring is shorter
/// than half of `long_text`.
///
/// `i + long_text.len / 4` must not exceed `long_text.len`. No memory is
/// allocated: every returned field is a slice of one of the inputs.
fn diffHalfMatchInternal(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    long_text: []const u8,
    short_text: []const u8,
    i: usize,
) DiffError!HalfMatchResult {
    // Kept in the signature for call-site compatibility; neither is needed.
    _ = dmp;
    _ = allocator;

    // Start with a 1/4 length substring at position i as a seed.
    const seed = long_text[i .. i + long_text.len / 4];

    var best = HalfMatch{
        .prefix_before = "",
        .suffix_before = "",
        .prefix_after = "",
        .suffix_after = "",
        .common_middle = "",
    };

    // Visit every occurrence of the seed in the short text and grow it in
    // both directions as far as the two texts keep agreeing.
    var search_start: usize = 0;
    while (std.mem.indexOfPos(u8, short_text, search_start, seed)) |j| : (search_start = j + 1) {
        const prefix_length = diffCommonPrefix(long_text[i..], short_text[j..]);
        const suffix_length = diffCommonSuffix(long_text[0..i], short_text[0..j]);

        if (best.common_middle.len < suffix_length + prefix_length) {
            best = HalfMatch{
                .prefix_before = long_text[0 .. i - suffix_length],
                .suffix_before = long_text[i + prefix_length ..],
                .prefix_after = short_text[0 .. j - suffix_length],
                .suffix_after = short_text[j + prefix_length ..],
                // The bytes before and after `j` are adjacent in `short_text`,
                // so the shared region is a single contiguous slice.
                .common_middle = short_text[j - suffix_length .. j + prefix_length],
            };
        }
    }

    return if (best.common_middle.len * 2 >= long_text.len) best else null;
}

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 diffz authors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

5
README.md Normal file
View File

@ -0,0 +1,5 @@
# diffz
## License
This library is based on [diff-match-patch](https://github.com/google/diff-match-patch), which is licensed under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0). This library itself is licensed under the MIT License; see `LICENSE`.