init
This commit is contained in:
commit
f7508ba226
288
DiffMatchPatch.zig
Normal file
288
DiffMatchPatch.zig
Normal file
|
@ -0,0 +1,288 @@
|
|||
const DiffMatchPatch = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const ArrayListUnmanaged = std.ArrayListUnmanaged;
|
||||
|
||||
/// A `DiffMatchPatch` instance with every configuration option left at its
/// declared default value.
pub const default: DiffMatchPatch = .{};
|
||||
|
||||
/// A single edit: an operation together with the text it applies to.
pub const Diff = struct {
    /// Whether `text` is inserted, deleted, or common to both inputs.
    operation: Operation,
    /// The bytes this edit covers.
    text: []const u8,

    /// The kind of edit a `Diff` describes.
    pub const Operation = enum {
        insert,
        delete,
        equal,
    };
};
|
||||
|
||||
/// Time budget, in microseconds, for mapping a diff before giving up
/// (0 means no limit).
diff_timeout: i64 = std.time.us_per_s,
/// Cost of an empty edit operation, expressed in edit characters.
diff_edit_cost: u16 = 4,

/// Point at which no match is declared
/// (0.0 = perfection, 1.0 = very loose).
match_threshold: f32 = 0.5,
/// How far to search for a match
/// (0 = exact location, 1000+ = broad match).
/// A match this many characters away from the expected location adds
/// 1.0 to the score (0.0 is a perfect match).
match_distance: u32 = 1000,
/// The number of bits in an int.
match_max_bits: u16 = 32,

/// When deleting a large block of text (over ~64 characters), how closely
/// the contents have to match the expected contents
/// (0.0 = perfection, 1.0 = very loose). Note that `match_threshold`
/// controls how closely the end points of a delete need to match.
patch_delete_threshold: f32 = 0.5,
/// Chunk size for context length.
patch_margin: u16 = 4,
|
||||
|
||||
/// Errors the diff routines can return: allocation failure only.
pub const DiffError = std.mem.Allocator.Error;
|
||||
|
||||
/// Computes the list of edits that turns `before` into `after`.
///
/// It is recommended that you use an Arena for this operation.
///
/// The deadline handed to the internal routines is an absolute timestamp in
/// microseconds. A `diff_timeout` of 0 (or less) means "no limit", so the
/// deadline is then set to the largest representable value instead of "now".
///
/// Returns `error.OutOfMemory` if an allocation fails.
pub fn diff(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    /// If false, then don't run a line-level diff first
    /// to identify the changed areas. If true, then run
    /// a faster slightly less optimal diff.
    check_lines: bool,
) DiffError!ArrayListUnmanaged(Diff) {
    const no_deadline: u64 = std.math.maxInt(u64);

    const deadline: u64 = if (dmp.diff_timeout <= 0)
        no_deadline
    else
        // Saturating add guards against overflow for huge timeouts; a sum
        // that cannot be represented as u64 is treated as "no deadline".
        std.math.cast(u64, std.time.microTimestamp() +| dmp.diff_timeout) orelse no_deadline;

    return dmp.diffInternal(allocator, before, after, check_lines, deadline);
}
|
||||
|
||||
/// Diffs `before` against `after` with an already-computed `deadline`.
///
/// Strips the common prefix and suffix, diffs only the differing middle via
/// `diffCompute`, then re-attaches the prefix and suffix as `.equal` edits.
/// The `text` slices of the returned edits point into `before` / `after`;
/// nothing is copied.
///
/// Returns `error.OutOfMemory` if an allocation fails.
fn diffInternal(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!ArrayListUnmanaged(Diff) {
    // Check for equality (speedup).
    if (std.mem.eql(u8, before, after)) {
        var equal_diffs = ArrayListUnmanaged(Diff){};
        if (before.len != 0) {
            try equal_diffs.append(allocator, Diff{ .operation = .equal, .text = before });
        }
        return equal_diffs;
    }

    // Trim off common prefix (speedup).
    const prefix_length = diffCommonPrefix(before, after);
    const common_prefix = before[0..prefix_length];
    const before_tail = before[prefix_length..];
    const after_tail = after[prefix_length..];

    // Trim off common suffix (speedup). This is measured on the remainders,
    // not the full inputs, so the suffix can never overlap the prefix.
    const suffix_length = diffCommonSuffix(before_tail, after_tail);
    const common_suffix = before_tail[before_tail.len - suffix_length ..];
    const trimmed_before = before_tail[0 .. before_tail.len - suffix_length];
    const trimmed_after = after_tail[0 .. after_tail.len - suffix_length];

    // Compute the diff on the middle block.
    var diffs = try dmp.diffCompute(allocator, trimmed_before, trimmed_after, check_lines, deadline);
    errdefer diffs.deinit(allocator);

    // Restore the prefix and suffix.
    if (common_prefix.len != 0) {
        try diffs.insert(allocator, 0, Diff{ .operation = .equal, .text = common_prefix });
    }
    if (common_suffix.len != 0) {
        try diffs.append(allocator, Diff{ .operation = .equal, .text = common_suffix });
    }

    // NOTE(review): `diffCleanupMerge` is not defined in the visible file and
    // receives the list by value here; confirm its signature once it exists.
    diffCleanupMerge(diffs);
    return diffs;
}
|
||||
|
||||
/// Returns the number of leading bytes that `before` and `after` share.
///
/// If one input is a prefix of the other (including the case where both are
/// equal, or either is empty), the result is the length of the shorter one.
fn diffCommonPrefix(before: []const u8, after: []const u8) usize {
    // `indexOfDiff` yields the first differing index, the shorter length when
    // one slice is a prefix of the other, and null only when both are equal.
    return std.mem.indexOfDiff(u8, before, after) orelse before.len;
}
|
||||
|
||||
/// Returns the number of trailing bytes that `before` and `after` share.
///
/// If one input is a suffix of the other (including the case where both are
/// equal, or either is empty), the result is the length of the shorter one.
fn diffCommonSuffix(before: []const u8, after: []const u8) usize {
    const limit = @min(before.len, after.len);

    // Walk backwards from the ends of both slices while the bytes agree.
    var length: usize = 0;
    while (length < limit and
        before[before.len - 1 - length] == after[after.len - 1 - length]) : (length += 1)
    {}

    return length;
}
|
||||
|
||||
/// Diffs two texts that are assumed to share no common prefix or suffix
/// (the caller, `diffInternal`, strips those first).
///
/// Tries a series of shortcuts before falling back to the general
/// algorithms:
///   1. one side empty            -> a single insert or delete;
///   2. shorter text inside longer -> edit / equal / edit;
///   3. single-byte shorter text   -> delete + insert;
///   4. a half-match exists        -> split in two and diff each side;
///   5. otherwise line-mode (for large inputs when `check_lines` is set)
///      or bisection.
///
/// The `text` slices of the returned edits point into `before` / `after`.
/// Returns `error.OutOfMemory` if an allocation fails.
fn diffCompute(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
    check_lines: bool,
    deadline: u64,
) DiffError!ArrayListUnmanaged(Diff) {
    var diffs = ArrayListUnmanaged(Diff){};
    errdefer diffs.deinit(allocator);

    if (before.len == 0) {
        // Just add some text (speedup).
        try diffs.append(allocator, Diff{ .operation = .insert, .text = after });
        return diffs;
    }

    if (after.len == 0) {
        // Just delete some text (speedup).
        try diffs.append(allocator, Diff{ .operation = .delete, .text = before });
        return diffs;
    }

    const long_text = if (before.len > after.len) before else after;
    const short_text = if (before.len > after.len) after else before;

    if (std.mem.indexOf(u8, long_text, short_text)) |index| {
        // Shorter text is inside the longer text (speedup).
        const op: Diff.Operation = if (before.len > after.len) .delete else .insert;
        try diffs.append(allocator, Diff{ .operation = op, .text = long_text[0..index] });
        try diffs.append(allocator, Diff{ .operation = .equal, .text = short_text });
        try diffs.append(allocator, Diff{ .operation = op, .text = long_text[index + short_text.len ..] });
        return diffs;
    }

    if (short_text.len == 1) {
        // Single character string.
        // After the previous speedup, the character can't be an equality.
        try diffs.append(allocator, Diff{ .operation = .delete, .text = before });
        try diffs.append(allocator, Diff{ .operation = .insert, .text = after });
        return diffs;
    }

    // Check to see if the problem can be split in two.
    if (try dmp.diffHalfMatch(allocator, before, after)) |half_match| {
        // A half-match was found: send both pairs off for separate processing.
        var diffs_a = try dmp.diffInternal(allocator, half_match.prefix_before, half_match.prefix_after, check_lines, deadline);
        errdefer diffs_a.deinit(allocator);
        var diffs_b = try dmp.diffInternal(allocator, half_match.suffix_before, half_match.suffix_after, check_lines, deadline);
        defer diffs_b.deinit(allocator);

        // Merge the results: left half, common middle, right half.
        try diffs_a.append(allocator, Diff{ .operation = .equal, .text = half_match.common_middle });
        try diffs_a.appendSlice(allocator, diffs_b.items);
        return diffs_a;
    }

    // NOTE(review): `diffLineMode` and `diffBisect` are not defined in the
    // visible file; confirm their signatures (receiver, allocator, error
    // set) once they are implemented.
    if (check_lines and before.len > 100 and after.len > 100) {
        return diffLineMode(before, after, deadline);
    }

    return diffBisect(before, after, deadline);
}
|
||||
|
||||
/// The five pieces of a successful half-match: the two inputs are each split
/// around a substring they have in common.
const HalfMatch = struct {
    /// Part of `before` that precedes the common middle.
    prefix_before: []const u8,
    /// Part of `before` that follows the common middle.
    suffix_before: []const u8,
    /// Part of `after` that precedes the common middle.
    prefix_after: []const u8,
    /// Part of `after` that follows the common middle.
    suffix_after: []const u8,
    /// The substring shared by both inputs.
    common_middle: []const u8,
};

/// Outcome of a half-match search; `null` when no half-match was found.
const HalfMatchResult = ?HalfMatch;
|
||||
|
||||
/// Looks for a substring shared by `before` and `after` that is at least half
/// the length of the longer text, so the diff can be split in two.
///
/// Returns `null` when:
///   - `diff_timeout` is 0 or less (unlimited time: don't risk a non-optimal
///     diff);
///   - the longer text has fewer than 4 bytes, or the shorter text is less
///     than half the length of the longer one;
///   - neither seed position produces a half-match.
///
/// Otherwise returns the prefix/suffix of each input around the common
/// middle, always expressed in terms of `before` and `after` regardless of
/// which one is longer.
fn diffHalfMatch(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    before: []const u8,
    after: []const u8,
) DiffError!HalfMatchResult {
    if (dmp.diff_timeout <= 0) {
        // Don't risk returning a non-optimal diff if we have unlimited time.
        return null;
    }

    const before_is_longer = before.len > after.len;
    const long_text = if (before_is_longer) before else after;
    const short_text = if (before_is_longer) after else before;

    if (long_text.len < 4 or short_text.len * 2 < long_text.len) {
        return null; // Pointless.
    }

    // First check if the second quarter is the seed for a half-match.
    const half_match_1 = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 3) / 4);
    // Check again based on the third quarter.
    const half_match_2 = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 1) / 2);

    // Keep whichever attempt succeeded; if both did, keep the one with the
    // longer common middle (the second attempt wins ties).
    const chosen: HalfMatchResult = pick: {
        const first = half_match_1 orelse break :pick half_match_2;
        const second = half_match_2 orelse break :pick half_match_1;
        break :pick if (first.common_middle.len > second.common_middle.len)
            half_match_1
        else
            half_match_2;
    };
    const half_match = chosen orelse return null;

    // The internal search reports the long text in the `*_before` fields and
    // the short text in the `*_after` fields; swap them back when `before`
    // is not the longer input.
    if (before_is_longer) {
        return half_match;
    }
    return .{
        .prefix_before = half_match.prefix_after,
        .suffix_before = half_match.suffix_after,
        .prefix_after = half_match.prefix_before,
        .suffix_after = half_match.suffix_before,
        .common_middle = half_match.common_middle,
    };
}
|
||||
|
||||
/// Checks whether a substring of `short_text` exists within `long_text` such
/// that the substring is at least half the length of `long_text`.
///
/// `i` is the start index in `long_text` of the quarter-length seed used to
/// anchor the search. Every occurrence of the seed in `short_text` is
/// extended in both directions, and the longest common stretch is kept.
///
/// On success the result describes `long_text` in the `*_before` fields and
/// `short_text` in the `*_after` fields. All returned slices point into the
/// inputs; nothing is allocated, so `allocator` is unused.
///
/// Returns `null` if the best common stretch is shorter than half of
/// `long_text`.
fn diffHalfMatchInternal(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    long_text: []const u8,
    short_text: []const u8,
    i: usize,
) DiffError!HalfMatchResult {
    _ = dmp;
    _ = allocator;

    // Start with a 1/4 length substring at position i as a seed.
    const seed = long_text[i .. i + long_text.len / 4];

    var best_common: []const u8 = "";
    var best_long_text_a: []const u8 = "";
    var best_long_text_b: []const u8 = "";
    var best_short_text_a: []const u8 = "";
    var best_short_text_b: []const u8 = "";

    var search_start: usize = 0;
    while (std.mem.indexOfPos(u8, short_text, search_start, seed)) |j| {
        const prefix_length = diffCommonPrefix(long_text[i..], short_text[j..]);
        const suffix_length = diffCommonSuffix(long_text[0..i], short_text[0..j]);

        if (best_common.len < suffix_length + prefix_length) {
            // The matched stretch is contiguous in `short_text`, so it can be
            // expressed as a single slice without concatenating.
            best_common = short_text[j - suffix_length .. j + prefix_length];
            best_long_text_a = long_text[0 .. i - suffix_length];
            best_long_text_b = long_text[i + prefix_length ..];
            best_short_text_a = short_text[0 .. j - suffix_length];
            best_short_text_b = short_text[j + prefix_length ..];
        }

        // Resume the search just past this occurrence of the seed.
        search_start = j + 1;
    }

    if (best_common.len * 2 < long_text.len) {
        return null;
    }

    return .{
        .prefix_before = best_long_text_a,
        .suffix_before = best_long_text_b,
        .prefix_after = best_short_text_a,
        .suffix_after = best_short_text_b,
        .common_middle = best_common,
    };
}
|
21
LICENSE
Normal file
21
LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2023 diffz authors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
5
README.md
Normal file
5
README.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
# diffz
|
||||
|
||||
## License
|
||||
|
||||
This library is based on https://github.com/google/diff-match-patch, which is licensed under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0). This library itself is licensed under the MIT License; see `LICENSE`.
|
Loading…
Reference in New Issue
Block a user