diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index daa9885..4807c4b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,24 +1,24 @@ name: CI -on: push +on: + push: + pull_request: + jobs: test: - strategy: - matrix: - os: [ubuntu-latest, macos-latest] - runs-on: ${{matrix.os}} + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2.3.4 with: submodules: recursive - - uses: goto-bus-stop/setup-zig@v1.0.0 + - uses: goto-bus-stop/setup-zig@v1.2.5 with: version: 0.7.0 - run: zig build lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - uses: goto-bus-stop/setup-zig@v1.0.0 + - uses: actions/checkout@v2.3.4 + - uses: goto-bus-stop/setup-zig@v1.2.5 with: version: 0.7.0 - run: zig fmt --check . diff --git a/clap/args.zig b/clap/args.zig index 52626fc..2868050 100644 --- a/clap/args.zig +++ b/clap/args.zig @@ -5,6 +5,7 @@ const debug = std.debug; const heap = std.heap; const mem = std.mem; const process = std.process; +const testing = std.testing; /// An example of what methods should be implemented on an arg iterator. pub const ExampleArgIterator = struct { @@ -77,3 +78,264 @@ pub const OsIterator = struct { } } }; + +/// An argument iterator that takes a string and parses it into arguments, simulating +/// how shells split arguments. +pub const ShellIterator = struct { + const Error = error{ + DanglingEscape, + QuoteNotClosed, + } || mem.Allocator.Error; + + arena: heap.ArenaAllocator, + str: []const u8, + + pub fn init(allocator: *mem.Allocator, str: []const u8) ShellIterator { + return .{ + .arena = heap.ArenaAllocator.init(allocator), + .str = str, + }; + } + + pub fn deinit(iter: *ShellIterator) void { + iter.arena.deinit(); + } + + pub fn next(iter: *ShellIterator) Error!?[]const u8 { + // Whenever possible, this iterator will return slices into `str` instead of + // allocating. 
Sometimes this is not possible, for example, escaped characters + // have to be unescaped, so we need to allocate in this case. + var list = std.ArrayList(u8).init(&iter.arena.allocator); + var start: usize = 0; + var state: enum { + skip_whitespace, + no_quote, + no_quote_escape, + single_quote, + double_quote, + double_quote_escape, + after_quote, + } = .skip_whitespace; + + for (iter.str) |c, i| { + switch (state) { + // The state that skips the initial whitespace. + .skip_whitespace => switch (c) { + ' ', '\t', '\n' => {}, + '\'' => { + start = i + 1; + state = .single_quote; + }, + '"' => { + start = i + 1; + state = .double_quote; + }, + '\\' => { + start = i + 1; + state = .no_quote_escape; + }, + else => { + start = i; + state = .no_quote; + }, + }, + + // The state that parses the non-quoted part of an argument. + .no_quote => switch (c) { + // We are done parsing a non-quoted argument when we hit a + // whitespace character. + ' ', '\t', '\n' => { + defer iter.str = iter.str[i..]; + return iter.result(start, i, &list); + }, + + // Slicing is not possible if a quote starts while parsing a + // non-quoted arg. + // Example: + // ab'cd' -> abcd + '\'' => { + try list.appendSlice(iter.str[start..i]); + start = i + 1; + state = .single_quote; + }, + '"' => { + try list.appendSlice(iter.str[start..i]); + start = i + 1; + state = .double_quote; + }, + + // Slicing is not possible if we need to unescape a character. + // Example: + // ab\"d -> ab"d + '\\' => { + try list.appendSlice(iter.str[start..i]); + start = i + 1; + state = .no_quote_escape; + }, + else => {}, + }, + + // We're in this state after having parsed the quoted part of an + // argument. This state works mostly the same as .no_quote, but + // is aware that the last character seen was a quote, which should + // not be part of the argument. This is why you will see `i - 1` here + // instead of just `i` when `iter.str` is sliced. 
+ .after_quote => switch (c) { + ' ', '\t', '\n' => { + defer iter.str = iter.str[i..]; + return iter.result(start, i - 1, &list); + }, + '\'' => { + try list.appendSlice(iter.str[start .. i - 1]); + start = i + 1; + state = .single_quote; + }, + '"' => { + try list.appendSlice(iter.str[start .. i - 1]); + start = i + 1; + state = .double_quote; + }, + '\\' => { + try list.appendSlice(iter.str[start .. i - 1]); + start = i + 1; + state = .no_quote_escape; + }, + else => { + try list.appendSlice(iter.str[start .. i - 1]); + start = i; + state = .no_quote; + }, + }, + + // The states that parse the quoted part of arguments. The only difference + // between single and double quoted arguments is that single quoted + // arguments ignore escape sequences, while double quoted arguments + // do escaping. + .single_quote => switch (c) { + '\'' => state = .after_quote, + else => {}, + }, + .double_quote => switch (c) { + '"' => state = .after_quote, + '\\' => { + try list.appendSlice(iter.str[start..i]); + start = i + 1; + state = .double_quote_escape; + }, + else => {}, + }, + + // The states we end up in after the escape character (`\`). All these + // states do is transition back into the previous state. + // TODO: Are there any escape sequences that do transform the second + // character into something else? For example, in Zig, `\n` is + // transformed into the line feed ascii character. 
+ .no_quote_escape => switch (c) { + else => state = .no_quote, + }, + .double_quote_escape => switch (c) { + else => state = .double_quote, + }, + } + } + + defer iter.str = iter.str[iter.str.len..]; + switch (state) { + .skip_whitespace => return null, + .no_quote => return iter.result(start, iter.str.len, &list), + .after_quote => return iter.result(start, iter.str.len - 1, &list), + .no_quote_escape => return Error.DanglingEscape, + .single_quote, + .double_quote, + .double_quote_escape, + => return Error.QuoteNotClosed, + } + } + + fn result(iter: *ShellIterator, start: usize, end: usize, list: *std.ArrayList(u8)) Error!?[]const u8 { + const res = iter.str[start..end]; + + // If we already have something in `list` that means that we could not + // parse the argument without allocation. We therefore need to just append + // the rest we have to the list and return that. + if (list.items.len != 0) { + try list.appendSlice(res); + return list.toOwnedSlice(); + } + return res; + } +}; + +fn testShellIteratorOk(str: []const u8, allocations: usize, expect: []const []const u8) void { + var allocator = testing.FailingAllocator.init(testing.allocator, allocations); + var it = ShellIterator.init(&allocator.allocator, str); + defer it.deinit(); + + for (expect) |e| { + if (it.next()) |actual| { + testing.expect(actual != null); + testing.expectEqualStrings(e, actual.?); + } else |err| testing.expectEqual(@as(anyerror![]const u8, e), err); + } + + if (it.next()) |actual| { + testing.expectEqual(@as(?[]const u8, null), actual); + testing.expectEqual(allocations, allocator.allocations); + } else |err| testing.expectEqual(@as(anyerror!void, {}), err); +} + +fn testShellIteratorErr(str: []const u8, expect: anyerror) void { + var it = ShellIterator.init(testing.allocator, str); + defer it.deinit(); + + while (it.next() catch |err| { + testing.expectError(expect, @as(anyerror!void, err)); + return; + }) |_| {} + + testing.expectError(expect, @as(anyerror!void, {})); +} + +test 
"ShellIterator" { + testShellIteratorOk("a", 0, &[_][]const u8{"a"}); + testShellIteratorOk("'a'", 0, &[_][]const u8{"a"}); + testShellIteratorOk("\"a\"", 0, &[_][]const u8{"a"}); + testShellIteratorOk("a b", 0, &[_][]const u8{ "a", "b" }); + testShellIteratorOk("'a' b", 0, &[_][]const u8{ "a", "b" }); + testShellIteratorOk("\"a\" b", 0, &[_][]const u8{ "a", "b" }); + testShellIteratorOk("a 'b'", 0, &[_][]const u8{ "a", "b" }); + testShellIteratorOk("a \"b\"", 0, &[_][]const u8{ "a", "b" }); + testShellIteratorOk("'a b'", 0, &[_][]const u8{"a b"}); + testShellIteratorOk("\"a b\"", 0, &[_][]const u8{"a b"}); + testShellIteratorOk("\"a\"\"b\"", 1, &[_][]const u8{"ab"}); + testShellIteratorOk("'a''b'", 1, &[_][]const u8{"ab"}); + testShellIteratorOk("'a'b", 1, &[_][]const u8{"ab"}); + testShellIteratorOk("a'b'", 1, &[_][]const u8{"ab"}); + testShellIteratorOk("a\\ b", 1, &[_][]const u8{"a b"}); + testShellIteratorOk("\"a\\ b\"", 1, &[_][]const u8{"a b"}); + testShellIteratorOk("'a\\ b'", 0, &[_][]const u8{"a\\ b"}); + testShellIteratorOk(" a b ", 0, &[_][]const u8{ "a", "b" }); + testShellIteratorOk("\\ \\ ", 0, &[_][]const u8{ " ", " " }); + + testShellIteratorOk( + \\printf 'run\nuninstall\n' + , 0, &[_][]const u8{ "printf", "run\\nuninstall\\n" }); + testShellIteratorOk( + \\setsid -f steam "steam://$action/$id" + , 0, &[_][]const u8{ "setsid", "-f", "steam", "steam://$action/$id" }); + testShellIteratorOk( + \\xargs -I% rg --no-heading --no-line-number --only-matching + \\ --case-sensitive --multiline --text --byte-offset '(?-u)%' $@ + \\ + , 0, &[_][]const u8{ + "xargs", "-I%", "rg", "--no-heading", + "--no-line-number", "--only-matching", "--case-sensitive", "--multiline", + "--text", "--byte-offset", "(?-u)%", "$@", + }); + + testShellIteratorErr("'a", error.QuoteNotClosed); + testShellIteratorErr("'a\\", error.QuoteNotClosed); + testShellIteratorErr("\"a", error.QuoteNotClosed); + testShellIteratorErr("\"a\\", error.QuoteNotClosed); + 
testShellIteratorErr("a\\", error.DanglingEscape); +}