Merge branch 'master' into zig-master

This commit is contained in:
Jimmi Holst Christensen 2020-11-21 16:58:00 +01:00
commit f54d17fef5
2 changed files with 271 additions and 9 deletions

View File

@ -1,24 +1,24 @@
# GitHub Actions workflow named "CI".
# NOTE(review): this text looks like a rendered diff hunk, so removed and added
# lines appear next to each other (e.g. `on: push` followed by the expanded
# `on:` mapping, and two `runs-on:` / `uses:` variants in a row). Confirm
# against the real workflow file which line of each pair is current.
name: CI
on: push
# Trigger on pushes and on pull requests.
on:
push:
pull_request:
jobs:
# Job that checks out the repository (including submodules), installs Zig and
# runs `zig build`.
test:
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
runs-on: ${{matrix.os}}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v2.3.4
with:
submodules: recursive
- uses: goto-bus-stop/setup-zig@v1.0.0
- uses: goto-bus-stop/setup-zig@v1.2.5
with:
# Zig compiler version passed to the setup-zig action.
version: 0.7.0
- run: zig build
# Job that installs Zig and fails when `zig fmt --check .` reports unformatted files.
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- uses: goto-bus-stop/setup-zig@v1.0.0
- uses: actions/checkout@v2.3.4
- uses: goto-bus-stop/setup-zig@v1.2.5
with:
# Zig compiler version passed to the setup-zig action.
version: 0.7.0
- run: zig fmt --check .

View File

@ -5,6 +5,7 @@ const debug = std.debug;
const heap = std.heap;
const mem = std.mem;
const process = std.process;
const testing = std.testing;
/// An example of what methods should be implemented on an arg iterator.
pub const ExampleArgIterator = struct {
@ -77,3 +78,264 @@ pub const OsIterator = struct {
}
}
};
/// An argument iterator that takes a string and parses it into arguments, simulating
/// how shells split arguments.
///
/// Arguments are returned as slices into the input string whenever possible. When an
/// argument cannot be expressed as one contiguous slice of the input (for example
/// `ab'cd'` or `a\ b`), it is assembled in memory owned by the iterator's arena.
/// Such arguments stay valid until `deinit` is called.
pub const ShellIterator = struct {
    /// The errors `next` can return. This is public so that callers can name the
    /// error set in their own signatures and switch over it.
    pub const Error = error{
        /// The input ended right after an escape character (`\`) outside of quotes.
        DanglingEscape,
        /// The input ended before a single or double quoted section was closed.
        QuoteNotClosed,
    } || mem.Allocator.Error;

    /// Owns every argument that had to be allocated. Freed as a whole by `deinit`.
    arena: heap.ArenaAllocator,
    /// The part of the input that has not been consumed yet.
    str: []const u8,

    /// Creates an iterator over `str`. `allocator` backs the arena that is used for
    /// arguments that cannot be returned as slices of `str`.
    pub fn init(allocator: *mem.Allocator, str: []const u8) ShellIterator {
        return .{
            .arena = heap.ArenaAllocator.init(allocator),
            .str = str,
        };
    }

    /// Frees all memory allocated by previous calls to `next`.
    pub fn deinit(iter: *ShellIterator) void {
        iter.arena.deinit();
    }

    /// Returns the next argument, or `null` when only whitespace (or nothing) is left.
    ///
    /// Returns `Error.DanglingEscape` or `Error.QuoteNotClosed` when the input ends in
    /// the middle of an escape or a quoted section, and `error.OutOfMemory` when an
    /// argument has to be allocated and the allocation fails.
    pub fn next(iter: *ShellIterator) Error!?[]const u8 {
        // Whenever possible, this iterator will return slices into `str` instead of
        // allocating. Sometimes this is not possible, for example, escaped characters
        // have to be unescaped, so we need to allocate in this case.
        var list = std.ArrayList(u8).init(&iter.arena.allocator);

        // Index into `iter.str` where the segment currently being scanned begins.
        var start: usize = 0;
        var state: enum {
            skip_whitespace,
            no_quote,
            no_quote_escape,
            single_quote,
            double_quote,
            double_quote_escape,
            after_quote,
        } = .skip_whitespace;

        for (iter.str) |c, i| {
            switch (state) {
                // The state that skips the initial whitespace.
                .skip_whitespace => switch (c) {
                    ' ', '\t', '\n' => {},
                    // Quotes and the escape character are not part of the argument,
                    // so the segment starts on the character after them.
                    '\'' => {
                        start = i + 1;
                        state = .single_quote;
                    },
                    '"' => {
                        start = i + 1;
                        state = .double_quote;
                    },
                    '\\' => {
                        start = i + 1;
                        state = .no_quote_escape;
                    },
                    else => {
                        start = i;
                        state = .no_quote;
                    },
                },

                // The state that parses the unquoted part of an argument.
                .no_quote => switch (c) {
                    // We are done parsing an unquoted argument when we hit a
                    // whitespace.
                    ' ', '\t', '\n' => {
                        // The defer runs after the result has been computed, so
                        // `result` still sees the unconsumed string.
                        defer iter.str = iter.str[i..];
                        return iter.result(start, i, &list);
                    },

                    // Slicing is not possible if a quote starts while parsing
                    // unquoted args.
                    // Example:
                    // ab'cd' -> abcd
                    '\'' => {
                        try list.appendSlice(iter.str[start..i]);
                        start = i + 1;
                        state = .single_quote;
                    },
                    '"' => {
                        try list.appendSlice(iter.str[start..i]);
                        start = i + 1;
                        state = .double_quote;
                    },

                    // Slicing is not possible if we need to escape a character.
                    // Example:
                    // ab\"d -> ab"d
                    '\\' => {
                        try list.appendSlice(iter.str[start..i]);
                        start = i + 1;
                        state = .no_quote_escape;
                    },
                    else => {},
                },

                // We're in this state after having parsed the quoted part of an
                // argument. This state works mostly the same as .no_quote, but
                // is aware that the last character seen was a quote, which should
                // not be part of the argument. This is why you will see `i - 1` here
                // instead of just `i` when `iter.str` is sliced.
                .after_quote => switch (c) {
                    ' ', '\t', '\n' => {
                        defer iter.str = iter.str[i..];
                        return iter.result(start, i - 1, &list);
                    },
                    '\'' => {
                        try list.appendSlice(iter.str[start .. i - 1]);
                        start = i + 1;
                        state = .single_quote;
                    },
                    '"' => {
                        try list.appendSlice(iter.str[start .. i - 1]);
                        start = i + 1;
                        state = .double_quote;
                    },
                    '\\' => {
                        try list.appendSlice(iter.str[start .. i - 1]);
                        start = i + 1;
                        state = .no_quote_escape;
                    },
                    else => {
                        try list.appendSlice(iter.str[start .. i - 1]);
                        start = i;
                        state = .no_quote;
                    },
                },

                // The states that parse the quoted part of arguments. The only
                // difference between single and double quoted arguments is that
                // single quoted arguments ignore escape sequences, while double
                // quoted arguments do escaping.
                .single_quote => switch (c) {
                    '\'' => state = .after_quote,
                    else => {},
                },
                .double_quote => switch (c) {
                    '"' => state = .after_quote,
                    '\\' => {
                        try list.appendSlice(iter.str[start..i]);
                        start = i + 1;
                        state = .double_quote_escape;
                    },
                    else => {},
                },

                // The states we end up in after the escape character (`\`). All
                // these states do is transition back into the previous state, which
                // keeps the escaped character as a literal part of the argument.
                // TODO: Are there any escape sequences that transform the second
                //       character into something else? For example, in Zig, `\n` is
                //       transformed into the line feed ascii character.
                .no_quote_escape => switch (c) {
                    else => state = .no_quote,
                },
                .double_quote_escape => switch (c) {
                    else => state = .double_quote,
                },
            }
        }

        // The whole input has been scanned. Whatever is returned below, the
        // iterator is exhausted afterwards.
        defer iter.str = iter.str[iter.str.len..];
        switch (state) {
            .skip_whitespace => return null,
            .no_quote => return iter.result(start, iter.str.len, &list),
            // The last character was the closing quote, so leave it out.
            .after_quote => return iter.result(start, iter.str.len - 1, &list),
            .no_quote_escape => return Error.DanglingEscape,
            .single_quote,
            .double_quote,
            .double_quote_escape,
            => return Error.QuoteNotClosed,
        }
    }

    /// Produces the finished argument from the last segment `iter.str[start..end]`
    /// and whatever has been accumulated in `list` so far.
    fn result(iter: *ShellIterator, start: usize, end: usize, list: *std.ArrayList(u8)) Error!?[]const u8 {
        const res = iter.str[start..end];

        // If we already have something in `list` that means that we could not
        // parse the argument without allocation. We therefore need to just append
        // the rest we have to the list and return that.
        if (list.items.len != 0) {
            try list.appendSlice(res);
            return list.toOwnedSlice();
        }
        return res;
    }
};
/// Test helper: splits `str` with a `ShellIterator` and checks that it yields exactly
/// the arguments in `expect`, in order, followed by `null`. The iterator is backed by
/// a `FailingAllocator` set up with `allocations`, and on success the number of
/// allocations performed is checked to be equal to `allocations`.
fn testShellIteratorOk(str: []const u8, allocations: usize, expect: []const []const u8) void {
    var failing = testing.FailingAllocator.init(testing.allocator, allocations);
    var iter = ShellIterator.init(&failing.allocator, str);
    defer iter.deinit();

    for (expect) |wanted| {
        const maybe_arg = iter.next() catch |err| {
            // Comparing the wanted string against the error makes the test fail
            // and report the unexpected error.
            testing.expectEqual(@as(anyerror![]const u8, wanted), err);
            continue;
        };
        testing.expect(maybe_arg != null);
        testing.expectEqualStrings(wanted, maybe_arg.?);
    }

    // After the expected arguments, the iterator must be exhausted.
    const trailing = iter.next() catch |err| {
        testing.expectEqual(@as(anyerror!void, {}), err);
        return;
    };
    testing.expectEqual(@as(?[]const u8, null), trailing);
    testing.expectEqual(allocations, failing.allocations);
}
/// Test helper: drains a `ShellIterator` over `str` and checks that it fails with
/// the error `expect`. The test fails if the iterator reaches the end of the input
/// without returning an error.
fn testShellIteratorErr(str: []const u8, expect: anyerror) void {
    var iter = ShellIterator.init(testing.allocator, str);
    defer iter.deinit();

    while (true) {
        const maybe_arg = iter.next() catch |err| {
            testing.expectError(expect, @as(anyerror!void, err));
            return;
        };
        if (maybe_arg == null) break;
    }

    // No error was returned at all, which is a failure for this helper.
    testing.expectError(expect, @as(anyerror!void, {}));
}
test "ShellIterator" {
    // Each case is: input, expected number of allocations, expected arguments.

    // Plain and fully quoted arguments can be returned as slices of the input.
    testShellIteratorOk("a", 0, &[_][]const u8{"a"});
    testShellIteratorOk("'a'", 0, &[_][]const u8{"a"});
    testShellIteratorOk("\"a\"", 0, &[_][]const u8{"a"});
    testShellIteratorOk("a b", 0, &[_][]const u8{ "a", "b" });
    testShellIteratorOk("'a' b", 0, &[_][]const u8{ "a", "b" });
    testShellIteratorOk("\"a\" b", 0, &[_][]const u8{ "a", "b" });
    testShellIteratorOk("a 'b'", 0, &[_][]const u8{ "a", "b" });
    testShellIteratorOk("a \"b\"", 0, &[_][]const u8{ "a", "b" });
    testShellIteratorOk("'a b'", 0, &[_][]const u8{"a b"});
    testShellIteratorOk("\"a b\"", 0, &[_][]const u8{"a b"});

    // Arguments made of several parts have to be joined in allocated memory.
    testShellIteratorOk("\"a\"\"b\"", 1, &[_][]const u8{"ab"});
    testShellIteratorOk("'a''b'", 1, &[_][]const u8{"ab"});
    testShellIteratorOk("'a'b", 1, &[_][]const u8{"ab"});
    testShellIteratorOk("a'b'", 1, &[_][]const u8{"ab"});
    testShellIteratorOk("a\\ b", 1, &[_][]const u8{"a b"});
    testShellIteratorOk("\"a\\ b\"", 1, &[_][]const u8{"a b"});

    // Single quotes do not process escapes.
    testShellIteratorOk("'a\\ b'", 0, &[_][]const u8{"a\\ b"});
    testShellIteratorOk(" a b ", 0, &[_][]const u8{ "a", "b" });

    // Two escaped spaces separated by an unescaped space are two arguments. The
    // unescaped space between them is required: without it the second escape
    // continues the first argument.
    testShellIteratorOk("\\  \\ ", 0, &[_][]const u8{ " ", " " });
    // Two escaped spaces right after each other are a single argument, and joining
    // them requires an allocation.
    testShellIteratorOk("\\ \\ ", 1, &[_][]const u8{"  "});

    testShellIteratorOk(
        \\printf 'run\nuninstall\n'
    , 0, &[_][]const u8{ "printf", "run\\nuninstall\\n" });
    testShellIteratorOk(
        \\setsid -f steam "steam://$action/$id"
    , 0, &[_][]const u8{ "setsid", "-f", "steam", "steam://$action/$id" });
    testShellIteratorOk(
        \\xargs -I% rg --no-heading --no-line-number --only-matching
        \\ --case-sensitive --multiline --text --byte-offset '(?-u)%' $@
        \\
    , 0, &[_][]const u8{
        "xargs",            "-I%",             "rg",               "--no-heading",
        "--no-line-number", "--only-matching", "--case-sensitive", "--multiline",
        "--text",           "--byte-offset",   "(?-u)%",           "$@",
    });

    // Input that ends inside a quote or right after an escape is an error.
    testShellIteratorErr("'a", error.QuoteNotClosed);
    testShellIteratorErr("'a\\", error.QuoteNotClosed);
    testShellIteratorErr("\"a", error.QuoteNotClosed);
    testShellIteratorErr("\"a\\", error.QuoteNotClosed);
    testShellIteratorErr("a\\", error.DanglingEscape);
}