zig/lib/std/json.zig

// JSON parser conforming to RFC8259.
//
// https://tools.ietf.org/html/rfc8259
const std = @import("std.zig");
const debug = std.debug;
const testing = std.testing;
const mem = std.mem;
const maxInt = std.math.maxInt;
pub const WriteStream = @import("json/write_stream.zig").WriteStream;
// A single token slice into the parent string.
//
// Use `token.slice()` on the input at the current position to get the current slice.
pub const Token = struct {
id: Id,
// How many bytes do we skip before counting
offset: u1,
// Whether string contains a \uXXXX sequence and cannot be zero-copied
string_has_escape: bool,
// Whether number is simple and can be represented by an integer (i.e. no `.` or `e`)
number_is_integer: bool,
// How many bytes behind the current position the start of this token is.
count: usize,
pub const Id = enum {
ObjectBegin,
ObjectEnd,
ArrayBegin,
ArrayEnd,
String,
Number,
True,
False,
Null,
};
pub fn init(id: Id, count: usize, offset: u1) Token {
return Token{
.id = id,
.offset = offset,
.string_has_escape = false,
.number_is_integer = true,
.count = count,
};
}
pub fn initString(count: usize, has_unicode_escape: bool) Token {
return Token{
.id = Id.String,
.offset = 0,
.string_has_escape = has_unicode_escape,
.number_is_integer = true,
.count = count,
};
}
pub fn initNumber(count: usize, number_is_integer: bool) Token {
return Token{
.id = Id.Number,
.offset = 0,
.string_has_escape = false,
.number_is_integer = number_is_integer,
.count = count,
};
}
// A marker token is a zero-length token used for structural elements (object/array begin and end).
pub fn initMarker(id: Id) Token {
return Token{
.id = id,
.offset = 0,
.string_has_escape = false,
.number_is_integer = true,
.count = 0,
};
}
// Slice into the underlying input string.
pub fn slice(self: Token, input: []const u8, i: usize) []const u8 {
return input[i + self.offset - self.count .. i + self.offset];
}
};
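// Example (a minimal sketch, illustrative and not part of the original test suite):
// the slice arithmetic above means that a Number token with count == 3 and offset == 0,
// emitted while the parser sits on the terminating byte at index 3, refers back to the
// three digit bytes.
test "json.token slice example" {
const input = "123 ";
const token = Token.initNumber(3, true);
testing.expect(mem.eql(u8, token.slice(input, 3), "123"));
}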
// A small streaming JSON parser. This accepts input one byte at a time and returns tokens as
// they are encountered. No copies or allocations are performed during parsing and the entire
// parsing state requires ~40-50 bytes of stack space.
//
// Conforms strictly to RFC8259.
//
// For a non-byte based wrapper, consider using TokenStream instead.
pub const StreamingParser = struct {
// Current state
state: State,
// How many bytes we have counted for the current token
count: usize,
// What state to follow after parsing a string (either property or value string)
after_string_state: State,
// What state to follow after parsing a value (either top-level or value end)
after_value_state: State,
// Whether the input parsed so far would be a complete, valid JSON value if we stopped now.
complete: bool,
// Current token flags to pass through to the next generated token; see Token.
string_has_escape: bool,
number_is_integer: bool,
// Bit-stack for nested object/array literals (max 255 nestings).
stack: u256,
stack_used: u8,
const object_bit = 0;
const array_bit = 1;
const max_stack_size = maxInt(u8);
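// For example, after feeding "[{" the bit-stack holds 0b10 (array below, object on
// top), stack_used == 2, and `stack & 1` selects the innermost container kind.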
pub fn init() StreamingParser {
var p: StreamingParser = undefined;
p.reset();
return p;
}
pub fn reset(p: *StreamingParser) void {
p.state = State.TopLevelBegin;
p.count = 0;
// Set before ever read in main transition function
p.after_string_state = undefined;
p.after_value_state = State.ValueEnd; // handle end of values normally
p.stack = 0;
p.stack_used = 0;
p.complete = false;
p.string_has_escape = false;
p.number_is_integer = true;
}
pub const State = enum {
// These must be first with these explicit values as we rely on them for indexing the
// bit-stack directly and avoiding a branch.
ObjectSeparator = 0,
ValueEnd = 1,
TopLevelBegin,
TopLevelEnd,
ValueBegin,
ValueBeginNoClosing,
String,
StringUtf8Byte3,
StringUtf8Byte2,
StringUtf8Byte1,
StringEscapeCharacter,
StringEscapeHexUnicode4,
StringEscapeHexUnicode3,
StringEscapeHexUnicode2,
StringEscapeHexUnicode1,
Number,
NumberMaybeDotOrExponent,
NumberMaybeDigitOrDotOrExponent,
NumberFractionalRequired,
NumberFractional,
NumberMaybeExponent,
NumberExponent,
NumberExponentDigitsRequired,
NumberExponentDigits,
TrueLiteral1,
TrueLiteral2,
TrueLiteral3,
FalseLiteral1,
FalseLiteral2,
FalseLiteral3,
FalseLiteral4,
NullLiteral1,
NullLiteral2,
NullLiteral3,
// Only call this function to generate array/object final state.
pub fn fromInt(x: var) State {
debug.assert(x == 0 or x == 1);
const T = @TagType(State);
return @intToEnum(State, @intCast(T, x));
}
};
pub const Error = error{
InvalidTopLevel,
TooManyNestedItems,
TooManyClosingItems,
InvalidValueBegin,
InvalidValueEnd,
UnbalancedBrackets,
UnbalancedBraces,
UnexpectedClosingBracket,
UnexpectedClosingBrace,
InvalidNumber,
InvalidSeparator,
InvalidLiteral,
InvalidEscapeCharacter,
InvalidUnicodeHexSymbol,
InvalidUtf8Byte,
InvalidTopLevelTrailing,
InvalidControlCharacter,
};
// Give another byte to the parser and obtain any new tokens. This may (rarely) return two
// tokens. token2 is always null if token1 is null.
//
// There is currently no error recovery on a bad stream.
pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
token1.* = null;
token2.* = null;
p.count += 1;
// unlikely
if (try p.transition(c, token1)) {
_ = try p.transition(c, token2);
}
}
// Perform a single transition on the state machine and return any possible token.
fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool {
switch (p.state) {
State.TopLevelBegin => switch (c) {
'{' => {
p.stack <<= 1;
p.stack |= object_bit;
p.stack_used += 1;
p.state = State.ValueBegin;
p.after_string_state = State.ObjectSeparator;
token.* = Token.initMarker(Token.Id.ObjectBegin);
},
'[' => {
p.stack <<= 1;
p.stack |= array_bit;
p.stack_used += 1;
p.state = State.ValueBegin;
p.after_string_state = State.ValueEnd;
token.* = Token.initMarker(Token.Id.ArrayBegin);
},
'-' => {
p.number_is_integer = true;
p.state = State.Number;
p.after_value_state = State.TopLevelEnd;
p.count = 0;
},
'0' => {
p.number_is_integer = true;
p.state = State.NumberMaybeDotOrExponent;
p.after_value_state = State.TopLevelEnd;
p.count = 0;
},
'1'...'9' => {
p.number_is_integer = true;
p.state = State.NumberMaybeDigitOrDotOrExponent;
p.after_value_state = State.TopLevelEnd;
p.count = 0;
},
'"' => {
p.state = State.String;
p.after_value_state = State.TopLevelEnd;
// We don't actually need the following since after_value_state should override.
p.after_string_state = State.ValueEnd;
p.string_has_escape = false;
p.count = 0;
},
't' => {
p.state = State.TrueLiteral1;
p.after_value_state = State.TopLevelEnd;
p.count = 0;
},
'f' => {
p.state = State.FalseLiteral1;
p.after_value_state = State.TopLevelEnd;
p.count = 0;
},
'n' => {
p.state = State.NullLiteral1;
p.after_value_state = State.TopLevelEnd;
p.count = 0;
},
0x09, 0x0A, 0x0D, 0x20 => {
// whitespace
},
else => {
return error.InvalidTopLevel;
},
},
State.TopLevelEnd => switch (c) {
0x09, 0x0A, 0x0D, 0x20 => {
// whitespace
},
else => {
return error.InvalidTopLevelTrailing;
},
},
State.ValueBegin => switch (c) {
// NOTE: These are shared with ValueEnd as well; we could reorder the states to
// be a bit clearer and avoid this duplication.
'}' => {
// unlikely
if (p.stack & 1 != object_bit) {
return error.UnexpectedClosingBracket;
}
if (p.stack_used == 0) {
return error.TooManyClosingItems;
}
p.state = State.ValueBegin;
p.after_string_state = State.fromInt(p.stack & 1);
p.stack >>= 1;
p.stack_used -= 1;
switch (p.stack_used) {
0 => {
p.complete = true;
p.state = State.TopLevelEnd;
},
else => {
p.state = State.ValueEnd;
},
}
token.* = Token.initMarker(Token.Id.ObjectEnd);
},
']' => {
if (p.stack & 1 != array_bit) {
return error.UnexpectedClosingBrace;
}
if (p.stack_used == 0) {
return error.TooManyClosingItems;
}
p.state = State.ValueBegin;
p.after_string_state = State.fromInt(p.stack & 1);
p.stack >>= 1;
p.stack_used -= 1;
switch (p.stack_used) {
0 => {
p.complete = true;
p.state = State.TopLevelEnd;
},
else => {
p.state = State.ValueEnd;
},
}
token.* = Token.initMarker(Token.Id.ArrayEnd);
},
'{' => {
if (p.stack_used == max_stack_size) {
return error.TooManyNestedItems;
}
p.stack <<= 1;
p.stack |= object_bit;
p.stack_used += 1;
p.state = State.ValueBegin;
p.after_string_state = State.ObjectSeparator;
token.* = Token.initMarker(Token.Id.ObjectBegin);
},
'[' => {
if (p.stack_used == max_stack_size) {
return error.TooManyNestedItems;
}
p.stack <<= 1;
p.stack |= array_bit;
p.stack_used += 1;
p.state = State.ValueBegin;
p.after_string_state = State.ValueEnd;
token.* = Token.initMarker(Token.Id.ArrayBegin);
},
'-' => {
p.state = State.Number;
p.count = 0;
},
'0' => {
p.state = State.NumberMaybeDotOrExponent;
p.count = 0;
},
'1'...'9' => {
p.state = State.NumberMaybeDigitOrDotOrExponent;
p.count = 0;
},
'"' => {
p.state = State.String;
p.count = 0;
},
't' => {
p.state = State.TrueLiteral1;
p.count = 0;
},
'f' => {
p.state = State.FalseLiteral1;
p.count = 0;
},
'n' => {
p.state = State.NullLiteral1;
p.count = 0;
},
0x09, 0x0A, 0x0D, 0x20 => {
// whitespace
},
else => {
return error.InvalidValueBegin;
},
},
// TODO: A bit of duplication here and in the following state, redo.
State.ValueBeginNoClosing => switch (c) {
'{' => {
if (p.stack_used == max_stack_size) {
return error.TooManyNestedItems;
}
p.stack <<= 1;
p.stack |= object_bit;
p.stack_used += 1;
p.state = State.ValueBegin;
p.after_string_state = State.ObjectSeparator;
token.* = Token.initMarker(Token.Id.ObjectBegin);
},
'[' => {
if (p.stack_used == max_stack_size) {
return error.TooManyNestedItems;
}
p.stack <<= 1;
p.stack |= array_bit;
p.stack_used += 1;
p.state = State.ValueBegin;
p.after_string_state = State.ValueEnd;
token.* = Token.initMarker(Token.Id.ArrayBegin);
},
'-' => {
p.state = State.Number;
p.count = 0;
},
'0' => {
p.state = State.NumberMaybeDotOrExponent;
p.count = 0;
},
'1'...'9' => {
p.state = State.NumberMaybeDigitOrDotOrExponent;
p.count = 0;
},
'"' => {
p.state = State.String;
p.count = 0;
},
't' => {
p.state = State.TrueLiteral1;
p.count = 0;
},
'f' => {
p.state = State.FalseLiteral1;
p.count = 0;
},
'n' => {
p.state = State.NullLiteral1;
p.count = 0;
},
0x09, 0x0A, 0x0D, 0x20 => {
// whitespace
},
else => {
return error.InvalidValueBegin;
},
},
State.ValueEnd => switch (c) {
',' => {
p.after_string_state = State.fromInt(p.stack & 1);
p.state = State.ValueBeginNoClosing;
},
']' => {
if (p.stack_used == 0) {
return error.UnbalancedBrackets;
}
p.state = State.ValueEnd;
p.after_string_state = State.fromInt(p.stack & 1);
p.stack >>= 1;
p.stack_used -= 1;
if (p.stack_used == 0) {
p.complete = true;
p.state = State.TopLevelEnd;
}
token.* = Token.initMarker(Token.Id.ArrayEnd);
},
'}' => {
if (p.stack_used == 0) {
return error.UnbalancedBraces;
}
p.state = State.ValueEnd;
p.after_string_state = State.fromInt(p.stack & 1);
p.stack >>= 1;
p.stack_used -= 1;
if (p.stack_used == 0) {
p.complete = true;
p.state = State.TopLevelEnd;
}
token.* = Token.initMarker(Token.Id.ObjectEnd);
},
0x09, 0x0A, 0x0D, 0x20 => {
// whitespace
},
else => {
return error.InvalidValueEnd;
},
},
State.ObjectSeparator => switch (c) {
':' => {
p.state = State.ValueBegin;
p.after_string_state = State.ValueEnd;
},
0x09, 0x0A, 0x0D, 0x20 => {
// whitespace
},
else => {
return error.InvalidSeparator;
},
},
State.String => switch (c) {
0x00...0x1F => {
return error.InvalidControlCharacter;
},
'"' => {
p.state = p.after_string_state;
if (p.after_value_state == State.TopLevelEnd) {
p.state = State.TopLevelEnd;
p.complete = true;
}
token.* = Token.initString(p.count - 1, p.string_has_escape);
},
'\\' => {
p.state = State.StringEscapeCharacter;
},
0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => {
// non-control ascii
},
0xC0...0xDF => {
p.state = State.StringUtf8Byte1;
},
0xE0...0xEF => {
p.state = State.StringUtf8Byte2;
},
0xF0...0xFF => {
p.state = State.StringUtf8Byte3;
},
else => {
return error.InvalidUtf8Byte;
},
},
State.StringUtf8Byte3 => switch (c >> 6) {
0b10 => p.state = State.StringUtf8Byte2,
else => return error.InvalidUtf8Byte,
},
State.StringUtf8Byte2 => switch (c >> 6) {
0b10 => p.state = State.StringUtf8Byte1,
else => return error.InvalidUtf8Byte,
},
State.StringUtf8Byte1 => switch (c >> 6) {
0b10 => p.state = State.String,
else => return error.InvalidUtf8Byte,
},
State.StringEscapeCharacter => switch (c) {
// NOTE: '/' is allowed as an escaped character but it also is allowed
// as unescaped according to the RFC. There is a reported erratum which suggests
// removing the non-escaped variant but it makes more sense to simply disallow
// it as an escape code here.
//
// The current JSONTestSuite tests rely on both of these behaviours being present,
// however, so we default to the status quo where both are accepted until this
// is further clarified.
'"', '\\', '/', 'b', 'f', 'n', 'r', 't' => {
p.string_has_escape = true;
p.state = State.String;
},
'u' => {
p.string_has_escape = true;
p.state = State.StringEscapeHexUnicode4;
},
else => {
return error.InvalidEscapeCharacter;
},
},
State.StringEscapeHexUnicode4 => switch (c) {
'0'...'9', 'A'...'F', 'a'...'f' => {
p.state = State.StringEscapeHexUnicode3;
},
else => return error.InvalidUnicodeHexSymbol,
},
State.StringEscapeHexUnicode3 => switch (c) {
'0'...'9', 'A'...'F', 'a'...'f' => {
p.state = State.StringEscapeHexUnicode2;
},
else => return error.InvalidUnicodeHexSymbol,
},
State.StringEscapeHexUnicode2 => switch (c) {
'0'...'9', 'A'...'F', 'a'...'f' => {
p.state = State.StringEscapeHexUnicode1;
},
else => return error.InvalidUnicodeHexSymbol,
},
State.StringEscapeHexUnicode1 => switch (c) {
'0'...'9', 'A'...'F', 'a'...'f' => {
p.state = State.String;
},
else => return error.InvalidUnicodeHexSymbol,
},
State.Number => {
p.complete = p.after_value_state == State.TopLevelEnd;
switch (c) {
'0' => {
p.state = State.NumberMaybeDotOrExponent;
},
'1'...'9' => {
p.state = State.NumberMaybeDigitOrDotOrExponent;
},
else => {
return error.InvalidNumber;
},
}
},
State.NumberMaybeDotOrExponent => {
p.complete = p.after_value_state == State.TopLevelEnd;
switch (c) {
'.' => {
p.number_is_integer = false;
p.state = State.NumberFractionalRequired;
},
'e', 'E' => {
p.number_is_integer = false;
p.state = State.NumberExponent;
},
else => {
p.state = p.after_value_state;
token.* = Token.initNumber(p.count, p.number_is_integer);
return true;
},
}
},
State.NumberMaybeDigitOrDotOrExponent => {
p.complete = p.after_value_state == State.TopLevelEnd;
switch (c) {
'.' => {
p.number_is_integer = false;
p.state = State.NumberFractionalRequired;
},
'e', 'E' => {
p.number_is_integer = false;
p.state = State.NumberExponent;
},
'0'...'9' => {
// another digit
},
else => {
p.state = p.after_value_state;
token.* = Token.initNumber(p.count, p.number_is_integer);
return true;
},
}
},
State.NumberFractionalRequired => {
p.complete = p.after_value_state == State.TopLevelEnd;
switch (c) {
'0'...'9' => {
p.state = State.NumberFractional;
},
else => {
return error.InvalidNumber;
},
}
},
State.NumberFractional => {
p.complete = p.after_value_state == State.TopLevelEnd;
switch (c) {
'0'...'9' => {
// another digit
},
'e', 'E' => {
p.number_is_integer = false;
p.state = State.NumberExponent;
},
else => {
p.state = p.after_value_state;
token.* = Token.initNumber(p.count, p.number_is_integer);
return true;
},
}
},
State.NumberMaybeExponent => {
p.complete = p.after_value_state == State.TopLevelEnd;
switch (c) {
'e', 'E' => {
p.number_is_integer = false;
p.state = State.NumberExponent;
},
else => {
p.state = p.after_value_state;
token.* = Token.initNumber(p.count, p.number_is_integer);
return true;
},
}
},
State.NumberExponent => switch (c) {
'-', '+' => {
p.complete = false;
p.state = State.NumberExponentDigitsRequired;
},
'0'...'9' => {
p.complete = p.after_value_state == State.TopLevelEnd;
p.state = State.NumberExponentDigits;
},
else => {
return error.InvalidNumber;
},
},
State.NumberExponentDigitsRequired => switch (c) {
'0'...'9' => {
p.complete = p.after_value_state == State.TopLevelEnd;
p.state = State.NumberExponentDigits;
},
else => {
return error.InvalidNumber;
},
},
State.NumberExponentDigits => {
p.complete = p.after_value_state == State.TopLevelEnd;
switch (c) {
'0'...'9' => {
// another digit
},
else => {
p.state = p.after_value_state;
token.* = Token.initNumber(p.count, p.number_is_integer);
return true;
},
}
},
State.TrueLiteral1 => switch (c) {
'r' => p.state = State.TrueLiteral2,
else => return error.InvalidLiteral,
},
State.TrueLiteral2 => switch (c) {
'u' => p.state = State.TrueLiteral3,
else => return error.InvalidLiteral,
},
State.TrueLiteral3 => switch (c) {
'e' => {
p.state = p.after_value_state;
p.complete = p.state == State.TopLevelEnd;
token.* = Token.init(Token.Id.True, p.count + 1, 1);
},
else => {
return error.InvalidLiteral;
},
},
State.FalseLiteral1 => switch (c) {
'a' => p.state = State.FalseLiteral2,
else => return error.InvalidLiteral,
},
State.FalseLiteral2 => switch (c) {
'l' => p.state = State.FalseLiteral3,
else => return error.InvalidLiteral,
},
State.FalseLiteral3 => switch (c) {
's' => p.state = State.FalseLiteral4,
else => return error.InvalidLiteral,
},
State.FalseLiteral4 => switch (c) {
'e' => {
p.state = p.after_value_state;
p.complete = p.state == State.TopLevelEnd;
token.* = Token.init(Token.Id.False, p.count + 1, 1);
},
else => {
return error.InvalidLiteral;
},
},
State.NullLiteral1 => switch (c) {
'u' => p.state = State.NullLiteral2,
else => return error.InvalidLiteral,
},
State.NullLiteral2 => switch (c) {
'l' => p.state = State.NullLiteral3,
else => return error.InvalidLiteral,
},
State.NullLiteral3 => switch (c) {
'l' => {
p.state = p.after_value_state;
p.complete = p.state == State.TopLevelEnd;
token.* = Token.init(Token.Id.Null, p.count + 1, 1);
},
else => {
return error.InvalidLiteral;
},
},
}
return false;
}
};
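// Example usage (a minimal sketch, illustrative only): feed the parser one byte at a
// time and count the tokens it produces. The trailing `]` yields two tokens at once
// (the final Number and the ArrayEnd marker), which is why feed exposes token1 and token2.
test "json.streaming parser example" {
var p = StreamingParser.init();
var found: usize = 0;
const input: []const u8 = "[1, 2]";
for (input) |c| {
var token1: ?Token = undefined;
var token2: ?Token = undefined;
try p.feed(c, &token1, &token2);
if (token1 != null) found += 1;
if (token2 != null) found += 1;
}
testing.expect(found == 4); // ArrayBegin, Number, Number, ArrayEnd
testing.expect(p.complete);
}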
// A small wrapper over a StreamingParser for full slices. Returns a stream of JSON Tokens.
pub const TokenStream = struct {
i: usize,
slice: []const u8,
parser: StreamingParser,
token: ?Token,
pub const Error = StreamingParser.Error || error{UnexpectedEndOfJson};
pub fn init(slice: []const u8) TokenStream {
return TokenStream{
.i = 0,
.slice = slice,
.parser = StreamingParser.init(),
.token = null,
};
}
pub fn next(self: *TokenStream) Error!?Token {
if (self.token) |token| {
const copy = token;
self.token = null;
return copy;
}
var t1: ?Token = undefined;
var t2: ?Token = undefined;
while (self.i < self.slice.len) {
try self.parser.feed(self.slice[self.i], &t1, &t2);
self.i += 1;
if (t1) |token| {
self.token = t2;
return token;
}
}
if (self.parser.complete) {
return null;
} else {
return error.UnexpectedEndOfJson;
}
}
};
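// Example (a minimal sketch, illustrative only): a TokenStream can also be drained
// with a plain while loop instead of the checkNext helper used by the test below.
test "json.token stream example" {
var stream = TokenStream.init("[null]");
var count: usize = 0;
while (try stream.next()) |token| {
count += 1;
}
testing.expect(count == 3); // ArrayBegin, Null, ArrayEnd
}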
fn checkNext(p: *TokenStream, id: Token.Id) void {
const token = (p.next() catch unreachable).?;
debug.assert(token.id == id);
}
test "json.token" {
const s =
\\{
\\ "Image": {
\\ "Width": 800,
\\ "Height": 600,
\\ "Title": "View from 15th Floor",
\\ "Thumbnail": {
\\ "Url": "http://www.example.com/image/481989943",
\\ "Height": 125,
\\ "Width": 100
\\ },
\\ "Animated" : false,
\\ "IDs": [116, 943, 234, 38793]
\\ }
\\}
;
var p = TokenStream.init(s);
checkNext(&p, Token.Id.ObjectBegin);
checkNext(&p, Token.Id.String); // Image
checkNext(&p, Token.Id.ObjectBegin);
checkNext(&p, Token.Id.String); // Width
checkNext(&p, Token.Id.Number);
checkNext(&p, Token.Id.String); // Height
checkNext(&p, Token.Id.Number);
checkNext(&p, Token.Id.String); // Title
checkNext(&p, Token.Id.String);
checkNext(&p, Token.Id.String); // Thumbnail
checkNext(&p, Token.Id.ObjectBegin);
checkNext(&p, Token.Id.String); // Url
checkNext(&p, Token.Id.String);
checkNext(&p, Token.Id.String); // Height
checkNext(&p, Token.Id.Number);
checkNext(&p, Token.Id.String); // Width
checkNext(&p, Token.Id.Number);
checkNext(&p, Token.Id.ObjectEnd);
checkNext(&p, Token.Id.String); // Animated
checkNext(&p, Token.Id.False);
checkNext(&p, Token.Id.String); // IDs
checkNext(&p, Token.Id.ArrayBegin);
checkNext(&p, Token.Id.Number);
checkNext(&p, Token.Id.Number);
checkNext(&p, Token.Id.Number);
checkNext(&p, Token.Id.Number);
checkNext(&p, Token.Id.ArrayEnd);
checkNext(&p, Token.Id.ObjectEnd);
checkNext(&p, Token.Id.ObjectEnd);
testing.expect((try p.next()) == null);
}
// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
// be able to decode the string even if this returns true.
pub fn validate(s: []const u8) bool {
var p = StreamingParser.init();
for (s) |c, i| {
var token1: ?Token = undefined;
var token2: ?Token = undefined;
p.feed(c, &token1, &token2) catch |err| {
return false;
};
}
return p.complete;
}
test "json.validate" {
testing.expect(validate("{}"));
}
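// Example (a minimal sketch, illustrative only): validate also rejects malformed input,
// such as an unterminated object or trailing garbage after a complete value.
test "json.validate rejects invalid input" {
testing.expect(!validate("{"));
testing.expect(!validate("{} x"));
}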
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const ArrayList = std.ArrayList;
const StringHashMap = std.StringHashMap;
pub const ValueTree = struct {
arena: ArenaAllocator,
root: Value,
pub fn deinit(self: *ValueTree) void {
self.arena.deinit();
}
};
pub const ObjectMap = StringHashMap(Value);
pub const Array = ArrayList(Value);
pub const Value = union(enum) {
Null,
Bool: bool,
Integer: i64,
Float: f64,
String: []const u8,
Array: Array,
Object: ObjectMap,
pub fn dump(self: Value) void {
var held = std.debug.getStderrMutex().acquire();
defer held.release();
const stderr = std.debug.getStderrStream();
self.dumpStream(stderr, 1024) catch return;
}
pub fn dumpIndent(self: Value, comptime indent: usize) void {
if (indent == 0) {
self.dump();
} else {
var held = std.debug.getStderrMutex().acquire();
defer held.release();
const stderr = std.debug.getStderrStream();
self.dumpStreamIndent(indent, stderr, 1024) catch return;
}
}
pub fn dumpStream(self: @This(), stream: var, comptime max_depth: usize) !void {
var w = std.json.WriteStream(@typeOf(stream).Child, max_depth).init(stream);
w.newline = "";
w.one_indent = "";
w.space = "";
try w.emitJson(self);
}
pub fn dumpStreamIndent(self: @This(), comptime indent: usize, stream: var, comptime max_depth: usize) !void {
var one_indent = " " ** indent;
var w = std.json.WriteStream(@typeOf(stream).Child, max_depth).init(stream);
w.one_indent = one_indent;
try w.emitJson(self);
}
};
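// Example (a minimal sketch, illustrative only, using the same global test allocator as
// the other tests): a Value can also be assembled by hand rather than produced by the
// Parser below.
test "json.value example" {
var object = ObjectMap.init(debug.global_allocator);
defer object.deinit();
_ = try object.put("answer", Value{ .Integer = 42 });
const root = Value{ .Object = object };
testing.expect(root.Object.get("answer").?.value.Integer == 42);
}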
// A non-stream JSON parser which constructs a tree of Values.
pub const Parser = struct {
allocator: *Allocator,
state: State,
copy_strings: bool,
// Stores parent nodes and un-combined Values.
stack: Array,
const State = enum {
ObjectKey,
ObjectValue,
ArrayValue,
Simple,
};
pub fn init(allocator: *Allocator, copy_strings: bool) Parser {
return Parser{
.allocator = allocator,
.state = State.Simple,
.copy_strings = copy_strings,
.stack = Array.init(allocator),
};
}
pub fn deinit(p: *Parser) void {
p.stack.deinit();
}
pub fn reset(p: *Parser) void {
p.state = State.Simple;
p.stack.shrink(0);
}
pub fn parse(p: *Parser, input: []const u8) !ValueTree {
var s = TokenStream.init(input);
var arena = ArenaAllocator.init(p.allocator);
errdefer arena.deinit();
while (try s.next()) |token| {
try p.transition(&arena.allocator, input, s.i - 1, token);
}
debug.assert(p.stack.len == 1);
return ValueTree{
.arena = arena,
.root = p.stack.at(0),
};
}
// Even though p.allocator exists, we take an explicit allocator so that allocation state
// can be cleaned up correctly on error during a call to `parse`.
fn transition(p: *Parser, allocator: *Allocator, input: []const u8, i: usize, token: Token) !void {
switch (p.state) {
State.ObjectKey => switch (token.id) {
Token.Id.ObjectEnd => {
if (p.stack.len == 1) {
return;
}
var value = p.stack.pop();
try p.pushToParent(&value);
},
Token.Id.String => {
try p.stack.append(try p.parseString(allocator, token, input, i));
p.state = State.ObjectValue;
},
else => {
unreachable;
},
},
State.ObjectValue => {
var object = &p.stack.items[p.stack.len - 2].Object;
var key = p.stack.items[p.stack.len - 1].String;
switch (token.id) {
Token.Id.ObjectBegin => {
try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
p.state = State.ObjectKey;
},
Token.Id.ArrayBegin => {
try p.stack.append(Value{ .Array = Array.init(allocator) });
p.state = State.ArrayValue;
},
Token.Id.String => {
_ = try object.put(key, try p.parseString(allocator, token, input, i));
_ = p.stack.pop();
p.state = State.ObjectKey;
},
Token.Id.Number => {
_ = try object.put(key, try p.parseNumber(token, input, i));
_ = p.stack.pop();
p.state = State.ObjectKey;
},
Token.Id.True => {
_ = try object.put(key, Value{ .Bool = true });
_ = p.stack.pop();
p.state = State.ObjectKey;
},
Token.Id.False => {
_ = try object.put(key, Value{ .Bool = false });
_ = p.stack.pop();
p.state = State.ObjectKey;
},
Token.Id.Null => {
_ = try object.put(key, Value.Null);
_ = p.stack.pop();
p.state = State.ObjectKey;
},
Token.Id.ObjectEnd, Token.Id.ArrayEnd => {
unreachable;
},
}
},
State.ArrayValue => {
var array = &p.stack.items[p.stack.len - 1].Array;
switch (token.id) {
Token.Id.ArrayEnd => {
if (p.stack.len == 1) {
return;
}
var value = p.stack.pop();
try p.pushToParent(&value);
},
Token.Id.ObjectBegin => {
try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
p.state = State.ObjectKey;
},
Token.Id.ArrayBegin => {
try p.stack.append(Value{ .Array = Array.init(allocator) });
p.state = State.ArrayValue;
},
Token.Id.String => {
try array.append(try p.parseString(allocator, token, input, i));
},
Token.Id.Number => {
try array.append(try p.parseNumber(token, input, i));
},
Token.Id.True => {
try array.append(Value{ .Bool = true });
},
Token.Id.False => {
try array.append(Value{ .Bool = false });
},
Token.Id.Null => {
try array.append(Value.Null);
},
Token.Id.ObjectEnd => {
unreachable;
},
}
},
State.Simple => switch (token.id) {
Token.Id.ObjectBegin => {
try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
p.state = State.ObjectKey;
},
Token.Id.ArrayBegin => {
try p.stack.append(Value{ .Array = Array.init(allocator) });
p.state = State.ArrayValue;
},
Token.Id.String => {
try p.stack.append(try p.parseString(allocator, token, input, i));
},
Token.Id.Number => {
try p.stack.append(try p.parseNumber(token, input, i));
},
Token.Id.True => {
try p.stack.append(Value{ .Bool = true });
},
Token.Id.False => {
try p.stack.append(Value{ .Bool = false });
},
Token.Id.Null => {
try p.stack.append(Value.Null);
},
Token.Id.ObjectEnd, Token.Id.ArrayEnd => {
unreachable;
},
},
}
}
fn pushToParent(p: *Parser, value: *const Value) !void {
switch (p.stack.toSlice()[p.stack.len - 1]) {
// Object Parent -> [ ..., object, <key>, value ]
Value.String => |key| {
_ = p.stack.pop();
var object = &p.stack.items[p.stack.len - 1].Object;
_ = try object.put(key, value.*);
p.state = State.ObjectKey;
},
// Array Parent -> [ ..., <array>, value ]
Value.Array => |*array| {
try array.append(value.*);
p.state = State.ArrayValue;
},
else => {
unreachable;
},
}
}
fn parseString(p: *Parser, allocator: *Allocator, token: Token, input: []const u8, i: usize) !Value {
// TODO: We don't strictly have to copy values which do not contain any escape
// characters if flagged with the option.
const slice = token.slice(input, i);
return Value{ .String = try mem.dupe(allocator, u8, slice) };
}
fn parseNumber(p: *Parser, token: Token, input: []const u8, i: usize) !Value {
return if (token.number_is_integer)
Value{ .Integer = try std.fmt.parseInt(i64, token.slice(input, i), 10) }
else
Value{ .Float = try std.fmt.parseFloat(f64, token.slice(input, i)) };
}
};
test "json.parser.dynamic" {
var p = Parser.init(debug.global_allocator, false);
defer p.deinit();
const s =
\\{
\\ "Image": {
\\ "Width": 800,
\\ "Height": 600,
\\ "Title": "View from 15th Floor",
\\ "Thumbnail": {
\\ "Url": "http://www.example.com/image/481989943",
\\ "Height": 125,
\\ "Width": 100
\\ },
\\ "Animated" : false,
\\ "IDs": [116, 943, 234, 38793],
\\ "ArrayOfObject": [{"n": "m"}],
\\ "double": 1.3412
\\ }
\\}
;
var tree = try p.parse(s);
defer tree.deinit();
var root = tree.root;
var image = root.Object.get("Image").?.value;
const width = image.Object.get("Width").?.value;
testing.expect(width.Integer == 800);
const height = image.Object.get("Height").?.value;
testing.expect(height.Integer == 600);
const title = image.Object.get("Title").?.value;
testing.expect(mem.eql(u8, title.String, "View from 15th Floor"));
const animated = image.Object.get("Animated").?.value;
testing.expect(animated.Bool == false);
const array_of_object = image.Object.get("ArrayOfObject").?.value;
testing.expect(array_of_object.Array.len == 1);
const obj0 = array_of_object.Array.at(0).Object.get("n").?.value;
testing.expect(mem.eql(u8, obj0.String, "m"));
const double = image.Object.get("double").?.value;
testing.expect(double.Float == 1.3412);
}
test "import more json tests" {
_ = @import("json/test.zig");
_ = @import("json/write_stream.zig");
}
test "write json then parse it" {
var out_buffer: [1000]u8 = undefined;
var slice_out_stream = std.io.SliceOutStream.init(&out_buffer);
const out_stream = &slice_out_stream.stream;
var jw = WriteStream(@typeOf(out_stream).Child, 4).init(out_stream);
try jw.beginObject();
try jw.objectField("f");
try jw.emitBool(false);
try jw.objectField("t");
try jw.emitBool(true);
try jw.objectField("int");
try jw.emitNumber(@as(i32, 1234));
try jw.objectField("array");
try jw.beginArray();
try jw.arrayElem();
try jw.emitNull();
try jw.arrayElem();
try jw.emitNumber(@as(f64, 12.34));
try jw.endArray();
try jw.objectField("str");
try jw.emitString("hello");
try jw.endObject();
var mem_buffer: [1024 * 20]u8 = undefined;
const allocator = &std.heap.FixedBufferAllocator.init(&mem_buffer).allocator;
var parser = Parser.init(allocator, false);
const tree = try parser.parse(slice_out_stream.getWritten());
testing.expect(tree.root.Object.get("f").?.value.Bool == false);
testing.expect(tree.root.Object.get("t").?.value.Bool == true);
testing.expect(tree.root.Object.get("int").?.value.Integer == 1234);
testing.expect(tree.root.Object.get("array").?.value.Array.at(0).Null == {});
testing.expect(tree.root.Object.get("array").?.value.Array.at(1).Float == 12.34);
testing.expect(mem.eql(u8, tree.root.Object.get("str").?.value.String, "hello"));
}
test "parsing empty string gives appropriate error" {
var p = Parser.init(debug.global_allocator, false);
defer p.deinit();
testing.expectError(error.UnexpectedEndOfJson, p.parse(""));
}