diff options
Diffstat (limited to 'src/non-terminal.zig')
| -rw-r--r-- | src/non-terminal.zig | 240 |
1 files changed, 212 insertions, 28 deletions
diff --git a/src/non-terminal.zig b/src/non-terminal.zig index eed30e3..1665a80 100644 --- a/src/non-terminal.zig +++ b/src/non-terminal.zig @@ -1,30 +1,36 @@ const std = @import("std"); const CharSet = @import("char-set.zig"); const Character = @import("character.zig").Character; +const string = @import("string.zig"); const Self = @This(); pub const Rule = []Character; -name: u8, +id: usize, +name: []const u8, rule_list: std.ArrayList(Rule), -first: CharSet, -follows: CharSet, +first: CharSet = CharSet {}, +follows: CharSet = CharSet {}, +allocator: std.mem.Allocator, -pub fn init(name: u8, allocator: std.mem.Allocator) Self { +pub fn init(name: []const u8, id: usize, allocator: std.mem.Allocator) !Self { + const target = try allocator.alloc(u8, name.len); + @memcpy(target, name); return Self { - .name = name, + .name = target, + .id = id, .rule_list = std.ArrayList(Rule).init(allocator), - .first = CharSet {}, - .follows = CharSet {}, + .allocator = allocator, }; } -pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { +pub fn deinit(self: *Self) void { for (self.rules()) |rule| { - allocator.free(rule); + self.allocator.free(rule); } self.rule_list.deinit(); + self.allocator.free(self.name); } pub inline fn rules(self: *Self) []Rule { @@ -35,32 +41,78 @@ pub fn add_rule(self: *Self, rule: Rule) !void { try self.rule_list.append(rule); } -pub fn parse_rule( +pub fn parse_name(buffer: []const u8) ![]const u8 { + const arrow_start = std.mem.indexOf(u8, buffer, "->") orelse return error.MissingArrow; + + if (arrow_start == 0) { + return error.MissingRuleName; + } + + return std.mem.trim(u8, buffer[0..arrow_start], &std.ascii.whitespace); +} + +pub fn parse_and_add_variants( + self: *Self, buffer: []const u8, - allocator: std.mem.Allocator -) !struct { u8, Rule } { + names: *const std.StringHashMap(usize), +) !void { + const arrow_start = std.mem.indexOf(u8, buffer, "->") orelse return error.MissingArrow; + var head: []const u8 = buffer[arrow_start + 2..]; - var rule = std.ArrayList(Character).init(allocator); + if (std.mem.trimLeft(u8, head, &std.ascii.whitespace).len == 0) { + return error.MissingBody; + } + + var rule = std.ArrayList(Character).init(self.allocator); errdefer rule.deinit(); - var tokens = std.mem.tokenizeAny(u8, buffer, &std.ascii.whitespace); + head = std.mem.trimLeft(u8, head, &std.ascii.whitespace); + while (head.len > 0) { + if (head[0] == '|') { + try self.rule_list.append(try rule.toOwnedSlice()); + rule = std.ArrayList(Character).init(self.allocator); + head = head[1..]; + } else if (head[0] == '\'') { + head = head[1..]; + + if (head.len > 0 and head[0] == '\'') { + try rule.append(Character { .epsilon = void{} }); + } + + while (head.len > 0 and head[0] != '\'') { + const char, head = try string.read_first_from_literal(head); + try rule.append(Character { .terminal = char }); + } + + if (head.len == 0) { + return error.UnterminatedString; + } + + head = head[1..]; + } else { - const lhs = tokens.next() orelse return error.MissingLhs; - if (lhs.len > 1) return error.InvalidLhs; + const child_name, head = string.read_name(head); - const arrow = tokens.next() orelse return error.MissingArrow; - if (!std.mem.eql(u8, arrow, "->")) return error.InvalidArrow; + if (child_name.len == 0) { + std.debug.print("{s}\n", .{buffer}); + return error.EmptyVariant; + } + + try rule.append(Character { + .non_terminal = names.get(child_name) orelse { + return error.MissingRule; + } + }); + } - while (tokens.next()) |token| { - if (token.len > 1) return error.InvalidCharacter; - try rule.append(Character.from_u8(token[0])); + head = std.mem.trimLeft(u8, head, &std.ascii.whitespace); } if (rule.items.len == 0) { - return error.EmptyProduction; + return error.EmptyVariant; } - return .{ lhs[0], try rule.toOwnedSlice() }; + try self.rule_list.append(try rule.toOwnedSlice()); } pub inline fn is_empty(self: *const Self) bool { @@ -76,20 +128,152 @@ pub fn format( _ = fmt; _ = options; - try writer.print("[{c} -> ", .{ self.name }); + try writer.print("{s} ->", .{ self.name }); if (self.rule_list.items.len > 0) { for (self.rule_list.items[0]) |c| { - try writer.print("{}", .{c}); + try writer.print(" {}", .{c}); } for (self.rule_list.items[1..]) |r| { - try writer.print(" | ", .{}); + try writer.print(" |", .{}); for (r) |c| { - try writer.print("{}", .{c}); + try writer.print(" {}", .{c}); } } } +} + +test "single-non-terminal" { + const allocator = std.testing.allocator; + var non_terminal = try Self.init("main", 0, allocator); + defer non_terminal.deinit(); + + var names = std.StringHashMap(usize).init(allocator); + defer names.deinit(); + try names.put("main", 0); + try names.put("one", 1); + + try non_terminal.parse_and_add_variants("main -> one", &names); + + try std.testing.expectEqualDeep(&[_][]const Character{ + &[_] Character { Character { .non_terminal = 1 } } + }, non_terminal.rules()); +} + +test "multiple-non-terminal" { + const allocator = std.testing.allocator; + var non_terminal = try Self.init("main", 0, allocator); + defer non_terminal.deinit(); + + var names = std.StringHashMap(usize).init(allocator); + defer names.deinit(); + try names.put("main", 0); + try names.put("one", 1); + try names.put("two", 2); + try names.put("three", 3); + + try non_terminal.parse_and_add_variants("main -> one | two | three", &names); + + try std.testing.expectEqualDeep(&[_][]const Character{ + &[_] Character { Character { .non_terminal = 1 } }, + &[_] Character { Character { .non_terminal = 2 } }, + &[_] Character { Character { .non_terminal = 3 } }, + }, non_terminal.rules()); +} + +test "multiple-non-terminal-sequence" { + const allocator = std.testing.allocator; + var non_terminal = try Self.init("main", 0, allocator); + defer non_terminal.deinit(); + + var names = std.StringHashMap(usize).init(allocator); + defer names.deinit(); + try names.put("main", 0); + try names.put("one", 1); + try names.put("two", 2); + try names.put("three", 3); + + try non_terminal.parse_and_add_variants("main -> one two | two | three", &names); + + try std.testing.expectEqualDeep(&[_][]const Character{ + &[_] Character { Character { .non_terminal = 1 }, Character { .non_terminal = 2 }, }, + &[_] Character { Character { .non_terminal = 2 } }, + &[_] Character { Character { .non_terminal = 3 } }, + }, non_terminal.rules()); +} + +test "single-terminal" { + const allocator = std.testing.allocator; + var non_terminal = try Self.init("main", 0, allocator); + defer non_terminal.deinit(); + + var names = std.StringHashMap(usize).init(allocator); + defer names.deinit(); + try names.put("main", 0); + try names.put("one", 1); + + try non_terminal.parse_and_add_variants("main -> 'one'", &names); + + try std.testing.expectEqualDeep(&[_][]const Character{ + &[_] Character { + Character { .terminal = 'o' }, + Character { .terminal = 'n' }, + Character { .terminal = 'e' } + }, + }, non_terminal.rules()); +} + +test "single-terminal-with-escapes" { + const allocator = std.testing.allocator; + var non_terminal = try Self.init("main", 0, allocator); + defer non_terminal.deinit(); + + var names = std.StringHashMap(usize).init(allocator); + defer names.deinit(); + try names.put("main", 0); + + try non_terminal.parse_and_add_variants("main -> 'o\\r\\nn\\'e\\\\'", &names); + + try std.testing.expectEqualDeep(&[_][]const Character{ + &[_] Character { + Character { .terminal = 'o' }, + Character { .terminal = '\r' }, + Character { .terminal = '\n' }, + Character { .terminal = 'n' }, + Character { .terminal = '\'' }, + Character { .terminal = 'e' }, + Character { .terminal = '\\' }, + }, + }, non_terminal.rules()); +} + +test "mix" { + const allocator = std.testing.allocator; + var non_terminal = try Self.init("main", 0, allocator); + defer non_terminal.deinit(); + + var names = std.StringHashMap(usize).init(allocator); + defer names.deinit(); + try names.put("main", 0); + try names.put("other", 1); + try names.put("some", 2); + + try non_terminal.parse_and_add_variants("main -> 'ab' other | other some 'a' | main", &names); - try writer.writeByte(']'); + try std.testing.expectEqualDeep(&[_][]const Character{ + &[_] Character { + Character { .terminal = 'a' }, + Character { .terminal = 'b' }, + Character { .non_terminal = 1 }, + }, + &[_] Character { + Character { .non_terminal = 1 }, + Character { .non_terminal = 2 }, + Character { .terminal = 'a' }, + }, + &[_] Character { + Character { .non_terminal = 0 }, + }, + }, non_terminal.rules()); } |