aboutsummaryrefslogtreecommitdiff
path: root/src/non-terminal.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/non-terminal.zig')
-rw-r--r--src/non-terminal.zig240
1 files changed, 212 insertions, 28 deletions
diff --git a/src/non-terminal.zig b/src/non-terminal.zig
index eed30e3..1665a80 100644
--- a/src/non-terminal.zig
+++ b/src/non-terminal.zig
@@ -1,30 +1,36 @@
const std = @import("std");
const CharSet = @import("char-set.zig");
const Character = @import("character.zig").Character;
+const string = @import("string.zig");
const Self = @This();
pub const Rule = []Character;
-name: u8,
+id: usize,
+name: []const u8,
rule_list: std.ArrayList(Rule),
-first: CharSet,
-follows: CharSet,
+first: CharSet = CharSet {},
+follows: CharSet = CharSet {},
+allocator: std.mem.Allocator,
-pub fn init(name: u8, allocator: std.mem.Allocator) Self {
+pub fn init(name: []const u8, id: usize, allocator: std.mem.Allocator) !Self {
+ const target = try allocator.alloc(u8, name.len);
+ @memcpy(target, name);
return Self {
- .name = name,
+ .name = target,
+ .id = id,
.rule_list = std.ArrayList(Rule).init(allocator),
- .first = CharSet {},
- .follows = CharSet {},
+ .allocator = allocator,
};
}
-pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
+pub fn deinit(self: *Self) void {
for (self.rules()) |rule| {
- allocator.free(rule);
+ self.allocator.free(rule);
}
self.rule_list.deinit();
+ self.allocator.free(self.name);
}
pub inline fn rules(self: *Self) []Rule {
@@ -35,32 +41,78 @@ pub fn add_rule(self: *Self, rule: Rule) !void {
try self.rule_list.append(rule);
}
-pub fn parse_rule(
+pub fn parse_name(buffer: []const u8) ![]const u8 {
+ const arrow_start = std.mem.indexOf(u8, buffer, "->") orelse return error.MissingArrow;
+
+ if (arrow_start == 0) {
+ return error.MissingRuleName;
+ }
+
+ return std.mem.trim(u8, buffer[0..arrow_start], &std.ascii.whitespace);
+}
+
+pub fn parse_and_add_variants(
+ self: *Self,
buffer: []const u8,
- allocator: std.mem.Allocator
-) !struct { u8, Rule } {
+ names: *const std.StringHashMap(usize),
+) !void {
+ const arrow_start = std.mem.indexOf(u8, buffer, "->") orelse return error.MissingArrow;
+ var head: []const u8 = buffer[arrow_start + 2..];
- var rule = std.ArrayList(Character).init(allocator);
+ if (std.mem.trimLeft(u8, head, &std.ascii.whitespace).len == 0) {
+ return error.MissingBody;
+ }
+
+ var rule = std.ArrayList(Character).init(self.allocator);
errdefer rule.deinit();
- var tokens = std.mem.tokenizeAny(u8, buffer, &std.ascii.whitespace);
+ head = std.mem.trimLeft(u8, head, &std.ascii.whitespace);
+ while (head.len > 0) {
+ if (head[0] == '|') {
+ try self.rule_list.append(try rule.toOwnedSlice());
+ rule = std.ArrayList(Character).init(self.allocator);
+ head = head[1..];
+ } else if (head[0] == '\'') {
+ head = head[1..];
+
+ if (head.len > 0 and head[0] == '\'') {
+ try rule.append(Character { .epsilon = void{} });
+ }
+
+ while (head.len > 0 and head[0] != '\'') {
+ const char, head = try string.read_first_from_literal(head);
+ try rule.append(Character { .terminal = char });
+ }
+
+ if (head.len == 0) {
+ return error.UnterminatedString;
+ }
+
+ head = head[1..];
+ } else {
- const lhs = tokens.next() orelse return error.MissingLhs;
- if (lhs.len > 1) return error.InvalidLhs;
+ const child_name, head = string.read_name(head);
- const arrow = tokens.next() orelse return error.MissingArrow;
- if (!std.mem.eql(u8, arrow, "->")) return error.InvalidArrow;
+ if (child_name.len == 0) {
+ std.debug.print("{s}\n", .{buffer});
+ return error.EmptyVariant;
+ }
+
+ try rule.append(Character {
+ .non_terminal = names.get(child_name) orelse {
+ return error.MissingRule;
+ }
+ });
+ }
- while (tokens.next()) |token| {
- if (token.len > 1) return error.InvalidCharacter;
- try rule.append(Character.from_u8(token[0]));
+ head = std.mem.trimLeft(u8, head, &std.ascii.whitespace);
}
if (rule.items.len == 0) {
- return error.EmptyProduction;
+ return error.EmptyVariant;
}
- return .{ lhs[0], try rule.toOwnedSlice() };
+ try self.rule_list.append(try rule.toOwnedSlice());
}
pub inline fn is_empty(self: *const Self) bool {
@@ -76,20 +128,152 @@ pub fn format(
_ = fmt;
_ = options;
- try writer.print("[{c} -> ", .{ self.name });
+ try writer.print("{s} ->", .{ self.name });
if (self.rule_list.items.len > 0) {
for (self.rule_list.items[0]) |c| {
- try writer.print("{}", .{c});
+ try writer.print(" {}", .{c});
}
for (self.rule_list.items[1..]) |r| {
- try writer.print(" | ", .{});
+ try writer.print(" |", .{});
for (r) |c| {
- try writer.print("{}", .{c});
+ try writer.print(" {}", .{c});
}
}
}
+}
+
+test "single-non-terminal" {
+ const allocator = std.testing.allocator;
+ var non_terminal = try Self.init("main", 0, allocator);
+ defer non_terminal.deinit();
+
+ var names = std.StringHashMap(usize).init(allocator);
+ defer names.deinit();
+ try names.put("main", 0);
+ try names.put("one", 1);
+
+ try non_terminal.parse_and_add_variants("main -> one", &names);
+
+ try std.testing.expectEqualDeep(&[_][]const Character{
+ &[_] Character { Character { .non_terminal = 1 } }
+ }, non_terminal.rules());
+}
+
+test "multiple-non-terminal" {
+ const allocator = std.testing.allocator;
+ var non_terminal = try Self.init("main", 0, allocator);
+ defer non_terminal.deinit();
+
+ var names = std.StringHashMap(usize).init(allocator);
+ defer names.deinit();
+ try names.put("main", 0);
+ try names.put("one", 1);
+ try names.put("two", 2);
+ try names.put("three", 3);
+
+ try non_terminal.parse_and_add_variants("main -> one | two | three", &names);
+
+ try std.testing.expectEqualDeep(&[_][]const Character{
+ &[_] Character { Character { .non_terminal = 1 } },
+ &[_] Character { Character { .non_terminal = 2 } },
+ &[_] Character { Character { .non_terminal = 3 } },
+ }, non_terminal.rules());
+}
+
+test "multiple-non-terminal-sequence" {
+ const allocator = std.testing.allocator;
+ var non_terminal = try Self.init("main", 0, allocator);
+ defer non_terminal.deinit();
+
+ var names = std.StringHashMap(usize).init(allocator);
+ defer names.deinit();
+ try names.put("main", 0);
+ try names.put("one", 1);
+ try names.put("two", 2);
+ try names.put("three", 3);
+
+ try non_terminal.parse_and_add_variants("main -> one two | two | three", &names);
+
+ try std.testing.expectEqualDeep(&[_][]const Character{
+ &[_] Character { Character { .non_terminal = 1 }, Character { .non_terminal = 2 }, },
+ &[_] Character { Character { .non_terminal = 2 } },
+ &[_] Character { Character { .non_terminal = 3 } },
+ }, non_terminal.rules());
+}
+
+test "single-terminal" {
+ const allocator = std.testing.allocator;
+ var non_terminal = try Self.init("main", 0, allocator);
+ defer non_terminal.deinit();
+
+ var names = std.StringHashMap(usize).init(allocator);
+ defer names.deinit();
+ try names.put("main", 0);
+ try names.put("one", 1);
+
+ try non_terminal.parse_and_add_variants("main -> 'one'", &names);
+
+ try std.testing.expectEqualDeep(&[_][]const Character{
+ &[_] Character {
+ Character { .terminal = 'o' },
+ Character { .terminal = 'n' },
+ Character { .terminal = 'e' }
+ },
+ }, non_terminal.rules());
+}
+
+test "single-terminal-with-escapes" {
+ const allocator = std.testing.allocator;
+ var non_terminal = try Self.init("main", 0, allocator);
+ defer non_terminal.deinit();
+
+ var names = std.StringHashMap(usize).init(allocator);
+ defer names.deinit();
+ try names.put("main", 0);
+
+ try non_terminal.parse_and_add_variants("main -> 'o\\r\\nn\\'e\\\\'", &names);
+
+ try std.testing.expectEqualDeep(&[_][]const Character{
+ &[_] Character {
+ Character { .terminal = 'o' },
+ Character { .terminal = '\r' },
+ Character { .terminal = '\n' },
+ Character { .terminal = 'n' },
+ Character { .terminal = '\'' },
+ Character { .terminal = 'e' },
+ Character { .terminal = '\\' },
+ },
+ }, non_terminal.rules());
+}
+
+test "mix" {
+ const allocator = std.testing.allocator;
+ var non_terminal = try Self.init("main", 0, allocator);
+ defer non_terminal.deinit();
+
+ var names = std.StringHashMap(usize).init(allocator);
+ defer names.deinit();
+ try names.put("main", 0);
+ try names.put("other", 1);
+ try names.put("some", 2);
+
+ try non_terminal.parse_and_add_variants("main -> 'ab' other | other some 'a' | main", &names);
- try writer.writeByte(']');
+ try std.testing.expectEqualDeep(&[_][]const Character{
+ &[_] Character {
+ Character { .terminal = 'a' },
+ Character { .terminal = 'b' },
+ Character { .non_terminal = 1 },
+ },
+ &[_] Character {
+ Character { .non_terminal = 1 },
+ Character { .non_terminal = 2 },
+ Character { .terminal = 'a' },
+ },
+ &[_] Character {
+ Character { .non_terminal = 0 },
+ },
+ }, non_terminal.rules());
}