From 79a5816b2643efef936651ba6384af616af9a2fb Mon Sep 17 00:00:00 2001 From: Nathan Reiner Date: Wed, 21 May 2025 19:44:11 +0200 Subject: fix unit tests --- src/grammar.zig | 95 +++++++++++++++++++++++++++++----------------------- src/non-terminal.zig | 54 ++++++++++++++++++++--------- src/recognizer.zig | 29 ++++++++++------ 3 files changed, 109 insertions(+), 69 deletions(-) (limited to 'src') diff --git a/src/grammar.zig b/src/grammar.zig index 3c586f3..9b794a0 100644 --- a/src/grammar.zig +++ b/src/grammar.zig @@ -1,12 +1,12 @@ const std = @import("std"); +const Rule = @import("rule.zig"); const Character = @import("character.zig").Character; const NonTerminal = @import("non-terminal.zig"); const Self = @This(); non_terminals: []NonTerminal, -entry_id: usize, allocator: std.mem.Allocator, pub fn parse(entry: []const u8, buffer: []const u8, allocator: std.mem.Allocator) !Self { @@ -18,18 +18,29 @@ pub fn parse(entry: []const u8, buffer: []const u8, allocator: std.mem.Allocator const name = try NonTerminal.parse_name(line); if (!names.contains(name)) { - try names.put(name, names.count()); + try names.put(name, names.count() + 1); } } lines = std.mem.tokenizeScalar(u8, buffer, '\n'); var self = Self { - .non_terminals = try allocator.alloc(NonTerminal, names.count()), - .entry_id = names.get(entry) orelse return error.MissingRule, + .non_terminals = try allocator.alloc(NonTerminal, names.count() + 1), .allocator = allocator, }; errdefer self.deinit(); + { + self.non_terminals[0] = try NonTerminal.init("_start", 0, allocator); + const init_rule = try allocator.alloc(Character, 1); + init_rule[0] = Character { + .non_terminal = names.get(entry) orelse return error.MissingRule, + }; + + try self.non_terminals[0].add_rule(Rule { + .items = init_rule + }); + } + { var iterator = names.iterator(); while (iterator.next()) |name_entry| { @@ -68,7 +79,7 @@ pub inline fn non_terminal_by_id(self: *Self, id: usize) *NonTerminal { } pub inline fn entry_point(self: *Self) *NonTerminal { - return self.non_terminal_by_id(self.entry_id); + return self.non_terminal_by_id(0); } fn generate_first(self: *Self) void { @@ -254,17 +265,17 @@ test "expr" { var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); - try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { '(', 'a', 'b' }); - try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { '+', Character.EPSILON }); - try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { '(', 'a', 'b' }); - try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { '*', Character.EPSILON }); - try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { '(', 'a', 'b' }); + try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { '(', 'a', 'b' }); + try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { '+', Character.EPSILON }); + try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { '(', 'a', 'b' }); + try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { '*', Character.EPSILON }); + try grammar.non_terminal_by_id(5).first.expect(&[_]u9 { '(', 'a', 'b' }); - try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { ')', Character.END }); try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { ')', Character.END }); - try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { '+', ')', Character.END }); + try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { ')', Character.END }); try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { '+', ')', Character.END }); - try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { '*', '+', ')', Character.END }); + try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { '+', ')', Character.END }); + try grammar.non_terminal_by_id(5).follows.expect(&[_]u9 { '*', '+', ')', Character.END }); } test "sample 0" { @@ -279,15 +290,15 @@ test "sample 0" { var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); - try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON }); - try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'd', 'g', 'h', Character.EPSILON }); - try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'g', Character.EPSILON }); - try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'h', Character.EPSILON }); + try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON }); + try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd', 'g', 'h', Character.EPSILON }); + try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'g', Character.EPSILON }); + try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { 'h', Character.EPSILON }); - try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END }); - try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'g', 'h', Character.END }); - try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'g', 'h', Character.END }); - try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'b', 'g', 'h', Character.END }); + try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { Character.END }); + try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'g', 'h', Character.END }); + try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'a', 'g', 'h', Character.END }); + try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { 'b', 'g', 'h', Character.END }); } test "sample 1" { @@ -303,13 +314,13 @@ test "sample 1" { var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); - try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a' }); - try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'c', Character.EPSILON }); - try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd', Character.EPSILON }); + try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a' }); + try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'c', Character.EPSILON }); + try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'd', Character.EPSILON }); - try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END }); - try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'b', 'd' }); - try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'b' }); + try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { Character.END }); + try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'b', 'd' }); + try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'b' }); } test "sample 2" { @@ -329,17 +340,17 @@ test "sample 2" { var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); - try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a', 'c', 'e' }); - try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c' }); - try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd' }); - try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'e', Character.EPSILON }); - try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { 'f', Character.EPSILON }); + try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c', 'e' }); + try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'a', 'c' }); + try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'd' }); + try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { 'e', Character.EPSILON }); + try grammar.non_terminal_by_id(5).first.expect(&[_]u9 { 'f', Character.EPSILON }); - try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END }); - try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'd' }); - try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { Character.END }); + try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { Character.END }); + try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'd' }); try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { Character.END }); - try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { 'a' }); + try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { Character.END }); + try grammar.non_terminal_by_id(5).follows.expect(&[_]u9 { 'a' }); } test "sample 3" { @@ -357,11 +368,11 @@ test "sample 3" { var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); - try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a', 'b', 'c', Character.EPSILON }); - try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c' }); - try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'a', 'b' }); + try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'b', 'c', Character.EPSILON }); + try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'a', 'c' }); + try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'a', 'b' }); - try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { 'd', Character.END }); - try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd' }); - try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd', Character.END }); + try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'd', Character.END }); + try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd' }); + try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd', Character.END }); } diff --git a/src/non-terminal.zig b/src/non-terminal.zig index fdab32a..6eaf068 100644 --- a/src/non-terminal.zig +++ b/src/non-terminal.zig @@ -155,9 +155,10 @@ test "single-non-terminal" { try non_terminal.parse_and_add_variants("main -> one", &names); - try std.testing.expectEqualDeep(&[_][]const Character{ - &[_] Character { Character { .non_terminal = 1 } } - }, non_terminal.rules()); + try std.testing.expectEqualDeep( + &[_] Character { Character { .non_terminal = 1 } }, + non_terminal.rules()[0].items + ); } test "multiple-non-terminal" { @@ -174,11 +175,12 @@ test "multiple-non-terminal" { try non_terminal.parse_and_add_variants("main -> one | two | three", &names); - try std.testing.expectEqualDeep(&[_][]const Character{ - &[_] Character { Character { .non_terminal = 1 } }, - &[_] Character { Character { .non_terminal = 2 } }, - &[_] Character { Character { .non_terminal = 3 } }, - }, non_terminal.rules()); + inline for (0..3) |i| { + try std.testing.expectEqualDeep( + &[_] Character { Character { .non_terminal = i + 1 } }, + non_terminal.rules()[i].items + ); + } } test "multiple-non-terminal-sequence" { @@ -195,11 +197,20 @@ test "multiple-non-terminal-sequence" { try non_terminal.parse_and_add_variants("main -> one two | two | three", &names); - try std.testing.expectEqualDeep(&[_][]const Character{ + try std.testing.expectEqualDeep( &[_] Character { Character { .non_terminal = 1 }, Character { .non_terminal = 2 }, }, + non_terminal.rules()[0].items + ); + + try std.testing.expectEqualDeep( &[_] Character { Character { .non_terminal = 2 } }, + non_terminal.rules()[1].items + ); + + try std.testing.expectEqualDeep( &[_] Character { Character { .non_terminal = 3 } }, - }, non_terminal.rules()); + non_terminal.rules()[2].items + ); } test "single-terminal" { @@ -214,13 +225,14 @@ test "single-terminal" { try non_terminal.parse_and_add_variants("main -> 'one'", &names); - try std.testing.expectEqualDeep(&[_][]const Character{ + try std.testing.expectEqualDeep( &[_] Character { Character { .terminal = 'o' }, Character { .terminal = 'n' }, Character { .terminal = 'e' } }, - }, non_terminal.rules()); + non_terminal.rules()[0].items + ); } test "single-terminal-with-escapes" { @@ -234,7 +246,7 @@ test "single-terminal-with-escapes" { try non_terminal.parse_and_add_variants("main -> 'o\\r\\nn\\'e\\\\'", &names); - try std.testing.expectEqualDeep(&[_][]const Character{ + try std.testing.expectEqualDeep( &[_] Character { Character { .terminal = 'o' }, Character { .terminal = '\r' }, @@ -244,7 +256,8 @@ test "single-terminal-with-escapes" { Character { .terminal = 'e' }, Character { .terminal = '\\' }, }, - }, non_terminal.rules()); + non_terminal.rules()[0].items + ); } test "mix" { @@ -260,19 +273,28 @@ test "mix" { try non_terminal.parse_and_add_variants("main -> 'ab' other | other some 'a' | main", &names); - try std.testing.expectEqualDeep(&[_][]const Character{ + try std.testing.expectEqualDeep( &[_] Character { Character { .terminal = 'a' }, Character { .terminal = 'b' }, Character { .non_terminal = 1 }, }, + non_terminal.rules()[0].items, + ); + + try std.testing.expectEqualDeep( &[_] Character { Character { .non_terminal = 1 }, Character { .non_terminal = 2 }, Character { .terminal = 'a' }, }, + non_terminal.rules()[1].items, + ); + + try std.testing.expectEqualDeep( &[_] Character { Character { .non_terminal = 0 }, }, - }, non_terminal.rules()); + non_terminal.rules()[2].items, + ); } diff --git a/src/recognizer.zig b/src/recognizer.zig index 696a80a..4001e2c 100644 --- a/src/recognizer.zig +++ b/src/recognizer.zig @@ -55,7 +55,7 @@ const State = struct { const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index]; std.debug.print("{*} state (", .{ self }); - for (rule, 0..) |char, index| { + for (rule.items, 0..) |char, index| { if (index == self.inner_position) { std.debug.print("○", .{}); } @@ -63,7 +63,7 @@ const State = struct { std.debug.print("{}", .{char}); } - if (rule.len == self.inner_position) { + if (rule.items.len == self.inner_position) { std.debug.print("○", .{}); } std.debug.print(")\n", .{}); @@ -72,25 +72,32 @@ const State = struct { pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool { const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index]; - if (node.state.is_at_end_of_rule(rule)) { - for (node.parents.items) |parent| { - if (reaches_end_of_entry(grammar, parent)) { - return true; - } - } - return node.is_toplevel; + if (node.parents.items.len == 0) { + return true; } for (rule.items[node.state.inner_position..]) |character| { switch (character) { .terminal => return false, - .non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END), + .non_terminal => |n| { + if (!grammar.non_terminal_by_id(n).first.is_set(Character.EPSILON)) { + std.debug.print("HERE {}\n", .{n}); + return false; + } + }, else => {} } } - return grammar.non_terminal_by_id(node.state.id).follows.is_set(Character.END); + for (node.parents.items) |parent| { + parent.state = parent.state.next(); + if (reaches_end_of_entry(grammar, parent)) { + return true; + } + } + + return false; } pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool { -- cgit v1.2.3-70-g09d2