aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2025-05-21 19:44:11 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2025-05-21 19:47:36 +0200
commit2ee78f3c3884a315baffee7d5e9244f52c59c10b (patch)
tree65919d8a6f54be7b67be9c12b108a47c645f3275 /src
parente28bda801c1d7b9953298993b56408ce4202084f (diff)
fix unit tests
Diffstat (limited to 'src')
-rw-r--r--src/grammar.zig95
-rw-r--r--src/non-terminal.zig54
-rw-r--r--src/recognizer.zig28
3 files changed, 108 insertions, 69 deletions
diff --git a/src/grammar.zig b/src/grammar.zig
index 3c586f3..9b794a0 100644
--- a/src/grammar.zig
+++ b/src/grammar.zig
@@ -1,12 +1,12 @@
const std = @import("std");
+const Rule = @import("rule.zig");
const Character = @import("character.zig").Character;
const NonTerminal = @import("non-terminal.zig");
const Self = @This();
non_terminals: []NonTerminal,
-entry_id: usize,
allocator: std.mem.Allocator,
pub fn parse(entry: []const u8, buffer: []const u8, allocator: std.mem.Allocator) !Self {
@@ -18,19 +18,30 @@ pub fn parse(entry: []const u8, buffer: []const u8, allocator: std.mem.Allocator
const name = try NonTerminal.parse_name(line);
if (!names.contains(name)) {
- try names.put(name, names.count());
+ try names.put(name, names.count() + 1);
}
}
lines = std.mem.tokenizeScalar(u8, buffer, '\n');
var self = Self {
- .non_terminals = try allocator.alloc(NonTerminal, names.count()),
- .entry_id = names.get(entry) orelse return error.MissingRule,
+ .non_terminals = try allocator.alloc(NonTerminal, names.count() + 1),
.allocator = allocator,
};
errdefer self.deinit();
{
+ self.non_terminals[0] = try NonTerminal.init("_start", 0, allocator);
+ const init_rule = try allocator.alloc(Character, 1);
+ init_rule[0] = Character {
+ .non_terminal = names.get(entry) orelse return error.MissingRule,
+ };
+
+ try self.non_terminals[0].add_rule(Rule {
+ .items = init_rule
+ });
+ }
+
+ {
var iterator = names.iterator();
while (iterator.next()) |name_entry| {
self.non_terminals[name_entry.value_ptr.*] = try NonTerminal.init(
@@ -68,7 +79,7 @@ pub inline fn non_terminal_by_id(self: *Self, id: usize) *NonTerminal {
}
pub inline fn entry_point(self: *Self) *NonTerminal {
- return self.non_terminal_by_id(self.entry_id);
+ return self.non_terminal_by_id(0);
}
fn generate_first(self: *Self) void {
@@ -254,17 +265,17 @@ test "expr" {
var grammar = try Self.parse("S", text, allocator);
defer grammar.deinit();
- try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { '(', 'a', 'b' });
- try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { '+', Character.EPSILON });
- try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { '(', 'a', 'b' });
- try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { '*', Character.EPSILON });
- try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { '(', 'a', 'b' });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { '(', 'a', 'b' });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { '+', Character.EPSILON });
+ try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { '(', 'a', 'b' });
+ try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { '*', Character.EPSILON });
+ try grammar.non_terminal_by_id(5).first.expect(&[_]u9 { '(', 'a', 'b' });
- try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { ')', Character.END });
try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { ')', Character.END });
- try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { '+', ')', Character.END });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { ')', Character.END });
try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { '+', ')', Character.END });
- try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { '*', '+', ')', Character.END });
+ try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { '+', ')', Character.END });
+ try grammar.non_terminal_by_id(5).follows.expect(&[_]u9 { '*', '+', ')', Character.END });
}
test "sample 0" {
@@ -279,15 +290,15 @@ test "sample 0" {
var grammar = try Self.parse("S", text, allocator);
defer grammar.deinit();
- try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON });
- try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'd', 'g', 'h', Character.EPSILON });
- try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'g', Character.EPSILON });
- try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'h', Character.EPSILON });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd', 'g', 'h', Character.EPSILON });
+ try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'g', Character.EPSILON });
+ try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { 'h', Character.EPSILON });
- try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END });
- try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'g', 'h', Character.END });
- try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'g', 'h', Character.END });
- try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'b', 'g', 'h', Character.END });
+ try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'g', 'h', Character.END });
+ try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'a', 'g', 'h', Character.END });
+ try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { 'b', 'g', 'h', Character.END });
}
test "sample 1" {
@@ -303,13 +314,13 @@ test "sample 1" {
var grammar = try Self.parse("S", text, allocator);
defer grammar.deinit();
- try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a' });
- try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'c', Character.EPSILON });
- try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd', Character.EPSILON });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a' });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'c', Character.EPSILON });
+ try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'd', Character.EPSILON });
- try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END });
- try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'b', 'd' });
- try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'b' });
+ try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'b', 'd' });
+ try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'b' });
}
test "sample 2" {
@@ -329,17 +340,17 @@ test "sample 2" {
var grammar = try Self.parse("S", text, allocator);
defer grammar.deinit();
- try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a', 'c', 'e' });
- try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c' });
- try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd' });
- try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'e', Character.EPSILON });
- try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { 'f', Character.EPSILON });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c', 'e' });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'a', 'c' });
+ try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'd' });
+ try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { 'e', Character.EPSILON });
+ try grammar.non_terminal_by_id(5).first.expect(&[_]u9 { 'f', Character.EPSILON });
- try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END });
- try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'd' });
- try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'd' });
try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { Character.END });
- try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { 'a' });
+ try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(5).follows.expect(&[_]u9 { 'a' });
}
test "sample 3" {
@@ -357,11 +368,11 @@ test "sample 3" {
var grammar = try Self.parse("S", text, allocator);
defer grammar.deinit();
- try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a', 'b', 'c', Character.EPSILON });
- try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c' });
- try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'a', 'b' });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'b', 'c', Character.EPSILON });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'a', 'c' });
+ try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'a', 'b' });
- try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { 'd', Character.END });
- try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd' });
- try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd', Character.END });
+ try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'd', Character.END });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd' });
+ try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd', Character.END });
}
diff --git a/src/non-terminal.zig b/src/non-terminal.zig
index fdab32a..6eaf068 100644
--- a/src/non-terminal.zig
+++ b/src/non-terminal.zig
@@ -155,9 +155,10 @@ test "single-non-terminal" {
try non_terminal.parse_and_add_variants("main -> one", &names);
- try std.testing.expectEqualDeep(&[_][]const Character{
- &[_] Character { Character { .non_terminal = 1 } }
- }, non_terminal.rules());
+ try std.testing.expectEqualDeep(
+ &[_] Character { Character { .non_terminal = 1 } },
+ non_terminal.rules()[0].items
+ );
}
test "multiple-non-terminal" {
@@ -174,11 +175,12 @@ test "multiple-non-terminal" {
try non_terminal.parse_and_add_variants("main -> one | two | three", &names);
- try std.testing.expectEqualDeep(&[_][]const Character{
- &[_] Character { Character { .non_terminal = 1 } },
- &[_] Character { Character { .non_terminal = 2 } },
- &[_] Character { Character { .non_terminal = 3 } },
- }, non_terminal.rules());
+ inline for (0..3) |i| {
+ try std.testing.expectEqualDeep(
+ &[_] Character { Character { .non_terminal = i + 1 } },
+ non_terminal.rules()[i].items
+ );
+ }
}
test "multiple-non-terminal-sequence" {
@@ -195,11 +197,20 @@ test "multiple-non-terminal-sequence" {
try non_terminal.parse_and_add_variants("main -> one two | two | three", &names);
- try std.testing.expectEqualDeep(&[_][]const Character{
+ try std.testing.expectEqualDeep(
&[_] Character { Character { .non_terminal = 1 }, Character { .non_terminal = 2 }, },
+ non_terminal.rules()[0].items
+ );
+
+ try std.testing.expectEqualDeep(
&[_] Character { Character { .non_terminal = 2 } },
+ non_terminal.rules()[1].items
+ );
+
+ try std.testing.expectEqualDeep(
&[_] Character { Character { .non_terminal = 3 } },
- }, non_terminal.rules());
+ non_terminal.rules()[2].items
+ );
}
test "single-terminal" {
@@ -214,13 +225,14 @@ test "single-terminal" {
try non_terminal.parse_and_add_variants("main -> 'one'", &names);
- try std.testing.expectEqualDeep(&[_][]const Character{
+ try std.testing.expectEqualDeep(
&[_] Character {
Character { .terminal = 'o' },
Character { .terminal = 'n' },
Character { .terminal = 'e' }
},
- }, non_terminal.rules());
+ non_terminal.rules()[0].items
+ );
}
test "single-terminal-with-escapes" {
@@ -234,7 +246,7 @@ test "single-terminal-with-escapes" {
try non_terminal.parse_and_add_variants("main -> 'o\\r\\nn\\'e\\\\'", &names);
- try std.testing.expectEqualDeep(&[_][]const Character{
+ try std.testing.expectEqualDeep(
&[_] Character {
Character { .terminal = 'o' },
Character { .terminal = '\r' },
@@ -244,7 +256,8 @@ test "single-terminal-with-escapes" {
Character { .terminal = 'e' },
Character { .terminal = '\\' },
},
- }, non_terminal.rules());
+ non_terminal.rules()[0].items
+ );
}
test "mix" {
@@ -260,19 +273,28 @@ test "mix" {
try non_terminal.parse_and_add_variants("main -> 'ab' other | other some 'a' | main", &names);
- try std.testing.expectEqualDeep(&[_][]const Character{
+ try std.testing.expectEqualDeep(
&[_] Character {
Character { .terminal = 'a' },
Character { .terminal = 'b' },
Character { .non_terminal = 1 },
},
+ non_terminal.rules()[0].items,
+ );
+
+ try std.testing.expectEqualDeep(
&[_] Character {
Character { .non_terminal = 1 },
Character { .non_terminal = 2 },
Character { .terminal = 'a' },
},
+ non_terminal.rules()[1].items,
+ );
+
+ try std.testing.expectEqualDeep(
&[_] Character {
Character { .non_terminal = 0 },
},
- }, non_terminal.rules());
+ non_terminal.rules()[2].items,
+ );
}
diff --git a/src/recognizer.zig b/src/recognizer.zig
index 696a80a..a6fe721 100644
--- a/src/recognizer.zig
+++ b/src/recognizer.zig
@@ -55,7 +55,7 @@ const State = struct {
const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index];
std.debug.print("{*} state (", .{ self });
- for (rule, 0..) |char, index| {
+ for (rule.items, 0..) |char, index| {
if (index == self.inner_position) {
std.debug.print("○", .{});
}
@@ -63,7 +63,7 @@ const State = struct {
std.debug.print("{}", .{char});
}
- if (rule.len == self.inner_position) {
+ if (rule.items.len == self.inner_position) {
std.debug.print("○", .{});
}
std.debug.print(")\n", .{});
@@ -72,25 +72,31 @@ const State = struct {
pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
- if (node.state.is_at_end_of_rule(rule)) {
- for (node.parents.items) |parent| {
- if (reaches_end_of_entry(grammar, parent)) {
- return true;
- }
- }
- return node.is_toplevel;
+ if (node.parents.items.len == 0) {
+ return true;
}
for (rule.items[node.state.inner_position..]) |character| {
switch (character) {
.terminal => return false,
- .non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END),
+ .non_terminal => |n| {
+ if (!grammar.non_terminal_by_id(n).first.is_set(Character.EPSILON)) {
+ return false;
+ }
+ },
else => {}
}
}
- return grammar.non_terminal_by_id(node.state.id).follows.is_set(Character.END);
+ for (node.parents.items) |parent| {
+ parent.state = parent.state.next();
+ if (reaches_end_of_entry(grammar, parent)) {
+ return true;
+ }
+ }
+
+ return false;
}
pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool {