diff options
| author | Nathan Reiner <nathan@nathanreiner.xyz> | 2025-05-06 10:43:09 +0200 |
|---|---|---|
| committer | Nathan Reiner <nathan@nathanreiner.xyz> | 2025-05-06 10:43:09 +0200 |
| commit | eff19cc15a9bf4df60e7f90c3a7ee525c65266c0 (patch) | |
| tree | d6034b574e8e2218054d42caadbf25fa17862abf /src/recognizer.zig | |
| parent | e2f01d5df22704bfe62396a0f9a260f86edbde0e (diff) | |
add full grammar parsing
Diffstat (limited to 'src/recognizer.zig')
| -rw-r--r-- | src/recognizer.zig | 136 |
1 files changed, 79 insertions, 57 deletions
diff --git a/src/recognizer.zig b/src/recognizer.zig index 1783db1..25a7944 100644 --- a/src/recognizer.zig +++ b/src/recognizer.zig @@ -8,8 +8,8 @@ const gss = @import("gss.zig"); const State = struct { const Self = @This(); - name: u8, - rule: NonTerminal.Rule, + id: usize, + rule_index: usize, inner_position: usize, input_position: usize, @@ -22,13 +22,34 @@ const State = struct { _ = fmt; _ = options; - try writer.print("[ {c}, {s}, {}, {} ]", .{ - self.name, - self.rule, + try writer.print("[ {}, {}, {}, {} ]", .{ + self.id, + self.rule_index, self.inner_position, self.input_position, }); } + + pub fn debug(self: *const Self, grammar: *Grammar, input: []const u8) void { + const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index]; + std.debug.print("input ({s}○{s}) state (", .{ + input[0..self.input_position], + input[self.input_position..], + }); + + for (rule, 0..) |char, index| { + if (index == self.inner_position) { + std.debug.print("○", .{}); + } + + std.debug.print("{}", .{char}); + } + + if (rule.len == self.inner_position) { + std.debug.print("○", .{}); + } + std.debug.print(")\n", .{}); + } }; pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool { @@ -44,11 +65,11 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo defer for (queues) |queue| queue.deinit(); var processing_queue = &queues[0]; - for (entry.rules()) |rule| { + for (0..entry.rules().len) |index| { const node = try allocator.create(gss.Node(State)); node.* = gss.Node(State).init(State { - .name = entry.name, - .rule = rule, + .id = entry.id, + .rule_index = index, .inner_position = 0, .input_position = 0, }); @@ -60,10 +81,12 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo while (processing_queue.items.len > 0) { for (processing_queue.items) |node| { - if (node.state.inner_position == node.state.rule.len) { + const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index]; + + if (node.state.inner_position == rule.len) { if ( node.parent == null and - node.state.name == entry.name and + node.state.id == entry.id and node.state.input_position == input.len ) { return true; @@ -72,8 +95,8 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo const old_state, const parent_node = node.pop(allocator); if (parent_node) |parent| { const sibling = try parent.clone(State { - .name = parent.state.name, - .rule = parent.state.rule, + .id = parent.state.id, + .rule_index = parent.state.rule_index, .inner_position = parent.state.inner_position + 1, .input_position = old_state.input_position, }, allocator); @@ -82,12 +105,12 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo } else { const epsilon_only = node.state.input_position == input.len; - switch (node.state.rule[node.state.inner_position]) { + switch (rule[node.state.inner_position]) { .terminal => |t| { if (!epsilon_only and input[node.state.input_position] == t) { const sibling = try node.clone(State { - .name = node.state.name, - .rule = node.state.rule, + .id = node.state.id, + .rule_index = node.state.rule_index, .inner_position = node.state.inner_position + 1, .input_position = node.state.input_position + 1, }, allocator); @@ -95,14 +118,13 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo } }, .non_terminal => |n| { - const non_terminal = grammar.non_terminal_by_name(n); - const rules = non_terminal.rules(); + const non_terminal = grammar.non_terminal_by_id(n); if (!epsilon_only and non_terminal.first.is_set(input[node.state.input_position])) { - for (rules) |rule| { + for (0..non_terminal.rules().len) |index| { const child = try node.push(State { - .name = non_terminal.name, - .rule = rule, + .id = non_terminal.id, + .rule_index = index, .inner_position = 0, .input_position = node.state.input_position, }, allocator); @@ -112,8 +134,8 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo if (non_terminal.first.is_set(Character.EPSILON)) { const sibling = try node.clone(State { - .name = node.state.name, - .rule = node.state.rule, + .id = node.state.id, + .rule_index = node.state.rule_index, .inner_position = node.state.inner_position + 1, .input_position = node.state.input_position }, allocator); @@ -122,8 +144,8 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo }, .epsilon => { const sibling = try node.clone(State { - .name = node.state.name, - .rule = node.state.rule, + .id = node.state.id, + .rule_index = node.state.rule_index, .inner_position = node.state.inner_position + 1, .input_position = node.state.input_position }, allocator); @@ -145,83 +167,83 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo test "expr" { const text = \\S -> B A - \\A -> + B A - \\A -> _ + \\A -> '+' B A + \\A -> '' \\B -> D C - \\C -> * D C - \\C -> _ - \\D -> ( S ) - \\D -> a - \\D -> b + \\C -> '*' D C + \\C -> '' + \\D -> '(' S ')' + \\D -> 'a' + \\D -> 'b' ; const input = "b+a*b"; const allocator = std.testing.allocator; - var grammar = try Grammar.parse(text, allocator); - defer grammar.deinit(allocator); + var grammar = try Grammar.parse("S", text, allocator); + defer grammar.deinit(); try std.testing.expect(try check(&grammar, input, allocator)); } test "simple 0 - success" { const text = - \\S -> A S d + \\S -> A S 'd' \\S -> B S - \\S -> _ - \\A -> a - \\A -> c - \\B -> a - \\B -> b + \\S -> '' + \\A -> 'a' + \\A -> 'c' + \\B -> 'a' + \\B -> 'b' ; const input = "aad"; const allocator = std.testing.allocator; - var grammar = try Grammar.parse(text, allocator); - defer grammar.deinit(allocator); + var grammar = try Grammar.parse("S", text, allocator); + defer grammar.deinit(); try std.testing.expect(try check(&grammar, input, allocator)); } test "simple 0 - fail" { const text = - \\S -> A S d + \\S -> A S 'd' \\S -> B S - \\S -> _ - \\A -> a - \\A -> c - \\B -> a - \\B -> b + \\S -> '' + \\A -> 'a' + \\A -> 'c' + \\B -> 'a' + \\B -> 'b' ; const input = "accd"; const allocator = std.testing.allocator; - var grammar = try Grammar.parse(text, allocator); - defer grammar.deinit(allocator); + var grammar = try Grammar.parse("S", text, allocator); + defer grammar.deinit(); try std.testing.expect(!try check(&grammar, input, allocator)); } test "simple 0 - fuzzy" { const text = - \\S -> A S d + \\S -> A S 'd' \\S -> B S - \\S -> _ - \\A -> a - \\A -> c - \\B -> a - \\B -> b + \\S -> '' + \\A -> 'a' + \\A -> 'c' + \\B -> 'a' + \\B -> 'b' ; const allocator = std.testing.allocator; - var grammar = try Grammar.parse(text, allocator); - defer grammar.deinit(allocator); + var grammar = try Grammar.parse("S", text, allocator); + defer grammar.deinit(); var generator = Generator(struct { const Self = @This(); |