diff options
| author | Nathan Reiner <nathan@nathanreiner.xyz> | 2025-04-26 14:23:28 +0200 |
|---|---|---|
| committer | Nathan Reiner <nathan@nathanreiner.xyz> | 2025-04-26 14:23:28 +0200 |
| commit | cf4d53c3eb35028839e6b267230c23df68b1e94a (patch) | |
| tree | ac564add1e8b0ee1b9d111a7692ec2ab7fc0499e /src/recognizer.zig | |
| parent | f593da7580f423b1405f4705081368acef0b3c21 (diff) | |
first working implementation (unoptimized)
Diffstat (limited to 'src/recognizer.zig')
| -rw-r--r-- | src/recognizer.zig | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/src/recognizer.zig b/src/recognizer.zig new file mode 100644 index 0000000..3adb7a4 --- /dev/null +++ b/src/recognizer.zig @@ -0,0 +1,240 @@ +const std = @import("std"); +const Grammar = @import("grammar.zig"); +const NonTerminal = @import("non-terminal.zig"); +const Character = @import("character.zig").Character; +const Generator = @import("generator.zig").Generator; +const gss = @import("gss.zig"); + +const State = struct { + const Self = @This(); + + name: u8, + rule: NonTerminal.Rule, + inner_position: usize, + input_position: usize, + + pub fn format( + self: *const Self, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + + try writer.print("[ {c}, {s}, {}, {} ]", .{ + self.name, + self.rule, + self.inner_position, + self.input_position, + }); + } +}; + +pub fn check(grammar: *Grammar, input: []const u8, allocator: std.mem.Allocator) !bool { + var stacks = std.ArrayList(std.ArrayList(State)).init(allocator); + defer { + for (stacks.items) |stack| { + stack.deinit(); + } + stacks.deinit(); + } + + const entry = grammar.entry_point(); + + for (entry.rules()) |rule| { + const state = State { + .name = entry.name, + .rule = rule, + .inner_position = 0, + .input_position = 0, + }; + + var stack = std.ArrayList(State).init(allocator); + try stack.append(state); + try stacks.append(stack); + } + + var remove_queue = std.ArrayList(usize).init(allocator); + defer remove_queue.deinit(); + var add_queue = std.ArrayList(std.ArrayList(State)).init(allocator); + defer add_queue.deinit(); + + while (stacks.items.len > 0) { + for (stacks.items, 0..) |*stack, stack_index| { + var state: *State = &stack.items[stack.items.len - 1]; + + if (state.inner_position == state.rule.len) { + if ( + stack.items.len == 1 and + state.name == entry.name and + state.input_position == input.len + ) { + return true; + } + + const old_state = stack.pop(); + if (stack.items.len > 0) { + state = &stack.items[stack.items.len - 1]; + state.inner_position += 1; + state.input_position = old_state.input_position; + } else { + try remove_queue.append(stack_index); + } + continue; + } + + const epsilon_only = state.input_position == input.len; + + switch (state.rule[state.inner_position]) { + .terminal => |t| { + if (!epsilon_only and input[state.input_position] == t) { + state.inner_position += 1; + state.input_position += 1; + } else { + try remove_queue.append(stack_index); + } + }, + .non_terminal => |n| { + const non_terminal = grammar.non_terminal_by_name(n); + const rules = non_terminal.rules(); + + if (!epsilon_only and non_terminal.first.is_set(input[state.input_position])) { + for (rules) |rule| { + const next_state = State { + .name = non_terminal.name, + .rule = rule, + .inner_position = 0, + .input_position = state.input_position, + }; + var new_stack = try stack.clone(); + try new_stack.append(next_state); + try add_queue.append(new_stack); + } + } + + if (non_terminal.first.is_set(Character.EPSILON)) { + state.inner_position += 1; + } else { + try remove_queue.append(stack_index); + } + }, + .epsilon => { + state.inner_position += 1; + }, + } + } + + for (remove_queue.items, 0..) |index, offset| { + const stack = stacks.orderedRemove(index - offset); + stack.deinit(); + } + + for (add_queue.items) |stack| { + try stacks.append(stack); + } + + remove_queue.clearAndFree(); + add_queue.clearAndFree(); + } + + return false; +} + +test "expr" { + const text = + \\S -> B A + \\A -> + B A + \\A -> _ + \\B -> D C + \\C -> * D C + \\C -> _ + \\D -> ( S ) + \\D -> a + \\D -> b + ; + + const input = "b+a*b"; + + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse(text, allocator); + defer grammar.deinit(allocator); + + try std.testing.expect(try check(&grammar, input, allocator)); +} + +test "simple 0 - success" { + const text = + \\S -> A S d + \\S -> B S + \\S -> _ + \\A -> a + \\A -> c + \\B -> a + \\B -> b + ; + + const input = "aad"; + + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse(text, allocator); + defer grammar.deinit(allocator); + + try std.testing.expect(try check(&grammar, input, allocator)); +} + +test "simple 0 - fail" { + const text = + \\S -> A S d + \\S -> B S + \\S -> _ + \\A -> a + \\A -> c + \\B -> a + \\B -> b + ; + + const input = "accd"; + + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse(text, allocator); + defer grammar.deinit(allocator); + + try std.testing.expect(!try check(&grammar, input, allocator)); +} + +test "simple 0 - fuzzy" { + const text = + \\S -> A S d + \\S -> B S + \\S -> _ + \\A -> a + \\A -> c + \\B -> a + \\B -> b + ; + + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse(text, allocator); + defer grammar.deinit(allocator); + + var generator = Generator(struct { + const Self = @This(); + + pub fn next(_: *Self, n: usize) usize { + return std.crypto.random.uintLessThan(usize, n); + } + }){}; + + for (0..100) |_| { + const input = try generator.sentential_from_grammar(&grammar, 1000, allocator); + defer allocator.free(input); + + try std.testing.expect(try check(&grammar, input, allocator)); + } + +} |