aboutsummaryrefslogtreecommitdiff
path: root/src/recognizer.zig
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2025-04-26 14:23:28 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2025-04-26 14:23:28 +0200
commitcf4d53c3eb35028839e6b267230c23df68b1e94a (patch)
treeac564add1e8b0ee1b9d111a7692ec2ab7fc0499e /src/recognizer.zig
parentf593da7580f423b1405f4705081368acef0b3c21 (diff)
first working implementation (unoptimized)
Diffstat (limited to 'src/recognizer.zig')
-rw-r--r--src/recognizer.zig240
1 files changed, 240 insertions, 0 deletions
diff --git a/src/recognizer.zig b/src/recognizer.zig
new file mode 100644
index 0000000..3adb7a4
--- /dev/null
+++ b/src/recognizer.zig
@@ -0,0 +1,240 @@
+const std = @import("std");
+const Grammar = @import("grammar.zig");
+const NonTerminal = @import("non-terminal.zig");
+const Character = @import("character.zig").Character;
+const Generator = @import("generator.zig").Generator;
+const gss = @import("gss.zig");
+
+const State = struct {
+ const Self = @This();
+
+ name: u8,
+ rule: NonTerminal.Rule,
+ inner_position: usize,
+ input_position: usize,
+
+ pub fn format(
+ self: *const Self,
+ comptime fmt: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = fmt;
+ _ = options;
+
+ try writer.print("[ {c}, {s}, {}, {} ]", .{
+ self.name,
+ self.rule,
+ self.inner_position,
+ self.input_position,
+ });
+ }
+};
+
+pub fn check(grammar: *Grammar, input: []const u8, allocator: std.mem.Allocator) !bool {
+ var stacks = std.ArrayList(std.ArrayList(State)).init(allocator);
+ defer {
+ for (stacks.items) |stack| {
+ stack.deinit();
+ }
+ stacks.deinit();
+ }
+
+ const entry = grammar.entry_point();
+
+ for (entry.rules()) |rule| {
+ const state = State {
+ .name = entry.name,
+ .rule = rule,
+ .inner_position = 0,
+ .input_position = 0,
+ };
+
+ var stack = std.ArrayList(State).init(allocator);
+ try stack.append(state);
+ try stacks.append(stack);
+ }
+
+ var remove_queue = std.ArrayList(usize).init(allocator);
+ defer remove_queue.deinit();
+ var add_queue = std.ArrayList(std.ArrayList(State)).init(allocator);
+ defer add_queue.deinit();
+
+ while (stacks.items.len > 0) {
+ for (stacks.items, 0..) |*stack, stack_index| {
+ var state: *State = &stack.items[stack.items.len - 1];
+
+ if (state.inner_position == state.rule.len) {
+ if (
+ stack.items.len == 1 and
+ state.name == entry.name and
+ state.input_position == input.len
+ ) {
+ return true;
+ }
+
+ const old_state = stack.pop();
+ if (stack.items.len > 0) {
+ state = &stack.items[stack.items.len - 1];
+ state.inner_position += 1;
+ state.input_position = old_state.input_position;
+ } else {
+ try remove_queue.append(stack_index);
+ }
+ continue;
+ }
+
+ const epsilon_only = state.input_position == input.len;
+
+ switch (state.rule[state.inner_position]) {
+ .terminal => |t| {
+ if (!epsilon_only and input[state.input_position] == t) {
+ state.inner_position += 1;
+ state.input_position += 1;
+ } else {
+ try remove_queue.append(stack_index);
+ }
+ },
+ .non_terminal => |n| {
+ const non_terminal = grammar.non_terminal_by_name(n);
+ const rules = non_terminal.rules();
+
+ if (!epsilon_only and non_terminal.first.is_set(input[state.input_position])) {
+ for (rules) |rule| {
+ const next_state = State {
+ .name = non_terminal.name,
+ .rule = rule,
+ .inner_position = 0,
+ .input_position = state.input_position,
+ };
+ var new_stack = try stack.clone();
+ try new_stack.append(next_state);
+ try add_queue.append(new_stack);
+ }
+ }
+
+ if (non_terminal.first.is_set(Character.EPSILON)) {
+ state.inner_position += 1;
+ } else {
+ try remove_queue.append(stack_index);
+ }
+ },
+ .epsilon => {
+ state.inner_position += 1;
+ },
+ }
+ }
+
+ for (remove_queue.items, 0..) |index, offset| {
+ const stack = stacks.orderedRemove(index - offset);
+ stack.deinit();
+ }
+
+ for (add_queue.items) |stack| {
+ try stacks.append(stack);
+ }
+
+ remove_queue.clearAndFree();
+ add_queue.clearAndFree();
+ }
+
+ return false;
+}
+
+test "expr" {
+ const text =
+ \\S -> B A
+ \\A -> + B A
+ \\A -> _
+ \\B -> D C
+ \\C -> * D C
+ \\C -> _
+ \\D -> ( S )
+ \\D -> a
+ \\D -> b
+ ;
+
+ const input = "b+a*b";
+
+ const allocator = std.testing.allocator;
+
+ var grammar = try Grammar.parse(text, allocator);
+ defer grammar.deinit(allocator);
+
+ try std.testing.expect(try check(&grammar, input, allocator));
+}
+
+test "simple 0 - success" {
+ const text =
+ \\S -> A S d
+ \\S -> B S
+ \\S -> _
+ \\A -> a
+ \\A -> c
+ \\B -> a
+ \\B -> b
+ ;
+
+ const input = "aad";
+
+ const allocator = std.testing.allocator;
+
+ var grammar = try Grammar.parse(text, allocator);
+ defer grammar.deinit(allocator);
+
+ try std.testing.expect(try check(&grammar, input, allocator));
+}
+
+test "simple 0 - fail" {
+ const text =
+ \\S -> A S d
+ \\S -> B S
+ \\S -> _
+ \\A -> a
+ \\A -> c
+ \\B -> a
+ \\B -> b
+ ;
+
+ const input = "accd";
+
+ const allocator = std.testing.allocator;
+
+ var grammar = try Grammar.parse(text, allocator);
+ defer grammar.deinit(allocator);
+
+ try std.testing.expect(!try check(&grammar, input, allocator));
+}
+
+test "simple 0 - fuzzy" {
+ const text =
+ \\S -> A S d
+ \\S -> B S
+ \\S -> _
+ \\A -> a
+ \\A -> c
+ \\B -> a
+ \\B -> b
+ ;
+
+ const allocator = std.testing.allocator;
+
+ var grammar = try Grammar.parse(text, allocator);
+ defer grammar.deinit(allocator);
+
+ var generator = Generator(struct {
+ const Self = @This();
+
+ pub fn next(_: *Self, n: usize) usize {
+ return std.crypto.random.uintLessThan(usize, n);
+ }
+ }){};
+
+ for (0..100) |_| {
+ const input = try generator.sentential_from_grammar(&grammar, 1000, allocator);
+ defer allocator.free(input);
+
+ try std.testing.expect(try check(&grammar, input, allocator));
+ }
+
+}