const std = @import("std");
const Grammar = @import("grammar.zig");
const NonTerminal = @import("non-terminal.zig");
const Character = @import("character.zig").Character;
const Generator = @import("generator.zig").Generator;
const gss = @import("gss.zig");

/// A position inside the grammar: the "dot" sits before element
/// `inner_position` of rule `rule_index` of the non-terminal `id`.
/// Used both as the payload of GSS nodes and as a hash-map key.
const State = struct {
    const Self = @This();

    /// Hash-map context for `std.HashMap(State, ...)`.
    pub const Context = struct {
        pub fn hash(_: @This(), s: Self) u64 {
            // State is a plain struct of three usize fields, so hashing its
            // raw bytes is deterministic for equal values.
            return std.hash.Wyhash.hash(0, std.mem.asBytes(&s));
        }

        pub fn eql(_: @This(), a: Self, b: Self) bool {
            return a.id == b.id and
                a.rule_index == b.rule_index and
                a.inner_position == b.inner_position;
        }
    };

    /// Id of the non-terminal this state belongs to.
    id: usize,
    /// Which alternative (rule) of that non-terminal is being matched.
    rule_index: usize,
    /// Index of the next rule element to match (the "dot" position).
    inner_position: usize,

    /// Returns a copy of this state with the dot advanced by one element.
    pub inline fn next(self: *const Self) Self {
        var advanced = self.*;
        advanced.inner_position += 1;
        return advanced;
    }

    /// True when the dot has moved past the last element of `rule`.
    /// FIX: takes `*const Self` (was `*Self`) — the check is read-only, so
    /// const and mutable callers alike can use it; strictly more permissive.
    pub inline fn is_at_end_of_rule(self: *const Self, rule: NonTerminal.Rule) bool {
        return self.inner_position == rule.len;
    }

    /// std.fmt integration: prints `[ id, rule_index, inner_position ]`.
    pub fn format(
        self: *const Self,
        comptime fmt: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = fmt;
        _ = options;
        try writer.print("[ {}, {}, {} ]", .{
            self.id,
            self.rule_index,
            self.inner_position,
        });
    }

    /// Debug helper: prints the rule with "○" marking the dot position.
    pub fn debug(self: *const Self, grammar: *Grammar) void {
        const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index];
        std.debug.print("{*} state (", .{self});
        for (rule, 0..) |char, index| {
            if (index == self.inner_position) {
                std.debug.print("○", .{});
            }
            std.debug.print("{}", .{char});
        }
        // A dot sitting at the very end of the rule is not reached by the
        // loop above, so print it separately.
        if (rule.len == self.inner_position) {
            std.debug.print("○", .{});
        }
        std.debug.print(")\n", .{});
    }
};

/// Expands `base` through epsilon moves, rule completions and non-terminal
/// descents until terminals are reached, consuming `terminal` wherever it
/// matches. Each successful consumption clones the GSS node one position
/// further and appends it to `queue` (the work list for the next input
/// character). `node_cache` merges sub-states reached via several paths
/// during this expansion so the GSS shares structure instead of duplicating.
///
/// Allocates only transient bookkeeping from `allocator`; GSS nodes
/// themselves come from `graph`'s own allocator.
pub fn forward_and_consume(
    grammar: *Grammar,
    base: *gss.Node(State),
    graph: *gss.Graph(State),
    terminal: u8,
    queue: *std.ArrayList(*gss.Node(State)),
    node_cache: *std.HashMap(State, *gss.Node(State), State.Context, 80),
    allocator: std.mem.Allocator,
) !void {
    // Work list of (state, node) checkpoints still to be expanded. Note the
    // state may be ahead of node.state (e.g. after an epsilon advance).
    var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator);
    // FIX: the list was previously never deinitialized — a leak whenever the
    // caller passes a non-arena allocator. Deallocation keeps this function
    // allocator-agnostic (it is a no-op cost under check()'s arena).
    defer forward_queue.deinit();

    try forward_queue.append(.{ base.state, base });
    while (forward_queue.popOrNull()) |checkpoint| {
        const last_state, const last_node = checkpoint;
        const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index];

        // Rule fully matched: pop back to every GSS parent and resume each of
        // them one element further along. (Consistency fix: reuse the State
        // helper instead of re-spelling the length comparison inline.)
        if (last_state.is_at_end_of_rule(rule)) {
            for (last_node.parents.items) |parent| {
                try forward_queue.append(.{ parent.state.next(), parent });
            }
            continue;
        }

        switch (rule[last_state.inner_position]) {
            // A matching terminal consumes the input character: hand the
            // advanced node to the next round's processing queue.
            .terminal => |t| if (t == terminal) {
                const node = try graph.clone(last_node, last_state.next());
                try queue.append(node);
            },
            // A non-terminal: descend into each of its rules that can start
            // with `terminal` according to the grammar's lookahead check.
            .non_terminal => |n| {
                const non_terminal = grammar.non_terminal_by_id(n);
                // FIX: reuse the `non_terminal` local instead of performing a
                // second identical `non_terminal_by_id(n)` lookup.
                for (0..non_terminal.rules().len) |rule_index| {
                    if (grammar.is_next_char(
                        non_terminal.rules()[rule_index],
                        n,
                        terminal,
                    )) {
                        const state = State{
                            .id = n,
                            .rule_index = rule_index,
                            .inner_position = 0,
                        };
                        if (node_cache.get(state)) |existing| {
                            // Same sub-state reached along another path:
                            // merge by adding ourselves as an extra parent.
                            // NOTE(review): this appends `last_node` directly,
                            // whereas the miss branch below pins the current
                            // position via graph.clone(last_node, last_state).
                            // If last_node.state lags last_state (epsilon
                            // advance), the resume position on completion may
                            // differ between the two paths — confirm against
                            // gss.Graph.clone semantics.
                            try existing.parents.append(last_node);
                        } else {
                            const parent = try graph.clone(last_node, last_state);
                            const child = try graph.push(parent, state);
                            try node_cache.put(state, child);
                            try forward_queue.append(.{ state, child });
                        }
                    }
                }
            },
            // Epsilon matches nothing: advance the dot on the same node.
            .epsilon => {
                try forward_queue.append(.{ last_state.next(), last_node });
            },
        }
    }
}

/// Returns true if `node` represents an accepting configuration once the
/// input is exhausted: either its rule is complete and some chain of GSS
/// parents also completes up to a top-level entry node, or the unmatched
/// remainder of its rule can derive the empty string (judged via the
/// grammar's follow sets containing Character.END).
pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
    const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
    if (node.state.is_at_end_of_rule(rule)) {
        // Rule complete: accepted if any ancestor chain also reaches the
        // end, or this node itself is a top-level entry alternative.
        for (node.parents.items) |parent| {
            if (reaches_end_of_entry(grammar, parent)) {
                return true;
            }
        }
        return node.is_toplevel;
    }
    // Rule not complete: scan the unmatched tail. A pending terminal can
    // never be satisfied at end of input; the first pending non-terminal
    // decides via its follow set (epsilon elements are skipped).
    for (rule[node.state.inner_position..]) |character| {
        switch (character) {
            .terminal => return false,
            .non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END),
            else => {},
        }
    }
    return grammar.non_terminal_by_id(node.state.id).follows.is_set(Character.END);
}

/// Recognizes whether `input` is a sentence of `grammar`, starting from the
/// grammar's entry point. All intermediate allocations live in an arena that
/// is torn down on return, so the function leaks nothing into
/// `inner_allocator` regardless of the error path taken.
pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool {
    var arena = std.heap.ArenaAllocator.init(inner_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const entry = grammar.entry_point();

    // Double-buffered work lists: nodes to process for the current input
    // character, and nodes produced for the next one.
    var queues = [2]std.ArrayList(*gss.Node(State)){
        std.ArrayList(*gss.Node(State)).init(allocator),
        std.ArrayList(*gss.Node(State)).init(allocator),
    };
    defer for (queues) |queue| queue.deinit();
    var processing_queue = &queues[0];

    var graph = gss.Graph(State).init(allocator);

    // Seed one top-level GSS node per alternative of the entry non-terminal.
    for (0..entry.rules().len) |index| {
        const id = try graph.add_toplevel(State{
            .id = entry.id,
            .rule_index = index,
            .inner_position = 0,
        });
        try processing_queue.append(id);
    }

    var next_processing_queue = &queues[1];
    var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator);
    defer node_cache.deinit();

    for (input) |character| {
        for (processing_queue.items) |node| {
            try forward_and_consume(
                grammar,
                node,
                &graph,
                character,
                next_processing_queue,
                &node_cache,
                allocator,
            );
            // NOTE(review): clearing inside this loop scopes sub-state
            // merging to a single base node's expansion rather than to the
            // whole input position. Whether cross-base merging would be safe
            // depends on gss.Graph.clone's parent-copy semantics — confirm
            // before hoisting this out of the loop.
            node_cache.clearRetainingCapacity();
        }
        // Swap the double buffers and recycle the drained one.
        const swap = processing_queue;
        processing_queue = next_processing_queue;
        next_processing_queue = swap;
        next_processing_queue.clearRetainingCapacity();
    }

    // Input exhausted: accept if any surviving node reaches the entry's end.
    for (processing_queue.items) |node| {
        if (reaches_end_of_entry(grammar, node)) {
            return true;
        }
    }
    return false;
}

test "expr" {
    const text =
        \\S -> B A
        \\A -> '+' B A
        \\A -> ''
        \\B -> D C
        \\C -> '*' D C
        \\C -> ''
        \\D -> '(' S ')'
        \\D -> 'a'
        \\D -> 'b'
    ;
    const input = "b+a*b";
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    try std.testing.expect(try check(&grammar, input, allocator));
}

test "simple 0 - success" {
    const text =
        \\S -> A S 'd'
        \\S -> B S
        \\S -> ''
        \\A -> 'a'
        \\A -> 'c'
        \\B -> 'a'
        \\B -> 'b'
    ;
    const input = "aad";
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    try std.testing.expect(try check(&grammar, input, allocator));
}

test "simple 0 - fail" {
    const text =
        \\S -> A S 'd'
        \\S -> B S
        \\S -> ''
        \\A -> 'a'
        \\A -> 'c'
        \\B -> 'a'
        \\B -> 'b'
    ;
    const input = "accd";
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    try std.testing.expect(!try check(&grammar, input, allocator));
}

test "simple 0 - fuzzy" {
    const text =
        \\S -> A S 'd'
        \\S -> B S
        \\S -> ''
        \\A -> 'a'
        \\A -> 'c'
        \\B -> 'a'
        \\B -> 'b'
    ;
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    // Random sentence generator: every generated sentential form must be
    // accepted by the recognizer.
    var generator = Generator(struct {
        const Self = @This();
        pub fn next(_: *Self, n: usize) usize {
            return std.crypto.random.uintLessThan(usize, n);
        }
    }){};
    for (0..100) |_| {
        const input = try generator.sentential_from_grammar(&grammar, 1000, allocator);
        defer allocator.free(input);
        try std.testing.expect(try check(&grammar, input, allocator));
    }
}