const std = @import("std");
const Grammar = @import("grammar.zig");
const NonTerminal = @import("non-terminal.zig");
const Rule = @import("rule.zig");
const Character = @import("character.zig").Character;
const Generator = @import("generator.zig").Generator;
const gss = @import("gss.zig");

/// A dotted item (LR-style position inside a grammar rule): identifies a
/// non-terminal (`id`), one of its rules (`rule_index`), and how many symbols
/// of that rule have been matched so far (`inner_position`).
const State = struct {
    const Self = @This();

    /// Hash-map context so `State` can key a `std.HashMap`.
    pub const Context = struct {
        pub fn hash(_: @This(), s: Self) u64 {
            // All three fields are plain usize, so hashing the raw bytes of
            // the struct is deterministic for equal states.
            return std.hash.Wyhash.hash(0, std.mem.asBytes(&s));
        }
        pub fn eql(_: @This(), a: Self, b: Self) bool {
            return a.id == b.id and a.rule_index == b.rule_index and a.inner_position == b.inner_position;
        }
    };

    // Non-terminal id this item belongs to.
    id: usize,
    // Which of the non-terminal's rules this item is in.
    rule_index: usize,
    // Position of the "dot" within the rule's symbol list.
    inner_position: usize,

    /// Returns a copy of this state with the dot advanced by one symbol.
    /// Does not mutate `self`.
    pub inline fn next(self: *const Self) Self {
        var other = self.*;
        other.inner_position += 1;
        return other;
    }

    /// True when the dot sits past the last symbol of `rule`
    /// (i.e. the rule has been fully matched).
    pub inline fn is_at_end_of_rule(self: *Self, rule: Rule) bool {
        return self.inner_position == rule.items.len;
    }

    /// std.fmt integration: prints the state as "[ id, rule_index, pos ]".
    pub fn format(
        self: *const Self,
        comptime fmt: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = fmt;
        _ = options;
        try writer.print("[ {}, {}, {} ]", .{
            self.id,
            self.rule_index,
            self.inner_position,
        });
    }

    /// Debug helper: prints the rule with "○" marking the dot position,
    /// e.g. "(a○bc)". Writes to stderr; intended for development only.
    pub fn debug(self: *const Self, grammar: *Grammar) void {
        const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index];
        std.debug.print("{*} state (", .{ self });
        for (rule.items, 0..) |char, index| {
            if (index == self.inner_position) {
                std.debug.print("○", .{});
            }
            std.debug.print("{}", .{char});
        }
        // Dot at the very end of the rule is printed after the loop.
        if (rule.items.len == self.inner_position) {
            std.debug.print("○", .{});
        }
        std.debug.print(")\n", .{});
    }
};

/// Acceptance test for a GSS node after all input has been consumed:
/// returns true if, from `node`, the remaining symbols of every rule on some
/// path up to a top-level (parentless) node can all derive epsilon — i.e. the
/// parse can legally terminate here.
///
/// NOTE(review): this function mutates shared graph state
/// (`parent.state = parent.state.next()`) while answering what looks like a
/// read-only query. Since GSS parents can be shared between children, an
/// advanced parent state persists even when the recursion returns false,
/// which may affect later iterations over other final nodes — verify this is
/// intentional (it appears to rely on `check` being done with the graph).
pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
    const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
    // A node with no parents is a top-level entry node.
    // NOTE(review): this accepts without first checking that the remainder of
    // `rule` past `inner_position` is nullable, unlike the non-top-level path
    // below — confirm top-level nodes can only be queried at rule end.
    if (node.parents.items.len == 0) {
        return true;
    }
    // Every symbol still ahead of the dot must be erasable: a terminal can
    // never be erased, and a non-terminal only if epsilon is in its FIRST set.
    for (rule.items[node.state.inner_position..]) |character| {
        switch (character) {
            .terminal => return false,
            .non_terminal => |n| {
                if (!grammar.non_terminal_by_id(n).first.is_set(Character.EPSILON)) {
                    return false;
                }
            },
            else => {}
        }
    }
    // This rule is completable; pop to each parent (advancing its dot past
    // the non-terminal we just finished) and recurse toward a top-level node.
    for (node.parents.items) |parent| {
        parent.state = parent.state.next();
        if (reaches_end_of_entry(grammar, parent)) {
            return true;
        }
    }
    return false;
}

/// Recognizer: returns true iff `input` is a sentence of `grammar`.
///
/// Works breadth-first over input characters, maintaining a graph-structured
/// stack (GSS) of `State` nodes. For each character, every live node is
/// expanded via `forward_queue` (closure over completed rules, epsilon moves,
/// and non-terminal predictions filtered by FIRST/FOLLOW sets); nodes that
/// consume the character survive into the next round.
///
/// All intermediate allocations live in an arena torn down on return, so the
/// per-structure `deinit`s are belt-and-braces.
pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool {
    var arena = std.heap.ArenaAllocator.init(inner_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    const entry = grammar.entry_point();
    // Double-buffered work lists: one queue holds the nodes being processed
    // for the current character, the other collects survivors for the next.
    var queues = [2]std.ArrayList(*gss.Node(State)) {
        std.ArrayList(*gss.Node(State)).init(allocator),
        std.ArrayList(*gss.Node(State)).init(allocator),
    };
    defer for (queues) |queue| queue.deinit();
    var processing_queue = &queues[0];
    var graph = gss.Graph(State).init(allocator);
    // Seed one top-level GSS node per rule of the entry non-terminal,
    // each with the dot at position 0.
    for (0..entry.rules().len) |index| {
        const id = try graph.add_toplevel(State {
            .id = entry.id,
            .rule_index = index,
            .inner_position = 0,
        });
        try processing_queue.append(id);
    }
    // Deduplicates predicted child states within one base-node expansion so a
    // state predicted twice shares a single GSS node (extra parent edge
    // instead of a duplicate node). 80 = max load percentage.
    var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator);
    // LIFO work list of (state, node) checkpoints still to expand for the
    // current character.
    var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator);
    var next_processing_queue = &queues[1];
    for (input) |character| {
        for (processing_queue.items) |base| {
            try forward_queue.append(.{ base.state, base });
            while (forward_queue.popOrNull()) |checkpoint| {
                const last_state, const last_node = checkpoint;
                const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index];
                // Dot at end of rule: pop — resume each parent with its dot
                // advanced past the completed non-terminal.
                if (last_state.inner_position == rule.items.len) {
                    for (last_node.parents.items) |parent| {
                        try forward_queue.append(.{parent.state.next(), parent});
                    }
                    continue;
                }
                switch (rule.items[last_state.inner_position]) {
                    // Terminal match: this branch consumes `character`; the
                    // advanced clone becomes live for the next character.
                    .terminal => |t| if (t == character) {
                        const node = try graph.clone(last_node, last_state.next());
                        try next_processing_queue.append(node);
                    },
                    // Non-terminal: predict every child rule that could start
                    // with `character` (FIRST), or that is nullable while
                    // `character` may legally follow the non-terminal (FOLLOW).
                    .non_terminal => |n| {
                        const non_terminal = grammar.non_terminal_by_id(n);
                        for (grammar.non_terminal_by_id(n).rules(), 0..) |child_rule, rule_index| {
                            if (
                                child_rule.first.is_set(character)
                                or (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character))
                            ) {
                                const state = State {
                                    .id = n,
                                    .rule_index = rule_index,
                                    .inner_position = 0,
                                };
                                if (node_cache.get(state)) |parent| {
                                    // Already predicted in this expansion:
                                    // just add another parent edge (GSS merge).
                                    try parent.parents.append(last_node);
                                } else {
                                    const parent = try graph.clone(last_node, last_state);
                                    const next = try graph.push(parent, state);
                                    try node_cache.put(state, next);
                                    try forward_queue.append(.{state, next});
                                }
                            }
                        }
                    },
                    // Epsilon symbol: skip it without consuming input.
                    .epsilon => {
                        try forward_queue.append(.{last_state.next(), last_node});
                    },
                }
            }
            // Cache and work list are scoped to a single base-node expansion.
            node_cache.clearRetainingCapacity();
            forward_queue.clearRetainingCapacity();
        }
        // Swap the double buffers; the stale survivor list is recycled.
        const swap = processing_queue;
        processing_queue = next_processing_queue;
        next_processing_queue = swap;
        next_processing_queue.clearRetainingCapacity();
    }
    // Input exhausted: accept iff any surviving node can unwind to a
    // top-level entry node through nullable-only remainders.
    for (processing_queue.items) |node| {
        if (reaches_end_of_entry(grammar, node)) {
            return true;
        }
    }
    return false;
}

test "expr" {
    // Classic epsilon-containing expression grammar:
    // sums (A) over products (C) over atoms/parenthesized groups (D).
    const text =
        \\S -> B A
        \\A -> '+' B A
        \\A -> ''
        \\B -> D C
        \\C -> '*' D C
        \\C -> ''
        \\D -> '(' S ')'
        \\D -> 'a'
        \\D -> 'b'
    ;
    const input = "b+a*b";
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    try std.testing.expect(try check(&grammar, input, allocator));
}

test "simple 0 - success" {
    // Ambiguous grammar: 'a' can start both A and B, so the recognizer must
    // track both alternatives; "aad" parses as B('a') then A('a') S(eps) 'd'.
    const text =
        \\S -> A S 'd'
        \\S -> B S
        \\S -> ''
        \\A -> 'a'
        \\A -> 'c'
        \\B -> 'a'
        \\B -> 'b'
    ;
    const input = "aad";
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    try std.testing.expect(try check(&grammar, input, allocator));
}

test "simple 0 - fail" {
    // Same grammar as "simple 0 - success" but with an input that has only
    // one 'd' for two A-expansions ('a' and 'c' each demand a closing 'd').
    const text =
        \\S -> A S 'd'
        \\S -> B S
        \\S -> ''
        \\A -> 'a'
        \\A -> 'c'
        \\B -> 'a'
        \\B -> 'b'
    ;
    const input = "accd";
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    try std.testing.expect(!try check(&grammar, input, allocator));
}

test "simple 0 - fuzzy" {
    // Property test: random sentences generated FROM the grammar must always
    // be accepted BY the recognizer.
    const text =
        \\S -> A S 'd'
        \\S -> B S
        \\S -> ''
        \\A -> 'a'
        \\A -> 'c'
        \\B -> 'a'
        \\B -> 'b'
    ;
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    // Generator is parameterized over a randomness source; this one draws
    // uniform indices from the crypto RNG (seedless, so runs are not
    // reproducible — acceptable for a soak test).
    var generator = Generator(struct {
        const Self = @This();
        pub fn next(_: *Self, n: usize) usize {
            return std.crypto.random.uintLessThan(usize, n);
        }
    }){};
    for (0..100) |_| {
        const input = try generator.sentential_from_grammar(&grammar, 1000, allocator);
        defer allocator.free(input);
        try std.testing.expect(try check(&grammar, input, allocator));
    }
}