//! Recognizer that walks a `Grammar` over an input string using a
//! graph-structured stack (GSS, see gss.zig): for every input character each
//! live parse state is advanced, and non-terminal expansions are shared
//! through a node cache.

const std = @import("std");
const Grammar = @import("grammar.zig");
const NonTerminal = @import("non-terminal.zig");
const Character = @import("character.zig").Character;
const Generator = @import("generator.zig").Generator;
const gss = @import("gss.zig");

/// A dotted-rule position ("item"): which non-terminal (`id`), which of its
/// alternatives (`rule_index`), and how far into that alternative the dot
/// has advanced (`inner_position`). Used as the payload of every GSS node.
const State = struct {
    const Self = @This();

    /// Hash/equality context for `std.HashMap(State, ...)`.
    pub const Context = struct {
        // Hashes the raw bytes of the struct. Consistent with `eql` only
        // while State has no padding bytes (three usize fields -> none).
        pub fn hash(_: @This(), s: Self) u64 {
            return std.hash.Wyhash.hash(0, std.mem.asBytes(&s));
        }

        pub fn eql(_: @This(), a: Self, b: Self) bool {
            return a.id == b.id and a.rule_index == b.rule_index and a.inner_position == b.inner_position;
        }
    };

    id: usize, // non-terminal id, resolvable via grammar.non_terminal_by_id
    rule_index: usize, // which alternative (rule) of that non-terminal
    inner_position: usize, // dot position inside the rule, 0..rule.len inclusive

    /// Returns a copy of this state with the dot advanced by one symbol.
    pub inline fn next(self: *const Self) Self {
        var other = self.*;
        other.inner_position += 1;
        return other;
    }

    /// True when the dot sits past the last symbol of `rule`.
    pub inline fn is_at_end_of_rule(self: *Self, rule: NonTerminal.Rule) bool {
        return self.inner_position == rule.len;
    }

    /// std.fmt integration: prints "[ id, rule_index, inner_position ]".
    pub fn format(
        self: *const Self,
        comptime fmt: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = fmt;
        _ = options;
        try writer.print("[ {}, {}, {} ]", .{
            self.id,
            self.rule_index,
            self.inner_position,
        });
    }

    /// Debug dump of the dotted rule to stderr, with "○" marking the dot.
    pub fn debug(self: *const Self, grammar: *Grammar) void {
        const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index];
        std.debug.print("{*} state (", .{ self });
        for (rule, 0..) |char, index| {
            if (index == self.inner_position) {
                std.debug.print("○", .{});
            }
            std.debug.print("{}", .{char});
        }
        // Dot sitting at the very end of the rule.
        if (rule.len == self.inner_position) {
            std.debug.print("○", .{});
        }
        std.debug.print(")\n", .{});
    }
};

// NOTE: memory aid for the original shape of the lookahead call (before the
// current character was passed in directly as `terminal`):
//
// if (grammar.is_next_char(
//     non_terminal.rules()[node.state.rule_index][node.state.inner_position..],
//     node.state.id,
//     input[node.state.input_position],
// ))

/// Advances one GSS node by one input character (`terminal`).
///
/// - Dot at end of rule: "pop" — advance every parent past the completed
///   non-terminal and recurse into each.
/// - Dot on a terminal: consume it iff it matches, queueing the advanced
///   clone into `queue` for the next input character.
/// - Dot on a non-terminal: if the lookahead check passes, push one child
///   node per alternative of that non-terminal, deduplicated via
///   `node_cache`; an already-cached node just gains `node` as an extra
///   parent.
/// - Dot on epsilon: skip it and retry the same character.
pub fn forward_and_consume(
    grammar: *Grammar,
    node: *gss.Node(State),
    graph: *gss.Graph(State),
    terminal: u8,
    queue: *std.ArrayList(*gss.Node(State)),
    node_cache: *std.HashMap(State, *gss.Node(State), State.Context, 80),
) !void {
    const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
    if (node.state.inner_position == rule.len) {
        // Completed rule: resume every parent after the finished non-terminal.
        for (node.parents.items) |parent| {
            const next = try graph.clone(parent, parent.state.next());
            try forward_and_consume(grammar, next, graph, terminal, queue, node_cache);
        }
        return;
    }
    switch (rule[node.state.inner_position]) {
        .terminal => |t| if (t == terminal) try queue.append(try graph.clone(node, node.state.next())),
        .non_terminal => |n| {
            const non_terminal = grammar.non_terminal_by_id(n);
            // NOTE(review): this slices `n`'s rule list with the *current*
            // node's rule_index/inner_position rather than `n`'s own. That
            // can index out of bounds when `n` has fewer alternatives than
            // the current non-terminal, and looks like a leftover from the
            // refactor documented in the NOTE above — presumably
            // `rule[node.state.inner_position..]` was intended. Verify
            // against `Grammar.is_next_char`'s contract.
            if (grammar.is_next_char(
                non_terminal.rules()[node.state.rule_index][node.state.inner_position..],
                node.state.id,
                terminal,
            )) {
                for (0..grammar.non_terminal_by_id(n).rules().len) |rule_index| {
                    const state = State {
                        .id = n,
                        .rule_index = rule_index,
                        .inner_position = 0,
                    };
                    const result = try node_cache.getOrPut(state);
                    if (!result.found_existing) {
                        // Fresh expansion: push a child node and process it
                        // against the same input character.
                        result.value_ptr.* = try graph.push(node, state);
                        try forward_and_consume(grammar, result.value_ptr.*, graph, terminal, queue, node_cache);
                    } else {
                        // Already expanded: merge by adding the current node
                        // as another parent of the cached child.
                        std.debug.print("HERE {}{*} <- {}{*}\n", .{node, node, result.value_ptr.*, result.value_ptr.*});
                        try result.value_ptr.*.parents.append(node);
                    }
                }
            }
        },
        .epsilon => {
            // Epsilon consumes nothing: advance the dot, retry `terminal`.
            const next = try graph.clone(node, node.state.next());
            try forward_and_consume(grammar, next, graph, terminal, queue, node_cache);
        },
    }
}

/// True when `node` can legally stop here: the remaining symbols of its rule
/// are all skippable at end of input and some parent chain completes a
/// toplevel (entry-point) node. (Signature continues on the next line.)
pub fn
reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
    // Acceptance check run after the last input character: can this node,
    // without consuming anything further, complete all the way up to a
    // toplevel node of the entry non-terminal?
    const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
    if (node.state.is_at_end_of_rule(rule)) {
        // Rule finished: accept if any parent chain reaches the entry, or
        // if this node itself is a toplevel (entry) node.
        for (node.parents.items) |parent| {
            if (reaches_end_of_entry(grammar, parent)) {
                return true;
            }
        }
        std.debug.print("HERE {}\n", .{ node.is_toplevel });
        return node.is_toplevel;
    }
    // Dot not at the end: inspect the remaining symbols of this rule.
    for (rule[node.state.inner_position..]) |character| {
        switch (character) {
            // A pending terminal can never be matched at end of input.
            .terminal => return false,
            // NOTE(review): decides on the FIRST remaining non-terminal
            // only — accepts iff END is in its FOLLOW set, ignoring any
            // later symbols. Presumably this relies on nullability being
            // reflected in `follows`; verify against how Grammar computes
            // FOLLOW sets.
            .non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END),
            // Epsilon symbols are skippable; keep scanning.
            else => {}
        }
    }
    // Everything left was epsilon: defer to this non-terminal's FOLLOW set.
    return grammar.non_terminal_by_id(node.state.id).follows.is_set(Character.END);
}

/// Recognizes `input` against `grammar`, starting at its entry point.
/// Returns true iff the whole input is accepted. All parser allocations go
/// through an arena owned by this call; `inner_allocator` only backs the
/// arena. Debug tracing is printed to stderr.
pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool {
    var arena = std.heap.ArenaAllocator.init(inner_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    const entry = grammar.entry_point();
    // Double buffering: one queue holds the nodes for the current input
    // character, the other collects the survivors for the next one.
    var queues = [2]std.ArrayList(*gss.Node(State)) {
        std.ArrayList(*gss.Node(State)).init(allocator),
        std.ArrayList(*gss.Node(State)).init(allocator),
    };
    // Redundant with arena.deinit() below, but harmless.
    defer for (queues) |queue| queue.deinit();
    var processing_queue = &queues[0];
    var graph = gss.Graph(State).init(allocator);
    // Seed one toplevel GSS node per alternative of the entry non-terminal.
    for (0..entry.rules().len) |index| {
        const id = try graph.add_toplevel(State {
            .id = entry.id,
            .rule_index = index,
            .inner_position = 0,
        });
        try processing_queue.append(id);
    }
    var next_processing_queue = &queues[1];
    var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator);
    defer node_cache.deinit();
    for (input) |character| {
        std.debug.print("=============\n", .{});
        for (processing_queue.items) |node| {
            node.state.debug(grammar);
            std.debug.print("PARENT:\n", .{});
            for (node.parents.items) |parent| {
                parent.state.debug(grammar);
            }
            std.debug.print("-------------\n", .{});
            try forward_and_consume(
                grammar,
                node,
                &graph,
                character,
                next_processing_queue,
                &node_cache,
            );
            // NOTE(review): the cache is cleared after every *node*, not
            // once per character, so expansion sharing only happens inside
            // one node's recursion. Confirm that is intentional.
            node_cache.clearRetainingCapacity();
        }
        // Swap the queues for the next input character.
        const swap = processing_queue;
        processing_queue = next_processing_queue;
        next_processing_queue = swap;
        next_processing_queue.clearRetainingCapacity();
    }
    std.debug.print("==== END ====\n", .{});
    // Input exhausted: accept iff any surviving node reaches the entry rule.
    for (processing_queue.items) |node| {
        node.state.debug(grammar);
        std.debug.print("parent = {any}\n", .{node.parents.items});
        if (reaches_end_of_entry(grammar, node)) {
            return true;
        }
    }
    return false;
}

// Arithmetic-expression grammar; must accept "b+a*b".
test "expr" {
    const text =
        \\S -> B A
        \\A -> '+' B A
        \\A -> ''
        \\B -> D C
        \\C -> '*' D C
        \\C -> ''
        \\D -> '(' S ')'
        \\D -> 'a'
        \\D -> 'b'
    ;
    const input = "b+a*b";
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    try std.testing.expect(try check(&grammar, input, allocator));
}

test "simple 0 - success" {
    const text =
        \\S -> A S 'd'
        \\S -> B S
        \\S -> ''
        \\A -> 'a'
        \\A -> 'c'
        \\B -> 'a'
        \\B -> 'b'
    ;
    const input = "aad";
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    try std.testing.expect(try check(&grammar, input, allocator));
}

test "simple 0 - fail" {
    const text =
        \\S -> A S 'd'
        \\S -> B S
        \\S -> ''
        \\A -> 'a'
        \\A -> 'c'
        \\B -> 'a'
        \\B -> 'b'
    ;
    // "accd" is not derivable from this grammar; check must reject it.
    const input = "accd";
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    try std.testing.expect(!try check(&grammar, input, allocator));
}

// Property test: every sentence generated from the grammar must be accepted.
test "simple 0 - fuzzy" {
    const text =
        \\S -> A S 'd'
        \\S -> B S
        \\S -> ''
        \\A -> 'a'
        \\A -> 'c'
        \\B -> 'a'
        \\B -> 'b'
    ;
    const allocator = std.testing.allocator;
    var grammar = try Grammar.parse("S", text, allocator);
    defer grammar.deinit();
    // Generator driven by a random source picking uniform indices < n.
    var generator = Generator(struct {
        const Self = @This();
        pub fn next(_: *Self, n: usize) usize {
            return std.crypto.random.uintLessThan(usize, n);
        }
    }){};
    for (0..100) |_| {
        const input = try generator.sentential_from_grammar(&grammar, 1000, allocator);
        defer allocator.free(input);
        try std.testing.expect(try check(&grammar, input, allocator));
    }
}