diff options
| author | Nathan Reiner <nathan@nathanreiner.xyz> | 2025-07-19 20:18:15 +0200 |
|---|---|---|
| committer | Nathan Reiner <nathan@nathanreiner.xyz> | 2025-07-19 20:18:15 +0200 |
| commit | ae10b7d764d9587ab92a682781f8479107e8dff0 (patch) | |
| tree | 13e060f304ca1cac98ae1e71a2a6e27d9c5fb269 /src/recognizer.zig | |
| parent | d138a622dcc77302cc452c52946f6202b6a03f5e (diff) | |
add pex
Diffstat (limited to 'src/recognizer.zig')
| -rw-r--r-- | src/recognizer.zig | 456 |
1 files changed, 225 insertions, 231 deletions
diff --git a/src/recognizer.zig b/src/recognizer.zig index 1934750..6af7de1 100644 --- a/src/recognizer.zig +++ b/src/recognizer.zig @@ -5,295 +5,289 @@ const Rule = @import("rule.zig"); const Character = @import("character.zig").Character; const Generator = @import("generator.zig").Generator; const gss = @import("gss.zig"); +const Pex = @import("pex/root.zig"); -const State = struct { - const Self = @This(); +pub const Result = struct { + const Info = struct { + const Self = @This(); - pub const Context = struct { - pub fn hash(_: @This(), s: Self) u64 { - return std.hash.Wyhash.hash(0, std.mem.asBytes(&s)); - } + max_heap_size: usize, + number_of_nodes: usize, - pub fn eql(_: @This(), a: Self, b: Self) bool { - return a.id == b.id - and a.rule_index == b.rule_index - and a.inner_position == b.inner_position; - } - }; + pub fn format( + self: *const Self, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; - id: usize, - rule_index: usize, - inner_position: usize, + try writer.print("{:.2}, {} nodes", .{ + std.fmt.fmtIntSizeBin(self.max_heap_size), + self.number_of_nodes, + }); + } + }; - pub inline fn next(self: *const Self) Self { - var other = self.*; - other .inner_position += 1; - return other; - } - - pub inline fn is_at_end_of_rule(self: *Self, rule: Rule) bool { - return self.inner_position == rule.items.len; - } - - pub fn format( - self: *const Self, - comptime fmt: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) !void { - _ = fmt; - _ = options; - - try writer.print("[ {}, {}, {} ]", .{ - self.id, - self.rule_index, - self.inner_position, - }); - } - - pub fn debug(self: *const Self, grammar: *Grammar) void { - const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index]; - std.debug.print("{*} state (", .{ self }); - - for (rule.items, 0..) |char, index| { - if (index == self.inner_position) { - std.debug.print("○", .{}); - } - - std.debug.print("{}", .{char}); - } - - if (rule.items.len == self.inner_position) { - std.debug.print("○", .{}); - } - std.debug.print(")\n", .{}); - } + is_accepted: bool, + info: Info, }; -pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool { - const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index]; +fn OpaqueContext(T: type) type { + return struct { + pub fn hash(_: @This(), h: T) u64 { + return @intFromPtr(h); + } - if (node.parents.items.len == 0) { - return true; - } + pub fn eql(_: @This(), a: T, b: T) bool { + return a == b; + } + }; +} - for (rule.items[node.state.inner_position..]) |character| { - switch (character) { - .terminal => return false, - .non_terminal => |n| { - if (!grammar.non_terminal_by_id(n).first.is_set(Character.EPSILON)) { - return false; - } - }, - else => {} - } - } - for (node.parents.items) |parent| { - parent.state = parent.state.next(); - if (reaches_end_of_entry(grammar, parent)) { - return true; - } - } +pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(*Pex.Node)) bool { + if (node.parents.items.len == 0) { + return true; + } - return false; -} + var pex_node = node.state; + while (pex_node.next) |next| : (pex_node = next) { + switch (pex_node.instruction) { + .check => return false, + .call => |n| { + if (!grammar.non_terminal_by_id(n).first.is_set(Character.EPSILON)) { + return false; + } + }, + else => {}, + } + } -pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool { - var arena = std.heap.ArenaAllocator.init(inner_allocator); - defer arena.deinit(); - const allocator = arena.allocator(); + for (node.parents.items) |parent| { + if (parent.state.next) |next| { + parent.state = next; - const entry = grammar.entry_point(); - var queues = [2]std.ArrayList(*gss.Node(State)) { - std.ArrayList(*gss.Node(State)).init(allocator), - std.ArrayList(*gss.Node(State)).init(allocator), - }; - defer for (queues) |queue| queue.deinit(); - var processing_queue = &queues[0]; + if (reaches_end_of_entry(grammar, parent)) { + return true; + } + } + } - var graph = gss.Graph(State).init(allocator); + return false; +} - for (0..entry.rules().len) |index| { - const id = try graph.add_toplevel(State { - .id = entry.id, - .rule_index = index, - .inner_position = 0, - }); +pub fn check( + pex: *const Pex, + input: []const u8, + inner_allocator: std.mem.Allocator +) !Result { + var arena = std.heap.ArenaAllocator.init(inner_allocator); + defer arena.deinit(); + const allocator = arena.allocator(); - try processing_queue.append(id); - } + var queues = [2]std.ArrayList(*gss.Node(*Pex.Node)) { + std.ArrayList(*gss.Node(*Pex.Node)).init(allocator), + std.ArrayList(*gss.Node(*Pex.Node)).init(allocator), + }; + defer for (queues) |queue| queue.deinit(); + var processing_queue = &queues[0]; - var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator); - var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator); + var graph = gss.Graph(*Pex.Node).init(allocator); - var next_processing_queue = &queues[1]; + for (pex.entry_point().heads.items) |head| { + try processing_queue.append(try graph.add_toplevel(head)); + } - for (input) |character| { - for (processing_queue.items) |base| { - try forward_queue.append(.{ base.state, base }); + var node_cache = std.HashMap( + *Pex.Node, + *gss.Node(*Pex.Node), + OpaqueContext(*Pex.Node), + 80 + ).init(allocator); - while (forward_queue.pop()) |checkpoint| { - const last_state, const last_node = checkpoint; - const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index]; + var forward_queue = std.ArrayList(struct { *Pex.Node, *gss.Node(*Pex.Node) }).init(allocator); + var next_processing_queue = &queues[1]; - if (last_state.inner_position == rule.items.len) { - for (last_node.parents.items) |parent| { - try forward_queue.append(.{parent.state.next(), parent}); - } + for (input) |character| { + for (processing_queue.items) |base| { + try forward_queue.append(.{ base.state, base }); + try node_cache.put(base.state, base); - continue; - } + while (forward_queue.pop()) |checkpoint| { + const pex_node, const last_node = checkpoint; - switch (rule.items[last_state.inner_position]) { - .terminal => |t| if (t == character) { - const node = try graph.clone(last_node, last_state.next()); - try next_processing_queue.append(node); - }, - .non_terminal => |n| { - const non_terminal = grammar.non_terminal_by_id(n); - for (grammar.non_terminal_by_id(n).rules(), 0..) |child_rule, rule_index| { - if ( - child_rule.first.is_set(character) or - (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character)) - ) { + switch (pex_node.instruction) { + .check => |t| if (t == character) { + if (pex_node.next) |next| { + const node = try graph.clone(last_node, next); + try next_processing_queue.append(node); + } + }, + .@"return" => { + for (last_node.parents.items) |parent| { + if (parent.state.next) |next| { + try forward_queue.append(.{next, parent}); + } + } + }, + .call => |id| { + const non_terminal = pex.grammar.non_terminal_by_id(id); + for (non_terminal.rules(), pex.blocks[id].heads.items) |child_rule, head| { + if ( + child_rule.first.is_set(character) or + (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character)) + ) { - const state = State { - .id = n, - .rule_index = rule_index, - .inner_position = 0, - }; + if (node_cache.get(head)) |parent| { + try parent.parents.append(last_node); + } else { + const parent = try graph.clone(last_node, pex_node); + const next = try graph.push(parent, head); + try node_cache.put(head, next); + try forward_queue.append(.{head, next}); + } + } + } + }, + .jump => |jump_next| { + if (pex_node.next) |next| { + try forward_queue.append(.{next, last_node}); + } - if (node_cache.get(state)) |parent| { - try parent.parents.append(last_node); - } else { - const parent = try graph.clone(last_node, last_state); - const next = try graph.push(parent, state); - try node_cache.put(state, next); - try forward_queue.append(.{state, next}); - } - } - } - }, - .epsilon => { - try forward_queue.append(.{last_state.next(), last_node}); - }, - } - } + for (jump_next.items) |next| { + try forward_queue.append(.{next, last_node}); + } + }, + } + } - node_cache.clearRetainingCapacity(); - forward_queue.clearRetainingCapacity(); - } + node_cache.clearRetainingCapacity(); + forward_queue.clearRetainingCapacity(); + } - const swap = processing_queue; - processing_queue = next_processing_queue; - next_processing_queue = swap; - next_processing_queue.clearRetainingCapacity(); - } + const swap = processing_queue; + processing_queue = next_processing_queue; + next_processing_queue = swap; + next_processing_queue.clearRetainingCapacity(); + } - for (processing_queue.items) |node| { - if (reaches_end_of_entry(grammar, node)) { - return true; - } - } + for (processing_queue.items) |node| { + if (reaches_end_of_entry(pex.grammar, node)) { + return .{ + .is_accepted = true, + .info = .{ + .max_heap_size = arena.queryCapacity(), + .number_of_nodes = graph.number_of_nodes, + }, + }; + } + } - return false; + return .{ + .is_accepted = false, + .info = .{ + .max_heap_size = arena.queryCapacity(), + .number_of_nodes = graph.number_of_nodes, + }, + }; } test "expr" { - const text = - \\S -> B A - \\A -> '+' B A - \\A -> '' - \\B -> D C - \\C -> '*' D C - \\C -> '' - \\D -> '(' S ')' - \\D -> 'a' - \\D -> 'b' - ; + const text = + \\S -> B A + \\A -> '+' B A + \\A -> '' + \\B -> D C + \\C -> '*' D C + \\C -> '' + \\D -> '(' S ')' + \\D -> 'a' + \\D -> 'b' + ; - const input = "b+a*b"; + const input = "b+a*b"; - const allocator = std.testing.allocator; + const allocator = std.testing.allocator; - var grammar = try Grammar.parse("S", text, allocator); - defer grammar.deinit(); + var grammar = try Grammar.parse("S", text, allocator); + defer grammar.deinit(); - try std.testing.expect(try check(&grammar, input, allocator)); + try std.testing.expect(try check(&grammar, input, allocator)); } test "simple 0 - success" { - const text = - \\S -> A S 'd' - \\S -> B S - \\S -> '' - \\A -> 'a' - \\A -> 'c' - \\B -> 'a' - \\B -> 'b' - ; + const text = + \\S -> A S 'd' + \\S -> B S + \\S -> '' + \\A -> 'a' + \\A -> 'c' + \\B -> 'a' + \\B -> 'b' + ; - const input = "aad"; + const input = "aad"; - const allocator = std.testing.allocator; + const allocator = std.testing.allocator; - var grammar = try Grammar.parse("S", text, allocator); - defer grammar.deinit(); + var grammar = try Grammar.parse("S", text, allocator); + defer grammar.deinit(); - try std.testing.expect(try check(&grammar, input, allocator)); + try std.testing.expect(try check(&grammar, input, allocator)); } test "simple 0 - fail" { - const text = - \\S -> A S 'd' - \\S -> B S - \\S -> '' - \\A -> 'a' - \\A -> 'c' - \\B -> 'a' - \\B -> 'b' - ; + const text = + \\S -> A S 'd' + \\S -> B S + \\S -> '' + \\A -> 'a' + \\A -> 'c' + \\B -> 'a' + \\B -> 'b' + ; - const input = "accd"; + const input = "accd"; - const allocator = std.testing.allocator; + const allocator = std.testing.allocator; - var grammar = try Grammar.parse("S", text, allocator); - defer grammar.deinit(); + var grammar = try Grammar.parse("S", text, allocator); + defer grammar.deinit(); - try std.testing.expect(!try check(&grammar, input, allocator)); + try std.testing.expect(!try check(&grammar, input, allocator)); } test "simple 0 - fuzzy" { - const text = - \\S -> A S 'd' - \\S -> B S - \\S -> '' - \\A -> 'a' - \\A -> 'c' - \\B -> 'a' - \\B -> 'b' - ; + const text = + \\S -> A S 'd' + \\S -> B S + \\S -> '' + \\A -> 'a' + \\A -> 'c' + \\B -> 'a' + \\B -> 'b' + ; - const allocator = std.testing.allocator; + const allocator = std.testing.allocator; - var grammar = try Grammar.parse("S", text, allocator); - defer grammar.deinit(); + var grammar = try Grammar.parse("S", text, allocator); + defer grammar.deinit(); - var generator = Generator(struct { - const Self = @This(); + var generator = Generator(struct { + const Self = @This(); - pub fn next(_: *Self, n: usize) usize { - return std.crypto.random.uintLessThan(usize, n); - } - }){}; + pub fn next(_: *Self, n: usize) usize { + return std.crypto.random.uintLessThan(usize, n); + } + }){}; - for (0..100) |_| { - const input = try generator.sentential_from_grammar(&grammar, 1000, allocator); - defer allocator.free(input); + for (0..100) |_| { + const input = try generator.sentential_from_grammar(&grammar, 1000, allocator); + defer allocator.free(input); - try std.testing.expect(try check(&grammar, input, allocator)); - } + try std.testing.expect(try check(&grammar, input, allocator)); + } } |