From 0273079fa0004f9f93ae96615f97934b832f8c51 Mon Sep 17 00:00:00 2001 From: Nathan Reiner Date: Thu, 15 May 2025 17:49:24 +0200 Subject: minor optimization: add a first set to each rule variant --- src/recognizer.zig | 135 ++++++++++++++++++++++------------------------------- 1 file changed, 57 insertions(+), 78 deletions(-) (limited to 'src/recognizer.zig') diff --git a/src/recognizer.zig b/src/recognizer.zig index 2eeae0d..05db2c7 100644 --- a/src/recognizer.zig +++ b/src/recognizer.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Grammar = @import("grammar.zig"); const NonTerminal = @import("non-terminal.zig"); +const Rule = @import("rule.zig"); const Character = @import("character.zig").Character; const Generator = @import("generator.zig").Generator; const gss = @import("gss.zig"); @@ -30,8 +31,8 @@ const State = struct { return other; } - pub inline fn is_at_end_of_rule(self: *Self, rule: NonTerminal.Rule) bool { - return self.inner_position == rule.len; + pub inline fn is_at_end_of_rule(self: *Self, rule: Rule) bool { + return self.inner_position == rule.items.len; } pub fn format( @@ -69,68 +70,6 @@ const State = struct { } }; -pub fn forward_and_consume( - grammar: *Grammar, - base: *gss.Node(State), - graph: *gss.Graph(State), - terminal: u8, - queue: *std.ArrayList(*gss.Node(State)), - node_cache: *std.HashMap(State, *gss.Node(State), State.Context, 80), - allocator: std.mem.Allocator, -) !void { - var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator); - try forward_queue.append(.{ base.state, base }); - - while (forward_queue.popOrNull()) |checkpoint| { - const last_state, const last_node = checkpoint; - const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index]; - - if (last_state.inner_position == rule.len) { - for (last_node.parents.items) |parent| { - try forward_queue.append(.{parent.state.next(), parent}); - } - - continue; - } - - switch (rule[last_state.inner_position]) { - .terminal => |t| if (t == terminal) { - const node = try graph.clone(last_node, last_state.next()); - try queue.append(node); - }, - .non_terminal => |n| { - const non_terminal = grammar.non_terminal_by_id(n); - for (0..grammar.non_terminal_by_id(n).rules().len) |rule_index| { - if (grammar.is_next_char( - non_terminal.rules()[rule_index], - n, - terminal, - )) { - - const state = State { - .id = n, - .rule_index = rule_index, - .inner_position = 0, - }; - - if (node_cache.get(state)) |parent| { - try parent.parents.append(last_node); - } else { - const parent = try graph.clone(last_node, last_state); - const next = try graph.push(parent, state); - try node_cache.put(state, next); - try forward_queue.append(.{state, next}); - } - } - } - }, - .epsilon => { - try forward_queue.append(.{last_state.next(), last_node}); - }, - } - } -} - pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool { const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index]; if (node.state.is_at_end_of_rule(rule)) { @@ -143,7 +82,7 @@ pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool { return node.is_toplevel; } - for (rule[node.state.inner_position..]) |character| { + for (rule.items[node.state.inner_position..]) |character| { switch (character) { .terminal => return false, .non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END), @@ -179,24 +118,64 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo try processing_queue.append(id); } - var next_processing_queue = &queues[1]; - var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator); - defer node_cache.deinit(); + var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator); + + var next_processing_queue = &queues[1]; for (input) |character| { - for (processing_queue.items) |node| { - try forward_and_consume( - grammar, - node, - &graph, - character, - next_processing_queue, - &node_cache, - allocator, - ); + for (processing_queue.items) |base| { + try forward_queue.append(.{ base.state, base }); + + while (forward_queue.popOrNull()) |checkpoint| { + const last_state, const last_node = checkpoint; + const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index]; + + if (last_state.inner_position == rule.items.len) { + for (last_node.parents.items) |parent| { + try forward_queue.append(.{parent.state.next(), parent}); + } + + continue; + } + + switch (rule.items[last_state.inner_position]) { + .terminal => |t| if (t == character) { + const node = try graph.clone(last_node, last_state.next()); + try next_processing_queue.append(node); + }, + .non_terminal => |n| { + const non_terminal = grammar.non_terminal_by_id(n); + for (grammar.non_terminal_by_id(n).rules(), 0..) |child_rule, rule_index| { + if (child_rule.first.is_set(character)) { + + const state = State { + .id = n, + .rule_index = rule_index, + .inner_position = 0, + }; + + if (node_cache.get(state)) |parent| { + try parent.parents.append(last_node); + } else { + const parent = try graph.clone(last_node, last_state); + const next = try graph.push(parent, state); + try node_cache.put(state, next); + try forward_queue.append(.{state, next}); + } + } else if (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character)) { + @panic("not implemented"); + } + } + }, + .epsilon => { + try forward_queue.append(.{last_state.next(), last_node}); + }, + } + } node_cache.clearRetainingCapacity(); + forward_queue.clearRetainingCapacity(); } const swap = processing_queue; -- cgit v1.2.3-70-g09d2