aboutsummaryrefslogtreecommitdiff
path: root/src/recognizer.zig
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2025-05-15 17:49:24 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2025-05-15 17:49:24 +0200
commit0273079fa0004f9f93ae96615f97934b832f8c51 (patch)
tree07a2d68d856efc24e80d3c0b77dbc1dbb54d9d14 /src/recognizer.zig
parentf67e7524859f0b5462065a23c4334c97710f4f84 (diff)
minor optimization: add a first set to each rule variant
Diffstat (limited to 'src/recognizer.zig')
-rw-r--r--src/recognizer.zig135
1 files changed, 57 insertions, 78 deletions
diff --git a/src/recognizer.zig b/src/recognizer.zig
index 2eeae0d..05db2c7 100644
--- a/src/recognizer.zig
+++ b/src/recognizer.zig
@@ -1,6 +1,7 @@
const std = @import("std");
const Grammar = @import("grammar.zig");
const NonTerminal = @import("non-terminal.zig");
+const Rule = @import("rule.zig");
const Character = @import("character.zig").Character;
const Generator = @import("generator.zig").Generator;
const gss = @import("gss.zig");
@@ -30,8 +31,8 @@ const State = struct {
return other;
}
- pub inline fn is_at_end_of_rule(self: *Self, rule: NonTerminal.Rule) bool {
- return self.inner_position == rule.len;
+ pub inline fn is_at_end_of_rule(self: *Self, rule: Rule) bool {
+ return self.inner_position == rule.items.len;
}
pub fn format(
@@ -69,68 +70,6 @@ const State = struct {
}
};
-pub fn forward_and_consume(
- grammar: *Grammar,
- base: *gss.Node(State),
- graph: *gss.Graph(State),
- terminal: u8,
- queue: *std.ArrayList(*gss.Node(State)),
- node_cache: *std.HashMap(State, *gss.Node(State), State.Context, 80),
- allocator: std.mem.Allocator,
-) !void {
- var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator);
- try forward_queue.append(.{ base.state, base });
-
- while (forward_queue.popOrNull()) |checkpoint| {
- const last_state, const last_node = checkpoint;
- const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index];
-
- if (last_state.inner_position == rule.len) {
- for (last_node.parents.items) |parent| {
- try forward_queue.append(.{parent.state.next(), parent});
- }
-
- continue;
- }
-
- switch (rule[last_state.inner_position]) {
- .terminal => |t| if (t == terminal) {
- const node = try graph.clone(last_node, last_state.next());
- try queue.append(node);
- },
- .non_terminal => |n| {
- const non_terminal = grammar.non_terminal_by_id(n);
- for (0..grammar.non_terminal_by_id(n).rules().len) |rule_index| {
- if (grammar.is_next_char(
- non_terminal.rules()[rule_index],
- n,
- terminal,
- )) {
-
- const state = State {
- .id = n,
- .rule_index = rule_index,
- .inner_position = 0,
- };
-
- if (node_cache.get(state)) |parent| {
- try parent.parents.append(last_node);
- } else {
- const parent = try graph.clone(last_node, last_state);
- const next = try graph.push(parent, state);
- try node_cache.put(state, next);
- try forward_queue.append(.{state, next});
- }
- }
- }
- },
- .epsilon => {
- try forward_queue.append(.{last_state.next(), last_node});
- },
- }
- }
-}
-
pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
if (node.state.is_at_end_of_rule(rule)) {
@@ -143,7 +82,7 @@ pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
return node.is_toplevel;
}
- for (rule[node.state.inner_position..]) |character| {
+ for (rule.items[node.state.inner_position..]) |character| {
switch (character) {
.terminal => return false,
.non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END),
@@ -179,24 +118,64 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo
try processing_queue.append(id);
}
- var next_processing_queue = &queues[1];
-
var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator);
- defer node_cache.deinit();
+ var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator);
+
+ var next_processing_queue = &queues[1];
for (input) |character| {
- for (processing_queue.items) |node| {
- try forward_and_consume(
- grammar,
- node,
- &graph,
- character,
- next_processing_queue,
- &node_cache,
- allocator,
- );
+ for (processing_queue.items) |base| {
+ try forward_queue.append(.{ base.state, base });
+
+ while (forward_queue.popOrNull()) |checkpoint| {
+ const last_state, const last_node = checkpoint;
+ const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index];
+
+ if (last_state.inner_position == rule.items.len) {
+ for (last_node.parents.items) |parent| {
+ try forward_queue.append(.{parent.state.next(), parent});
+ }
+
+ continue;
+ }
+
+ switch (rule.items[last_state.inner_position]) {
+ .terminal => |t| if (t == character) {
+ const node = try graph.clone(last_node, last_state.next());
+ try next_processing_queue.append(node);
+ },
+ .non_terminal => |n| {
+ const non_terminal = grammar.non_terminal_by_id(n);
+ for (grammar.non_terminal_by_id(n).rules(), 0..) |child_rule, rule_index| {
+ if (child_rule.first.is_set(character)) {
+
+ const state = State {
+ .id = n,
+ .rule_index = rule_index,
+ .inner_position = 0,
+ };
+
+ if (node_cache.get(state)) |parent| {
+ try parent.parents.append(last_node);
+ } else {
+ const parent = try graph.clone(last_node, last_state);
+ const next = try graph.push(parent, state);
+ try node_cache.put(state, next);
+ try forward_queue.append(.{state, next});
+ }
+ } else if (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character)) {
+ @panic("not implemented");
+ }
+ }
+ },
+ .epsilon => {
+ try forward_queue.append(.{last_state.next(), last_node});
+ },
+ }
+ }
node_cache.clearRetainingCapacity();
+ forward_queue.clearRetainingCapacity();
}
const swap = processing_queue;