minor optimization: add a first set to each rule variant

author: Nathan Reiner <nathan@nathanreiner.xyz> 2025-05-15 17:49:24 +0200
committer: Nathan Reiner <nathan@nathanreiner.xyz> 2025-05-15 17:49:24 +0200
commit: 0273079fa0004f9f93ae96615f97934b832f8c51 (patch)
tree: 07a2d68d856efc24e80d3c0b77dbc1dbb54d9d14 /src/recognizer.zig
parent: f67e7524859f0b5462065a23c4334c97710f4f84 (diff)
1 files changed, 57 insertions, 78 deletions
diff --git a/src/recognizer.zig b/src/recognizer.zig
index 2eeae0d..05db2c7 100644
--- a/src/recognizer.zig
+++ b/src/recognizer.zig
@@ -1,6 +1,7 @@
 const std = @import("std");
 const Grammar = @import("grammar.zig");
 const NonTerminal = @import("non-terminal.zig");
+const Rule = @import("rule.zig");
 const Character = @import("character.zig").Character;
 const Generator = @import("generator.zig").Generator;
 const gss = @import("gss.zig");
@@ -30,8 +31,8 @@ const State = struct {
 		return other;
 	}
 
-	pub inline fn is_at_end_of_rule(self: *Self, rule: NonTerminal.Rule) bool {
-		return self.inner_position == rule.len;
+	pub inline fn is_at_end_of_rule(self: *Self, rule: Rule) bool {
+		return self.inner_position == rule.items.len;
 	}
 
 	pub fn format(
@@ -69,68 +70,6 @@ const State = struct {
 	}
 };
 
-pub fn forward_and_consume(
-	grammar: *Grammar,
-	base: *gss.Node(State),
-	graph: *gss.Graph(State),
-	terminal: u8,
-	queue: *std.ArrayList(*gss.Node(State)),
-	node_cache: *std.HashMap(State, *gss.Node(State), State.Context, 80),
-	allocator: std.mem.Allocator,
-) !void {
-	var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator);
-	try forward_queue.append(.{ base.state, base });
-
-	while (forward_queue.popOrNull()) |checkpoint| {
-		const last_state, const last_node = checkpoint;
-		const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index];
-
-		if (last_state.inner_position == rule.len) {
-			for (last_node.parents.items) |parent| {
-				try forward_queue.append(.{parent.state.next(), parent});
-			}
-
-			continue;
-		}
-
-		switch (rule[last_state.inner_position]) {
-			.terminal => |t| if (t == terminal) {
-				const node = try graph.clone(last_node, last_state.next());
-				try queue.append(node);
-			},
-			.non_terminal => |n| {
-				const non_terminal = grammar.non_terminal_by_id(n);
-				for (0..grammar.non_terminal_by_id(n).rules().len) |rule_index| {
-					if (grammar.is_next_char(
-							non_terminal.rules()[rule_index],
-							n,
-							terminal,
-					)) {
-
-						const state = State {
-							.id = n,
-							.rule_index = rule_index,
-							.inner_position = 0,
-						};
-
-						if (node_cache.get(state)) |parent| {
-							try parent.parents.append(last_node);
-						} else {
-							const parent = try graph.clone(last_node, last_state);
-							const next = try graph.push(parent, state);
-							try node_cache.put(state, next);
-							try forward_queue.append(.{state, next});
-						}
-					}
-				}
-			},
-			.epsilon => {
-				try forward_queue.append(.{last_state.next(), last_node});
-			},
-		}
-	}
-}
-
 pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
 	const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
 	if (node.state.is_at_end_of_rule(rule)) {
@@ -143,7 +82,7 @@ pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
 		return node.is_toplevel;
 	}
 
-	for (rule[node.state.inner_position..]) |character| {
+	for (rule.items[node.state.inner_position..]) |character| {
 		switch (character) {
 			.terminal => return false,
 			.non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END),
@@ -179,24 +118,64 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo
 		try processing_queue.append(id);
 	}
 
-	var next_processing_queue = &queues[1];
-
 	var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator);
-	defer node_cache.deinit();
+	var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator);
+
+	var next_processing_queue = &queues[1];
 
 	for (input) |character| {
-		for (processing_queue.items) |node| {
-			try forward_and_consume(
-				grammar,
-				node,
-				&graph,
-				character,
-				next_processing_queue,
-				&node_cache,
-				allocator,
-			);
+		for (processing_queue.items) |base| {
+			try forward_queue.append(.{ base.state, base });
+
+			while (forward_queue.popOrNull()) |checkpoint| {
+				const last_state, const last_node = checkpoint;
+				const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index];
+
+				if (last_state.inner_position == rule.items.len) {
+					for (last_node.parents.items) |parent| {
+						try forward_queue.append(.{parent.state.next(), parent});
+					}
+
+					continue;
+				}
+
+				switch (rule.items[last_state.inner_position]) {
+					.terminal => |t| if (t == character) {
+						const node = try graph.clone(last_node, last_state.next());
+						try next_processing_queue.append(node);
+					},
+					.non_terminal => |n| {
+						const non_terminal = grammar.non_terminal_by_id(n);
+						for (grammar.non_terminal_by_id(n).rules(), 0..) |child_rule, rule_index| {
+							if (child_rule.first.is_set(character)) {
+
+								const state = State {
+									.id = n,
+									.rule_index = rule_index,
+									.inner_position = 0,
+								};
+
+								if (node_cache.get(state)) |parent| {
+									try parent.parents.append(last_node);
+								} else {
+									const parent = try graph.clone(last_node, last_state);
+									const next = try graph.push(parent, state);
+									try node_cache.put(state, next);
+									try forward_queue.append(.{state, next});
+								}
+							} else if (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character)) {
+								@panic("not implemented");
+							}
+						}
+					},
+					.epsilon => {
+						try forward_queue.append(.{last_state.next(), last_node});
+					},
+				}
+			}
 
 			node_cache.clearRetainingCapacity();
+			forward_queue.clearRetainingCapacity();
 		}
 
 		const swap = processing_queue;
author	Nathan Reiner <nathan@nathanreiner.xyz>	2025-05-15 17:49:24 +0200
committer	Nathan Reiner <nathan@nathanreiner.xyz>	2025-05-15 17:49:24 +0200
commit	0273079fa0004f9f93ae96615f97934b832f8c51 (patch)
tree	07a2d68d856efc24e80d3c0b77dbc1dbb54d9d14 /src/recognizer.zig
parent	f67e7524859f0b5462065a23c4334c97710f4f84 (diff)