aboutsummaryrefslogtreecommitdiff
path: root/src/recognizer.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/recognizer.zig')
-rw-r--r--src/recognizer.zig228
1 files changed, 144 insertions, 84 deletions
diff --git a/src/recognizer.zig b/src/recognizer.zig
index 25a7944..4a266a2 100644
--- a/src/recognizer.zig
+++ b/src/recognizer.zig
@@ -8,10 +8,31 @@ const gss = @import("gss.zig");
const State = struct {
const Self = @This();
+ pub const Context = struct {
+ pub fn hash(_: @This(), s: Self) u64 {
+ return std.hash.Wyhash.hash(0, std.mem.asBytes(&s));
+ }
+
+ pub fn eql(_: @This(), a: Self, b: Self) bool {
+ return a.id == b.id
+ and a.rule_index == b.rule_index
+ and a.inner_position == b.inner_position;
+ }
+ };
+
id: usize,
rule_index: usize,
inner_position: usize,
- input_position: usize,
+
+ pub inline fn next(self: *const Self) Self {
+ var other = self.*;
+ other .inner_position += 1;
+ return other;
+ }
+
+ pub inline fn is_at_end_of_rule(self: *Self, rule: NonTerminal.Rule) bool {
+ return self.inner_position == rule.len;
+ }
pub fn format(
self: *const Self,
@@ -22,20 +43,16 @@ const State = struct {
_ = fmt;
_ = options;
- try writer.print("[ {}, {}, {}, {} ]", .{
+ try writer.print("[ {}, {}, {} ]", .{
self.id,
self.rule_index,
self.inner_position,
- self.input_position,
});
}
- pub fn debug(self: *const Self, grammar: *Grammar, input: []const u8) void {
+ pub fn debug(self: *const Self, grammar: *Grammar) void {
const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index];
- std.debug.print("input ({s}○{s}) state (", .{
- input[0..self.input_position],
- input[self.input_position..],
- });
+ std.debug.print("{*} state (", .{ self });
for (rule, 0..) |char, index| {
if (index == self.inner_position) {
@@ -52,6 +69,93 @@ const State = struct {
}
};
+// NOTE: Since I can't remember shit:
+//
+// if (grammar.is_next_char(
+// non_terminal.rules()[node.state.rule_index][node.state.inner_position..],
+// node.state.id,
+// input[node.state.input_position],
+// ))
+
+pub fn forward_and_consume(
+ grammar: *Grammar,
+ node: *gss.Node(State),
+ graph: *gss.Graph(State),
+ terminal: u8,
+ queue: *std.ArrayList(*gss.Node(State)),
+ node_cache: *std.HashMap(State, *gss.Node(State), State.Context, 80),
+) !void {
+ const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
+
+ if (node.state.inner_position == rule.len) {
+ for (node.parents.items) |parent| {
+ const next = try graph.clone(parent, parent.state.next());
+ try forward_and_consume(grammar, next, graph, terminal, queue, node_cache);
+ }
+
+ return;
+ }
+
+ switch (rule[node.state.inner_position]) {
+ .terminal => |t| if (t == terminal) try queue.append(try graph.clone(node, node.state.next())),
+ .non_terminal => |n| {
+ const non_terminal = grammar.non_terminal_by_id(n);
+ if (grammar.is_next_char(
+ non_terminal.rules()[node.state.rule_index][node.state.inner_position..],
+ node.state.id,
+ terminal,
+ )) {
+ for (0..grammar.non_terminal_by_id(n).rules().len) |rule_index| {
+ const state = State {
+ .id = n,
+ .rule_index = rule_index,
+ .inner_position = 0,
+ };
+
+ const result = try node_cache.getOrPut(state);
+
+ if (!result.found_existing) {
+ result.value_ptr.* = try graph.push(node, state);
+ try forward_and_consume(grammar, result.value_ptr.*, graph, terminal, queue, node_cache);
+ } else {
+ std.debug.print("HERE {}{*} <- {}{*}\n", .{node, node, result.value_ptr.*, result.value_ptr.*});
+ try result.value_ptr.*.parents.append(node);
+ }
+ }
+ }
+ },
+ .epsilon => {
+ const next = try graph.clone(node, node.state.next());
+ try forward_and_consume(grammar, next, graph, terminal, queue, node_cache);
+ },
+ }
+}
+
+pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
+ const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
+ if (node.state.is_at_end_of_rule(rule)) {
+ for (node.parents.items) |parent| {
+ if (reaches_end_of_entry(grammar, parent)) {
+ return true;
+ }
+ }
+
+ std.debug.print("HERE {}\n", .{ node.is_toplevel });
+
+ return node.is_toplevel;
+ }
+
+ for (rule[node.state.inner_position..]) |character| {
+ switch (character) {
+ .terminal => return false,
+ .non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END),
+ else => {}
+ }
+ }
+
+ return grammar.non_terminal_by_id(node.state.id).follows.is_set(Character.END);
+}
+
pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool {
var arena = std.heap.ArenaAllocator.init(inner_allocator);
defer arena.deinit();
@@ -65,94 +169,40 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo
defer for (queues) |queue| queue.deinit();
var processing_queue = &queues[0];
+ var graph = gss.Graph(State).init(allocator);
+
for (0..entry.rules().len) |index| {
- const node = try allocator.create(gss.Node(State));
- node.* = gss.Node(State).init(State {
+ const id = try graph.add_toplevel(State {
.id = entry.id,
.rule_index = index,
.inner_position = 0,
- .input_position = 0,
});
- try processing_queue.append(node);
+ try processing_queue.append(id);
}
var next_processing_queue = &queues[1];
- while (processing_queue.items.len > 0) {
- for (processing_queue.items) |node| {
- const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
-
- if (node.state.inner_position == rule.len) {
- if (
- node.parent == null and
- node.state.id == entry.id and
- node.state.input_position == input.len
- ) {
- return true;
- }
-
- const old_state, const parent_node = node.pop(allocator);
- if (parent_node) |parent| {
- const sibling = try parent.clone(State {
- .id = parent.state.id,
- .rule_index = parent.state.rule_index,
- .inner_position = parent.state.inner_position + 1,
- .input_position = old_state.input_position,
- }, allocator);
- try next_processing_queue.append(sibling);
- }
- } else {
- const epsilon_only = node.state.input_position == input.len;
-
- switch (rule[node.state.inner_position]) {
- .terminal => |t| {
- if (!epsilon_only and input[node.state.input_position] == t) {
- const sibling = try node.clone(State {
- .id = node.state.id,
- .rule_index = node.state.rule_index,
- .inner_position = node.state.inner_position + 1,
- .input_position = node.state.input_position + 1,
- }, allocator);
- try next_processing_queue.append(sibling);
- }
- },
- .non_terminal => |n| {
- const non_terminal = grammar.non_terminal_by_id(n);
+ var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator);
+ defer node_cache.deinit();
- if (!epsilon_only and non_terminal.first.is_set(input[node.state.input_position])) {
- for (0..non_terminal.rules().len) |index| {
- const child = try node.push(State {
- .id = non_terminal.id,
- .rule_index = index,
- .inner_position = 0,
- .input_position = node.state.input_position,
- }, allocator);
- try next_processing_queue.append(child);
- }
- }
+ for (input) |character| {
+ std.debug.print("=============\n", .{});
+ for (processing_queue.items) |node| {
+ node.state.debug(grammar);
+ std.debug.print("PARENT:\n", .{});
+ for (node.parents.items) |parent| { parent.state.debug(grammar); }
+ std.debug.print("-------------\n", .{});
+ try forward_and_consume(
+ grammar,
+ node,
+ &graph,
+ character,
+ next_processing_queue,
+ &node_cache,
+ );
- if (non_terminal.first.is_set(Character.EPSILON)) {
- const sibling = try node.clone(State {
- .id = node.state.id,
- .rule_index = node.state.rule_index,
- .inner_position = node.state.inner_position + 1,
- .input_position = node.state.input_position
- }, allocator);
- try next_processing_queue.append(sibling);
- }
- },
- .epsilon => {
- const sibling = try node.clone(State {
- .id = node.state.id,
- .rule_index = node.state.rule_index,
- .inner_position = node.state.inner_position + 1,
- .input_position = node.state.input_position
- }, allocator);
- try next_processing_queue.append(sibling);
- },
- }
- }
+ node_cache.clearRetainingCapacity();
}
const swap = processing_queue;
@@ -161,6 +211,16 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo
next_processing_queue.clearRetainingCapacity();
}
+ std.debug.print("==== END ====\n", .{});
+
+ for (processing_queue.items) |node| {
+ node.state.debug(grammar);
+ std.debug.print("parent = {any}\n", .{node.parents.items});
+ if (reaches_end_of_entry(grammar, node)) {
+ return true;
+ }
+ }
+
return false;
}