aboutsummaryrefslogtreecommitdiff
path: root/src/recognizer.zig
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2025-07-19 20:18:15 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2025-07-19 20:18:15 +0200
commitae10b7d764d9587ab92a682781f8479107e8dff0 (patch)
tree13e060f304ca1cac98ae1e71a2a6e27d9c5fb269 /src/recognizer.zig
parentd138a622dcc77302cc452c52946f6202b6a03f5e (diff)
add pex
Diffstat (limited to 'src/recognizer.zig')
-rw-r--r--src/recognizer.zig456
1 files changed, 225 insertions, 231 deletions
diff --git a/src/recognizer.zig b/src/recognizer.zig
index 1934750..6af7de1 100644
--- a/src/recognizer.zig
+++ b/src/recognizer.zig
@@ -5,295 +5,289 @@ const Rule = @import("rule.zig");
const Character = @import("character.zig").Character;
const Generator = @import("generator.zig").Generator;
const gss = @import("gss.zig");
+const Pex = @import("pex/root.zig");
-const State = struct {
- const Self = @This();
+pub const Result = struct {
+ const Info = struct {
+ const Self = @This();
- pub const Context = struct {
- pub fn hash(_: @This(), s: Self) u64 {
- return std.hash.Wyhash.hash(0, std.mem.asBytes(&s));
- }
+ max_heap_size: usize,
+ number_of_nodes: usize,
- pub fn eql(_: @This(), a: Self, b: Self) bool {
- return a.id == b.id
- and a.rule_index == b.rule_index
- and a.inner_position == b.inner_position;
- }
- };
+ pub fn format(
+ self: *const Self,
+ comptime fmt: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = fmt;
+ _ = options;
- id: usize,
- rule_index: usize,
- inner_position: usize,
+ try writer.print("{:.2}, {} nodes", .{
+ std.fmt.fmtIntSizeBin(self.max_heap_size),
+ self.number_of_nodes,
+ });
+ }
+ };
- pub inline fn next(self: *const Self) Self {
- var other = self.*;
- other .inner_position += 1;
- return other;
- }
-
- pub inline fn is_at_end_of_rule(self: *Self, rule: Rule) bool {
- return self.inner_position == rule.items.len;
- }
-
- pub fn format(
- self: *const Self,
- comptime fmt: []const u8,
- options: std.fmt.FormatOptions,
- writer: anytype,
- ) !void {
- _ = fmt;
- _ = options;
-
- try writer.print("[ {}, {}, {} ]", .{
- self.id,
- self.rule_index,
- self.inner_position,
- });
- }
-
- pub fn debug(self: *const Self, grammar: *Grammar) void {
- const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index];
- std.debug.print("{*} state (", .{ self });
-
- for (rule.items, 0..) |char, index| {
- if (index == self.inner_position) {
- std.debug.print("○", .{});
- }
-
- std.debug.print("{}", .{char});
- }
-
- if (rule.items.len == self.inner_position) {
- std.debug.print("○", .{});
- }
- std.debug.print(")\n", .{});
- }
+ is_accepted: bool,
+ info: Info,
};
-pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
- const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
+fn OpaqueContext(T: type) type {
+ return struct {
+ pub fn hash(_: @This(), h: T) u64 {
+ return @intFromPtr(h);
+ }
- if (node.parents.items.len == 0) {
- return true;
- }
+ pub fn eql(_: @This(), a: T, b: T) bool {
+ return a == b;
+ }
+ };
+}
- for (rule.items[node.state.inner_position..]) |character| {
- switch (character) {
- .terminal => return false,
- .non_terminal => |n| {
- if (!grammar.non_terminal_by_id(n).first.is_set(Character.EPSILON)) {
- return false;
- }
- },
- else => {}
- }
- }
- for (node.parents.items) |parent| {
- parent.state = parent.state.next();
- if (reaches_end_of_entry(grammar, parent)) {
- return true;
- }
- }
+pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(*Pex.Node)) bool {
+ if (node.parents.items.len == 0) {
+ return true;
+ }
- return false;
-}
+ var pex_node = node.state;
+ while (pex_node.next) |next| : (pex_node = next) {
+ switch (pex_node.instruction) {
+ .check => return false,
+ .call => |n| {
+ if (!grammar.non_terminal_by_id(n).first.is_set(Character.EPSILON)) {
+ return false;
+ }
+ },
+ else => {},
+ }
+ }
-pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool {
- var arena = std.heap.ArenaAllocator.init(inner_allocator);
- defer arena.deinit();
- const allocator = arena.allocator();
+ for (node.parents.items) |parent| {
+ if (parent.state.next) |next| {
+ parent.state = next;
- const entry = grammar.entry_point();
- var queues = [2]std.ArrayList(*gss.Node(State)) {
- std.ArrayList(*gss.Node(State)).init(allocator),
- std.ArrayList(*gss.Node(State)).init(allocator),
- };
- defer for (queues) |queue| queue.deinit();
- var processing_queue = &queues[0];
+ if (reaches_end_of_entry(grammar, parent)) {
+ return true;
+ }
+ }
+ }
- var graph = gss.Graph(State).init(allocator);
+ return false;
+}
- for (0..entry.rules().len) |index| {
- const id = try graph.add_toplevel(State {
- .id = entry.id,
- .rule_index = index,
- .inner_position = 0,
- });
+pub fn check(
+ pex: *const Pex,
+ input: []const u8,
+ inner_allocator: std.mem.Allocator
+) !Result {
+ var arena = std.heap.ArenaAllocator.init(inner_allocator);
+ defer arena.deinit();
+ const allocator = arena.allocator();
- try processing_queue.append(id);
- }
+ var queues = [2]std.ArrayList(*gss.Node(*Pex.Node)) {
+ std.ArrayList(*gss.Node(*Pex.Node)).init(allocator),
+ std.ArrayList(*gss.Node(*Pex.Node)).init(allocator),
+ };
+ defer for (queues) |queue| queue.deinit();
+ var processing_queue = &queues[0];
- var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator);
- var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator);
+ var graph = gss.Graph(*Pex.Node).init(allocator);
- var next_processing_queue = &queues[1];
+ for (pex.entry_point().heads.items) |head| {
+ try processing_queue.append(try graph.add_toplevel(head));
+ }
- for (input) |character| {
- for (processing_queue.items) |base| {
- try forward_queue.append(.{ base.state, base });
+ var node_cache = std.HashMap(
+ *Pex.Node,
+ *gss.Node(*Pex.Node),
+ OpaqueContext(*Pex.Node),
+ 80
+ ).init(allocator);
- while (forward_queue.pop()) |checkpoint| {
- const last_state, const last_node = checkpoint;
- const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index];
+ var forward_queue = std.ArrayList(struct { *Pex.Node, *gss.Node(*Pex.Node) }).init(allocator);
+ var next_processing_queue = &queues[1];
- if (last_state.inner_position == rule.items.len) {
- for (last_node.parents.items) |parent| {
- try forward_queue.append(.{parent.state.next(), parent});
- }
+ for (input) |character| {
+ for (processing_queue.items) |base| {
+ try forward_queue.append(.{ base.state, base });
+ try node_cache.put(base.state, base);
- continue;
- }
+ while (forward_queue.pop()) |checkpoint| {
+ const pex_node, const last_node = checkpoint;
- switch (rule.items[last_state.inner_position]) {
- .terminal => |t| if (t == character) {
- const node = try graph.clone(last_node, last_state.next());
- try next_processing_queue.append(node);
- },
- .non_terminal => |n| {
- const non_terminal = grammar.non_terminal_by_id(n);
- for (grammar.non_terminal_by_id(n).rules(), 0..) |child_rule, rule_index| {
- if (
- child_rule.first.is_set(character) or
- (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character))
- ) {
+ switch (pex_node.instruction) {
+ .check => |t| if (t == character) {
+ if (pex_node.next) |next| {
+ const node = try graph.clone(last_node, next);
+ try next_processing_queue.append(node);
+ }
+ },
+ .@"return" => {
+ for (last_node.parents.items) |parent| {
+ if (parent.state.next) |next| {
+ try forward_queue.append(.{next, parent});
+ }
+ }
+ },
+ .call => |id| {
+ const non_terminal = pex.grammar.non_terminal_by_id(id);
+ for (non_terminal.rules(), pex.blocks[id].heads.items) |child_rule, head| {
+ if (
+ child_rule.first.is_set(character) or
+ (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character))
+ ) {
- const state = State {
- .id = n,
- .rule_index = rule_index,
- .inner_position = 0,
- };
+ if (node_cache.get(head)) |parent| {
+ try parent.parents.append(last_node);
+ } else {
+ const parent = try graph.clone(last_node, pex_node);
+ const next = try graph.push(parent, head);
+ try node_cache.put(head, next);
+ try forward_queue.append(.{head, next});
+ }
+ }
+ }
+ },
+ .jump => |jump_next| {
+ if (pex_node.next) |next| {
+ try forward_queue.append(.{next, last_node});
+ }
- if (node_cache.get(state)) |parent| {
- try parent.parents.append(last_node);
- } else {
- const parent = try graph.clone(last_node, last_state);
- const next = try graph.push(parent, state);
- try node_cache.put(state, next);
- try forward_queue.append(.{state, next});
- }
- }
- }
- },
- .epsilon => {
- try forward_queue.append(.{last_state.next(), last_node});
- },
- }
- }
+ for (jump_next.items) |next| {
+ try forward_queue.append(.{next, last_node});
+ }
+ },
+ }
+ }
- node_cache.clearRetainingCapacity();
- forward_queue.clearRetainingCapacity();
- }
+ node_cache.clearRetainingCapacity();
+ forward_queue.clearRetainingCapacity();
+ }
- const swap = processing_queue;
- processing_queue = next_processing_queue;
- next_processing_queue = swap;
- next_processing_queue.clearRetainingCapacity();
- }
+ const swap = processing_queue;
+ processing_queue = next_processing_queue;
+ next_processing_queue = swap;
+ next_processing_queue.clearRetainingCapacity();
+ }
- for (processing_queue.items) |node| {
- if (reaches_end_of_entry(grammar, node)) {
- return true;
- }
- }
+ for (processing_queue.items) |node| {
+ if (reaches_end_of_entry(pex.grammar, node)) {
+ return .{
+ .is_accepted = true,
+ .info = .{
+ .max_heap_size = arena.queryCapacity(),
+ .number_of_nodes = graph.number_of_nodes,
+ },
+ };
+ }
+ }
- return false;
+ return .{
+ .is_accepted = false,
+ .info = .{
+ .max_heap_size = arena.queryCapacity(),
+ .number_of_nodes = graph.number_of_nodes,
+ },
+ };
}
test "expr" {
- const text =
- \\S -> B A
- \\A -> '+' B A
- \\A -> ''
- \\B -> D C
- \\C -> '*' D C
- \\C -> ''
- \\D -> '(' S ')'
- \\D -> 'a'
- \\D -> 'b'
- ;
+ const text =
+ \\S -> B A
+ \\A -> '+' B A
+ \\A -> ''
+ \\B -> D C
+ \\C -> '*' D C
+ \\C -> ''
+ \\D -> '(' S ')'
+ \\D -> 'a'
+ \\D -> 'b'
+ ;
- const input = "b+a*b";
+ const input = "b+a*b";
- const allocator = std.testing.allocator;
+ const allocator = std.testing.allocator;
- var grammar = try Grammar.parse("S", text, allocator);
- defer grammar.deinit();
+ var grammar = try Grammar.parse("S", text, allocator);
+ defer grammar.deinit();
- try std.testing.expect(try check(&grammar, input, allocator));
+ try std.testing.expect(try check(&grammar, input, allocator));
}
test "simple 0 - success" {
- const text =
- \\S -> A S 'd'
- \\S -> B S
- \\S -> ''
- \\A -> 'a'
- \\A -> 'c'
- \\B -> 'a'
- \\B -> 'b'
- ;
+ const text =
+ \\S -> A S 'd'
+ \\S -> B S
+ \\S -> ''
+ \\A -> 'a'
+ \\A -> 'c'
+ \\B -> 'a'
+ \\B -> 'b'
+ ;
- const input = "aad";
+ const input = "aad";
- const allocator = std.testing.allocator;
+ const allocator = std.testing.allocator;
- var grammar = try Grammar.parse("S", text, allocator);
- defer grammar.deinit();
+ var grammar = try Grammar.parse("S", text, allocator);
+ defer grammar.deinit();
- try std.testing.expect(try check(&grammar, input, allocator));
+ try std.testing.expect(try check(&grammar, input, allocator));
}
test "simple 0 - fail" {
- const text =
- \\S -> A S 'd'
- \\S -> B S
- \\S -> ''
- \\A -> 'a'
- \\A -> 'c'
- \\B -> 'a'
- \\B -> 'b'
- ;
+ const text =
+ \\S -> A S 'd'
+ \\S -> B S
+ \\S -> ''
+ \\A -> 'a'
+ \\A -> 'c'
+ \\B -> 'a'
+ \\B -> 'b'
+ ;
- const input = "accd";
+ const input = "accd";
- const allocator = std.testing.allocator;
+ const allocator = std.testing.allocator;
- var grammar = try Grammar.parse("S", text, allocator);
- defer grammar.deinit();
+ var grammar = try Grammar.parse("S", text, allocator);
+ defer grammar.deinit();
- try std.testing.expect(!try check(&grammar, input, allocator));
+ try std.testing.expect(!try check(&grammar, input, allocator));
}
test "simple 0 - fuzzy" {
- const text =
- \\S -> A S 'd'
- \\S -> B S
- \\S -> ''
- \\A -> 'a'
- \\A -> 'c'
- \\B -> 'a'
- \\B -> 'b'
- ;
+ const text =
+ \\S -> A S 'd'
+ \\S -> B S
+ \\S -> ''
+ \\A -> 'a'
+ \\A -> 'c'
+ \\B -> 'a'
+ \\B -> 'b'
+ ;
- const allocator = std.testing.allocator;
+ const allocator = std.testing.allocator;
- var grammar = try Grammar.parse("S", text, allocator);
- defer grammar.deinit();
+ var grammar = try Grammar.parse("S", text, allocator);
+ defer grammar.deinit();
- var generator = Generator(struct {
- const Self = @This();
+ var generator = Generator(struct {
+ const Self = @This();
- pub fn next(_: *Self, n: usize) usize {
- return std.crypto.random.uintLessThan(usize, n);
- }
- }){};
+ pub fn next(_: *Self, n: usize) usize {
+ return std.crypto.random.uintLessThan(usize, n);
+ }
+ }){};
- for (0..100) |_| {
- const input = try generator.sentential_from_grammar(&grammar, 1000, allocator);
- defer allocator.free(input);
+ for (0..100) |_| {
+ const input = try generator.sentential_from_grammar(&grammar, 1000, allocator);
+ defer allocator.free(input);
- try std.testing.expect(try check(&grammar, input, allocator));
- }
+ try std.testing.expect(try check(&grammar, input, allocator));
+ }
}