diff options
| author | Nathan Reiner <nathan@nathanreiner.xyz> | 2025-05-13 15:45:50 +0200 |
|---|---|---|
| committer | Nathan Reiner <nathan@nathanreiner.xyz> | 2025-05-13 15:45:50 +0200 |
| commit | 76f808ea98313dc847f634b2d8e31066b7a1c68d (patch) | |
| tree | ceead1140f66547bb26cd3af364d37913b380398 /src | |
| parent | eff19cc15a9bf4df60e7f90c3a7ee525c65266c0 (diff) | |
make it work sometimes
Diffstat (limited to 'src')
| -rw-r--r-- | src/grammar.zig | 45 | ||||
| -rw-r--r-- | src/gss.zig | 76 | ||||
| -rw-r--r-- | src/recognizer.zig | 228 |
3 files changed, 245 insertions, 104 deletions
diff --git a/src/grammar.zig b/src/grammar.zig index 298932c..b3785ba 100644 --- a/src/grammar.zig +++ b/src/grammar.zig @@ -51,6 +51,12 @@ pub fn parse(entry: []const u8, buffer: []const u8, allocator: std.mem.Allocator self.generate_first(); self.generate_follows(); + for (self.non_terminals) |non_terminal| { + std.debug.print("{s} := {}\n", .{non_terminal.name, non_terminal.follows}); + } + + std.debug.print("{}\n", .{self}); + return self; } @@ -181,6 +187,45 @@ fn generate_follows(self: *Self) void { } } +pub fn is_next_char(self: *const Self, rule_slice: []Character, current: usize, char: u8) bool { + var is_char_in_first = false; + var is_epsilon_in_first = true; + + for (rule_slice) |character| { + switch(character) { + .terminal => |c| { + is_char_in_first = c == char; + break; + }, + .non_terminal => |n| { + is_char_in_first = self.non_terminals[n].first.is_set(char); + + if (is_char_in_first or !self.non_terminals[n].first.is_set(Character.EPSILON)) { + break; + } + }, + else => {}, + } + } + + for (rule_slice) |character| { + switch(character) { + .terminal => is_epsilon_in_first = false, + .non_terminal => |n| { + if (!self.non_terminals[n].first.is_set(Character.EPSILON)) { + is_epsilon_in_first = false; + break; + } + }, + else => {}, + } + } + + const result = is_char_in_first or (is_epsilon_in_first and self.non_terminals[current].follows.is_set(char)); + + return result; +} + pub fn format( self: *const Self, comptime fmt: []const u8, diff --git a/src/gss.zig b/src/gss.zig index 8f81d80..ef5d221 100644 --- a/src/gss.zig +++ b/src/gss.zig @@ -4,34 +4,39 @@ pub fn Node(T: type) type { return struct { const Self = @This(); - parent: ?*Self = null, + parents: *std.ArrayList(*Self), state: T, + is_toplevel: bool = false, - pub fn init(state: T) Self { - return Self { .state = state }; - } + pub fn init(state: T, allocator: std.mem.Allocator) !Self { + const parents = try allocator.create(std.ArrayList(*Self)); + parents.* = std.ArrayList(*Self).init(allocator); - pub fn push(self: *Self, state: T, allocator: std.mem.Allocator) !*Self { - const node = try allocator.create(Self); - node.parent = self; - node.state = state; - return node; + return Self { + .state = state, + .parents = parents, + }; } - pub fn clone(self: *Self, state: T, allocator: std.mem.Allocator) !*Self { - const node = try allocator.create(Self); - node.parent = self.parent; - node.state = state; - return node; + pub fn clone(self: *Self, state: T) !Self { + return Self { + .parents = self.parents, + .state = state, + .is_toplevel = self.is_toplevel, + }; } - pub fn pop(self: *Self, allocator: std.mem.Allocator) struct { T, ?*Self } { - const parent = self.parent; - const state = self.state; + pub fn push(self: *Self, state: T) !Self { + const parents = try self.parents.allocator.create(std.ArrayList(*Self)); + parents.* = std.ArrayList(*Self).init(self.parents.allocator); + var other = Self { + .parents = parents, + .state = state, + }; - allocator.destroy(self); + try other.parents.append(self); - return .{ state, parent }; + return other; } pub fn format( @@ -43,7 +48,38 @@ pub fn Node(T: type) type { _ = fmt; _ = options; - try writer.print("Node {{ {} }}", .{ self.state }); + try writer.print("Node {{ {} }}", .{self.state}); + } + }; +} + +pub fn Graph(T: type) type { + return struct { + const Self = @This(); + + allocator: std.mem.Allocator, + + pub fn init(allocator: std.mem.Allocator) Self { + return Self { .allocator = allocator }; + } + + pub fn push(self: *Self, node: *Node(T), state: T) !*Node(T) { + const child = try self.allocator.create(Node(T)); + child.* = try node.push(state); + return child; + } + + pub fn add_toplevel(self: *Self, state: T) !*Node(T) { + const node = try self.allocator.create(Node(T)); + node.* = try Node(T).init(state, self.allocator); + node.is_toplevel = true; + return node; + } + + pub fn clone(self: *Self, node: *Node(T), state: T) !*Node(T) { + const sibling = try self.allocator.create(Node(T)); + sibling.* = try node.clone(state); + return sibling; } }; } diff --git a/src/recognizer.zig b/src/recognizer.zig index 25a7944..4a266a2 100644 --- a/src/recognizer.zig +++ b/src/recognizer.zig @@ -8,10 +8,31 @@ const gss = @import("gss.zig"); const State = struct { const Self = @This(); + pub const Context = struct { + pub fn hash(_: @This(), s: Self) u64 { + return std.hash.Wyhash.hash(0, std.mem.asBytes(&s)); + } + + pub fn eql(_: @This(), a: Self, b: Self) bool { + return a.id == b.id + and a.rule_index == b.rule_index + and a.inner_position == b.inner_position; + } + }; + id: usize, rule_index: usize, inner_position: usize, - input_position: usize, + + pub inline fn next(self: *const Self) Self { + var other = self.*; + other .inner_position += 1; + return other; + } + + pub inline fn is_at_end_of_rule(self: *Self, rule: NonTerminal.Rule) bool { + return self.inner_position == rule.len; + } pub fn format( self: *const Self, @@ -22,20 +43,16 @@ const State = struct { _ = fmt; _ = options; - try writer.print("[ {}, {}, {}, {} ]", .{ + try writer.print("[ {}, {}, {} ]", .{ self.id, self.rule_index, self.inner_position, - self.input_position, }); } - pub fn debug(self: *const Self, grammar: *Grammar, input: []const u8) void { + pub fn debug(self: *const Self, grammar: *Grammar) void { const rule = grammar.non_terminal_by_id(self.id).rules()[self.rule_index]; - std.debug.print("input ({s}○{s}) state (", .{ - input[0..self.input_position], - input[self.input_position..], - }); + std.debug.print("{*} state (", .{ self }); for (rule, 0..) |char, index| { if (index == self.inner_position) { @@ -52,6 +69,93 @@ const State = struct { } }; +// NOTE: Since I can't remember shit: +// +// if (grammar.is_next_char( +// non_terminal.rules()[node.state.rule_index][node.state.inner_position..], +// node.state.id, +// input[node.state.input_position], +// )) + +pub fn forward_and_consume( + grammar: *Grammar, + node: *gss.Node(State), + graph: *gss.Graph(State), + terminal: u8, + queue: *std.ArrayList(*gss.Node(State)), + node_cache: *std.HashMap(State, *gss.Node(State), State.Context, 80), +) !void { + const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index]; + + if (node.state.inner_position == rule.len) { + for (node.parents.items) |parent| { + const next = try graph.clone(parent, parent.state.next()); + try forward_and_consume(grammar, next, graph, terminal, queue, node_cache); + } + + return; + } + + switch (rule[node.state.inner_position]) { + .terminal => |t| if (t == terminal) try queue.append(try graph.clone(node, node.state.next())), + .non_terminal => |n| { + const non_terminal = grammar.non_terminal_by_id(n); + if (grammar.is_next_char( + non_terminal.rules()[node.state.rule_index][node.state.inner_position..], + node.state.id, + terminal, + )) { + for (0..grammar.non_terminal_by_id(n).rules().len) |rule_index| { + const state = State { + .id = n, + .rule_index = rule_index, + .inner_position = 0, + }; + + const result = try node_cache.getOrPut(state); + + if (!result.found_existing) { + result.value_ptr.* = try graph.push(node, state); + try forward_and_consume(grammar, result.value_ptr.*, graph, terminal, queue, node_cache); + } else { + std.debug.print("HERE {}{*} <- {}{*}\n", .{node, node, result.value_ptr.*, result.value_ptr.*}); + try result.value_ptr.*.parents.append(node); + } + } + } + }, + .epsilon => { + const next = try graph.clone(node, node.state.next()); + try forward_and_consume(grammar, next, graph, terminal, queue, node_cache); + }, + } +} + +pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool { + const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index]; + if (node.state.is_at_end_of_rule(rule)) { + for (node.parents.items) |parent| { + if (reaches_end_of_entry(grammar, parent)) { + return true; + } + } + + std.debug.print("HERE {}\n", .{ node.is_toplevel }); + + return node.is_toplevel; + } + + for (rule[node.state.inner_position..]) |character| { + switch (character) { + .terminal => return false, + .non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END), + else => {} + } + } + + return grammar.non_terminal_by_id(node.state.id).follows.is_set(Character.END); +} + pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allocator) !bool { var arena = std.heap.ArenaAllocator.init(inner_allocator); defer arena.deinit(); @@ -65,94 +169,40 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo defer for (queues) |queue| queue.deinit(); var processing_queue = &queues[0]; + var graph = gss.Graph(State).init(allocator); + for (0..entry.rules().len) |index| { - const node = try allocator.create(gss.Node(State)); - node.* = gss.Node(State).init(State { + const id = try graph.add_toplevel(State { .id = entry.id, .rule_index = index, .inner_position = 0, - .input_position = 0, }); - try processing_queue.append(node); + try processing_queue.append(id); } var next_processing_queue = &queues[1]; - while (processing_queue.items.len > 0) { - for (processing_queue.items) |node| { - const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index]; - - if (node.state.inner_position == rule.len) { - if ( - node.parent == null and - node.state.id == entry.id and - node.state.input_position == input.len - ) { - return true; - } - - const old_state, const parent_node = node.pop(allocator); - if (parent_node) |parent| { - const sibling = try parent.clone(State { - .id = parent.state.id, - .rule_index = parent.state.rule_index, - .inner_position = parent.state.inner_position + 1, - .input_position = old_state.input_position, - }, allocator); - try next_processing_queue.append(sibling); - } - } else { - const epsilon_only = node.state.input_position == input.len; - - switch (rule[node.state.inner_position]) { - .terminal => |t| { - if (!epsilon_only and input[node.state.input_position] == t) { - const sibling = try node.clone(State { - .id = node.state.id, - .rule_index = node.state.rule_index, - .inner_position = node.state.inner_position + 1, - .input_position = node.state.input_position + 1, - }, allocator); - try next_processing_queue.append(sibling); - } - }, - .non_terminal => |n| { - const non_terminal = grammar.non_terminal_by_id(n); + var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator); + defer node_cache.deinit(); - if (!epsilon_only and non_terminal.first.is_set(input[node.state.input_position])) { - for (0..non_terminal.rules().len) |index| { - const child = try node.push(State { - .id = non_terminal.id, - .rule_index = index, - .inner_position = 0, - .input_position = node.state.input_position, - }, allocator); - try next_processing_queue.append(child); - } - } + for (input) |character| { + std.debug.print("=============\n", .{}); + for (processing_queue.items) |node| { + node.state.debug(grammar); + std.debug.print("PARENT:\n", .{}); + for (node.parents.items) |parent| { parent.state.debug(grammar); } + std.debug.print("-------------\n", .{}); + try forward_and_consume( + grammar, + node, + &graph, + character, + next_processing_queue, + &node_cache, + ); - if (non_terminal.first.is_set(Character.EPSILON)) { - const sibling = try node.clone(State { - .id = node.state.id, - .rule_index = node.state.rule_index, - .inner_position = node.state.inner_position + 1, - .input_position = node.state.input_position - }, allocator); - try next_processing_queue.append(sibling); - } - }, - .epsilon => { - const sibling = try node.clone(State { - .id = node.state.id, - .rule_index = node.state.rule_index, - .inner_position = node.state.inner_position + 1, - .input_position = node.state.input_position - }, allocator); - try next_processing_queue.append(sibling); - }, - } - } + node_cache.clearRetainingCapacity(); } const swap = processing_queue; @@ -161,6 +211,16 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo next_processing_queue.clearRetainingCapacity(); } + std.debug.print("==== END ====\n", .{}); + + for (processing_queue.items) |node| { + node.state.debug(grammar); + std.debug.print("parent = {any}\n", .{node.parents.items}); + if (reaches_end_of_entry(grammar, node)) { + return true; + } + } + return false; } |