diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/generator.zig | 2 | ||||
| -rw-r--r-- | src/grammar.zig | 18 | ||||
| -rw-r--r-- | src/non-terminal.zig | 12 | ||||
| -rw-r--r-- | src/recognizer.zig | 135 | ||||
| -rw-r--r-- | src/rule.zig | 5 |
5 files changed, 80 insertions, 92 deletions
diff --git a/src/generator.zig b/src/generator.zig index 0676834..e28cc55 100644 --- a/src/generator.zig +++ b/src/generator.zig @@ -53,7 +53,7 @@ pub fn Generator(T: type) type { const rule = rules[index]; const success = blk: { - for (rule) |character| { + for (rule.items) |character| { switch (character) { .terminal => |t| try string.append(t), .non_terminal => |next_n| { diff --git a/src/grammar.zig b/src/grammar.zig index ab51275..2169fe7 100644 --- a/src/grammar.zig +++ b/src/grammar.zig @@ -80,21 +80,23 @@ fn generate_first(self: *Self) void { modified = false; for (self.non_terminals) |*non_terminal| { - for (non_terminal.rules()) |rule| { - switch (rule[0]) { + for (non_terminal.rules()) |*rule| { + switch (rule.items[0]) { .terminal => |t| { modified = modified or !non_terminal.first.is_set(t); non_terminal.first.set(t); + rule.first.set(t); }, .non_terminal, .epsilon => { var insert_epsilon = true; - for (rule) |*char| { + for (rule.items) |*char| { switch (char.*) { .terminal => |t| { modified = modified or !non_terminal.first.is_set(t); non_terminal.first.set(t); + rule.first.set(t); insert_epsilon = false; break; }, @@ -105,6 +107,7 @@ fn generate_first(self: *Self) void { modified = modified or (!non_terminal.first.is_set(index) and child_first); non_terminal.first.set_if(index, child_first); + rule.first.set_if(index, child_first); } if (!first.is_set(Character.EPSILON)) { @@ -119,6 +122,7 @@ fn generate_first(self: *Self) void { if (insert_epsilon and !non_terminal.first.is_set(Character.EPSILON)) { modified = true; non_terminal.first.set(Character.EPSILON); + rule.first.set(Character.EPSILON); } }, } @@ -135,14 +139,14 @@ fn generate_follows(self: *Self) void { for (self.non_terminals) |*non_terminal| { for (non_terminal.rules()) |rule| { - for (0..rule.len) |character_index| { + for (0..rule.items.len) |character_index| { - switch (rule[character_index]) { + switch (rule.items[character_index]) { .non_terminal => |n| { const current_non_terminal = self.non_terminal_by_id(n); const ends_with_epsilon = brk: { - for (character_index + 1..rule.len) |peek_index| { - switch (rule[peek_index]) { + for (character_index + 1..rule.items.len) |peek_index| { + switch (rule.items[peek_index]) { .terminal => |t| { modified = modified or !current_non_terminal.follows.is_set(t); current_non_terminal.follows.set(t); diff --git a/src/non-terminal.zig b/src/non-terminal.zig index 1665a80..f5ecf09 100644 --- a/src/non-terminal.zig +++ b/src/non-terminal.zig @@ -2,9 +2,9 @@ const std = @import("std"); const CharSet = @import("char-set.zig"); const Character = @import("character.zig").Character; const string = @import("string.zig"); +const Rule = @import("rule.zig"); const Self = @This(); -pub const Rule = []Character; id: usize, name: []const u8, @@ -26,7 +26,7 @@ pub fn init(name: []const u8, id: usize, allocator: std.mem.Allocator) !Self { pub fn deinit(self: *Self) void { for (self.rules()) |rule| { - self.allocator.free(rule); + self.allocator.free(rule.items); } self.rule_list.deinit(); @@ -69,7 +69,7 @@ pub fn parse_and_add_variants( head = std.mem.trimLeft(u8, head, &std.ascii.whitespace); while (head.len > 0) { if (head[0] == '|') { - try self.rule_list.append(try rule.toOwnedSlice()); + try self.rule_list.append(Rule { .items = try rule.toOwnedSlice() }); rule = std.ArrayList(Character).init(self.allocator); head = head[1..]; } else if (head[0] == '\'') { @@ -112,7 +112,7 @@ pub fn parse_and_add_variants( return error.EmptyVariant; } - try self.rule_list.append(try rule.toOwnedSlice()); + try self.rule_list.append(Rule { .items = try rule.toOwnedSlice() }); } pub inline fn is_empty(self: *const Self) bool { @@ -131,13 +131,13 @@ pub fn format( try writer.print("{s} ->", .{ self.name }); if (self.rule_list.items.len > 0) { - for (self.rule_list.items[0]) |c| { + for (self.rule_list.items[0].items) |c| { try writer.print(" {}", .{c}); } for (self.rule_list.items[1..]) |r| { try writer.print(" |", .{}); - for (r) |c| { + for (r.items) |c| { try writer.print(" {}", .{c}); } } diff --git a/src/recognizer.zig b/src/recognizer.zig index 2eeae0d..05db2c7 100644 --- a/src/recognizer.zig +++ b/src/recognizer.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Grammar = @import("grammar.zig"); const NonTerminal = @import("non-terminal.zig"); +const Rule = @import("rule.zig"); const Character = @import("character.zig").Character; const Generator = @import("generator.zig").Generator; const gss = @import("gss.zig"); @@ -30,8 +31,8 @@ const State = struct { return other; } - pub inline fn is_at_end_of_rule(self: *Self, rule: NonTerminal.Rule) bool { - return self.inner_position == rule.len; + pub inline fn is_at_end_of_rule(self: *Self, rule: Rule) bool { + return self.inner_position == rule.items.len; } pub fn format( @@ -69,68 +70,6 @@ const State = struct { } }; -pub fn forward_and_consume( - grammar: *Grammar, - base: *gss.Node(State), - graph: *gss.Graph(State), - terminal: u8, - queue: *std.ArrayList(*gss.Node(State)), - node_cache: *std.HashMap(State, *gss.Node(State), State.Context, 80), - allocator: std.mem.Allocator, -) !void { - var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator); - try forward_queue.append(.{ base.state, base }); - - while (forward_queue.popOrNull()) |checkpoint| { - const last_state, const last_node = checkpoint; - const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index]; - - if (last_state.inner_position == rule.len) { - for (last_node.parents.items) |parent| { - try forward_queue.append(.{parent.state.next(), parent}); - } - - continue; - } - - switch (rule[last_state.inner_position]) { - .terminal => |t| if (t == terminal) { - const node = try graph.clone(last_node, last_state.next()); - try queue.append(node); - }, - .non_terminal => |n| { - const non_terminal = grammar.non_terminal_by_id(n); - for (0..grammar.non_terminal_by_id(n).rules().len) |rule_index| { - if (grammar.is_next_char( - non_terminal.rules()[rule_index], - n, - terminal, - )) { - - const state = State { - .id = n, - .rule_index = rule_index, - .inner_position = 0, - }; - - if (node_cache.get(state)) |parent| { - try parent.parents.append(last_node); - } else { - const parent = try graph.clone(last_node, last_state); - const next = try graph.push(parent, state); - try node_cache.put(state, next); - try forward_queue.append(.{state, next}); - } - } - } - }, - .epsilon => { - try forward_queue.append(.{last_state.next(), last_node}); - }, - } - } -} - pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool { const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index]; if (node.state.is_at_end_of_rule(rule)) { @@ -143,7 +82,7 @@ pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool { return node.is_toplevel; } - for (rule[node.state.inner_position..]) |character| { + for (rule.items[node.state.inner_position..]) |character| { switch (character) { .terminal => return false, .non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END), @@ -179,24 +118,64 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo try processing_queue.append(id); } - var next_processing_queue = &queues[1]; - var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator); - defer node_cache.deinit(); + var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator); + + var next_processing_queue = &queues[1]; for (input) |character| { - for (processing_queue.items) |node| { - try forward_and_consume( - grammar, - node, - &graph, - character, - next_processing_queue, - &node_cache, - allocator, - ); + for (processing_queue.items) |base| { + try forward_queue.append(.{ base.state, base }); + + while (forward_queue.popOrNull()) |checkpoint| { + const last_state, const last_node = checkpoint; + const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index]; + + if (last_state.inner_position == rule.items.len) { + for (last_node.parents.items) |parent| { + try forward_queue.append(.{parent.state.next(), parent}); + } + + continue; + } + + switch (rule.items[last_state.inner_position]) { + .terminal => |t| if (t == character) { + const node = try graph.clone(last_node, last_state.next()); + try next_processing_queue.append(node); + }, + .non_terminal => |n| { + const non_terminal = grammar.non_terminal_by_id(n); + for (grammar.non_terminal_by_id(n).rules(), 0..) |child_rule, rule_index| { + if (child_rule.first.is_set(character)) { + + const state = State { + .id = n, + .rule_index = rule_index, + .inner_position = 0, + }; + + if (node_cache.get(state)) |parent| { + try parent.parents.append(last_node); + } else { + const parent = try graph.clone(last_node, last_state); + const next = try graph.push(parent, state); + try node_cache.put(state, next); + try forward_queue.append(.{state, next}); + } + } else if (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character)) { + @panic("not implemented"); + } + } + }, + .epsilon => { + try forward_queue.append(.{last_state.next(), last_node}); + }, + } + } node_cache.clearRetainingCapacity(); + forward_queue.clearRetainingCapacity(); } const swap = processing_queue; diff --git a/src/rule.zig b/src/rule.zig new file mode 100644 index 0000000..5487f68 --- /dev/null +++ b/src/rule.zig @@ -0,0 +1,5 @@ +const CharSet = @import("char-set.zig"); +const Character = @import("character.zig").Character; + +items: []Character, +first: CharSet = CharSet {}, |