aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--grammar/complex-recursion.grm1
-rw-r--r--src/generator.zig2
-rw-r--r--src/grammar.zig18
-rw-r--r--src/non-terminal.zig12
-rw-r--r--src/recognizer.zig135
-rw-r--r--src/rule.zig5
6 files changed, 81 insertions, 92 deletions
diff --git a/grammar/complex-recursion.grm b/grammar/complex-recursion.grm
new file mode 100644
index 0000000..0daebe3
--- /dev/null
+++ b/grammar/complex-recursion.grm
@@ -0,0 +1 @@
+S -> 'a' | S S | S S S
diff --git a/src/generator.zig b/src/generator.zig
index 0676834..e28cc55 100644
--- a/src/generator.zig
+++ b/src/generator.zig
@@ -53,7 +53,7 @@ pub fn Generator(T: type) type {
const rule = rules[index];
const success = blk: {
- for (rule) |character| {
+ for (rule.items) |character| {
switch (character) {
.terminal => |t| try string.append(t),
.non_terminal => |next_n| {
diff --git a/src/grammar.zig b/src/grammar.zig
index ab51275..2169fe7 100644
--- a/src/grammar.zig
+++ b/src/grammar.zig
@@ -80,21 +80,23 @@ fn generate_first(self: *Self) void {
modified = false;
for (self.non_terminals) |*non_terminal| {
- for (non_terminal.rules()) |rule| {
- switch (rule[0]) {
+ for (non_terminal.rules()) |*rule| {
+ switch (rule.items[0]) {
.terminal => |t| {
modified = modified or !non_terminal.first.is_set(t);
non_terminal.first.set(t);
+ rule.first.set(t);
},
.non_terminal, .epsilon => {
var insert_epsilon = true;
- for (rule) |*char| {
+ for (rule.items) |*char| {
switch (char.*) {
.terminal => |t| {
modified = modified or !non_terminal.first.is_set(t);
non_terminal.first.set(t);
+ rule.first.set(t);
insert_epsilon = false;
break;
},
@@ -105,6 +107,7 @@ fn generate_first(self: *Self) void {
modified = modified or (!non_terminal.first.is_set(index) and child_first);
non_terminal.first.set_if(index, child_first);
+ rule.first.set_if(index, child_first);
}
if (!first.is_set(Character.EPSILON)) {
@@ -119,6 +122,7 @@ fn generate_first(self: *Self) void {
if (insert_epsilon and !non_terminal.first.is_set(Character.EPSILON)) {
modified = true;
non_terminal.first.set(Character.EPSILON);
+ rule.first.set(Character.EPSILON);
}
},
}
@@ -135,14 +139,14 @@ fn generate_follows(self: *Self) void {
for (self.non_terminals) |*non_terminal| {
for (non_terminal.rules()) |rule| {
- for (0..rule.len) |character_index| {
+ for (0..rule.items.len) |character_index| {
- switch (rule[character_index]) {
+ switch (rule.items[character_index]) {
.non_terminal => |n| {
const current_non_terminal = self.non_terminal_by_id(n);
const ends_with_epsilon = brk: {
- for (character_index + 1..rule.len) |peek_index| {
- switch (rule[peek_index]) {
+ for (character_index + 1..rule.items.len) |peek_index| {
+ switch (rule.items[peek_index]) {
.terminal => |t| {
modified = modified or !current_non_terminal.follows.is_set(t);
current_non_terminal.follows.set(t);
diff --git a/src/non-terminal.zig b/src/non-terminal.zig
index 1665a80..f5ecf09 100644
--- a/src/non-terminal.zig
+++ b/src/non-terminal.zig
@@ -2,9 +2,9 @@ const std = @import("std");
const CharSet = @import("char-set.zig");
const Character = @import("character.zig").Character;
const string = @import("string.zig");
+const Rule = @import("rule.zig");
const Self = @This();
-pub const Rule = []Character;
id: usize,
name: []const u8,
@@ -26,7 +26,7 @@ pub fn init(name: []const u8, id: usize, allocator: std.mem.Allocator) !Self {
pub fn deinit(self: *Self) void {
for (self.rules()) |rule| {
- self.allocator.free(rule);
+ self.allocator.free(rule.items);
}
self.rule_list.deinit();
@@ -69,7 +69,7 @@ pub fn parse_and_add_variants(
head = std.mem.trimLeft(u8, head, &std.ascii.whitespace);
while (head.len > 0) {
if (head[0] == '|') {
- try self.rule_list.append(try rule.toOwnedSlice());
+ try self.rule_list.append(Rule { .items = try rule.toOwnedSlice() });
rule = std.ArrayList(Character).init(self.allocator);
head = head[1..];
} else if (head[0] == '\'') {
@@ -112,7 +112,7 @@ pub fn parse_and_add_variants(
return error.EmptyVariant;
}
- try self.rule_list.append(try rule.toOwnedSlice());
+ try self.rule_list.append(Rule { .items = try rule.toOwnedSlice() });
}
pub inline fn is_empty(self: *const Self) bool {
@@ -131,13 +131,13 @@ pub fn format(
try writer.print("{s} ->", .{ self.name });
if (self.rule_list.items.len > 0) {
- for (self.rule_list.items[0]) |c| {
+ for (self.rule_list.items[0].items) |c| {
try writer.print(" {}", .{c});
}
for (self.rule_list.items[1..]) |r| {
try writer.print(" |", .{});
- for (r) |c| {
+ for (r.items) |c| {
try writer.print(" {}", .{c});
}
}
diff --git a/src/recognizer.zig b/src/recognizer.zig
index 2eeae0d..05db2c7 100644
--- a/src/recognizer.zig
+++ b/src/recognizer.zig
@@ -1,6 +1,7 @@
const std = @import("std");
const Grammar = @import("grammar.zig");
const NonTerminal = @import("non-terminal.zig");
+const Rule = @import("rule.zig");
const Character = @import("character.zig").Character;
const Generator = @import("generator.zig").Generator;
const gss = @import("gss.zig");
@@ -30,8 +31,8 @@ const State = struct {
return other;
}
- pub inline fn is_at_end_of_rule(self: *Self, rule: NonTerminal.Rule) bool {
- return self.inner_position == rule.len;
+ pub inline fn is_at_end_of_rule(self: *Self, rule: Rule) bool {
+ return self.inner_position == rule.items.len;
}
pub fn format(
@@ -69,68 +70,6 @@ const State = struct {
}
};
-pub fn forward_and_consume(
- grammar: *Grammar,
- base: *gss.Node(State),
- graph: *gss.Graph(State),
- terminal: u8,
- queue: *std.ArrayList(*gss.Node(State)),
- node_cache: *std.HashMap(State, *gss.Node(State), State.Context, 80),
- allocator: std.mem.Allocator,
-) !void {
- var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator);
- try forward_queue.append(.{ base.state, base });
-
- while (forward_queue.popOrNull()) |checkpoint| {
- const last_state, const last_node = checkpoint;
- const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index];
-
- if (last_state.inner_position == rule.len) {
- for (last_node.parents.items) |parent| {
- try forward_queue.append(.{parent.state.next(), parent});
- }
-
- continue;
- }
-
- switch (rule[last_state.inner_position]) {
- .terminal => |t| if (t == terminal) {
- const node = try graph.clone(last_node, last_state.next());
- try queue.append(node);
- },
- .non_terminal => |n| {
- const non_terminal = grammar.non_terminal_by_id(n);
- for (0..grammar.non_terminal_by_id(n).rules().len) |rule_index| {
- if (grammar.is_next_char(
- non_terminal.rules()[rule_index],
- n,
- terminal,
- )) {
-
- const state = State {
- .id = n,
- .rule_index = rule_index,
- .inner_position = 0,
- };
-
- if (node_cache.get(state)) |parent| {
- try parent.parents.append(last_node);
- } else {
- const parent = try graph.clone(last_node, last_state);
- const next = try graph.push(parent, state);
- try node_cache.put(state, next);
- try forward_queue.append(.{state, next});
- }
- }
- }
- },
- .epsilon => {
- try forward_queue.append(.{last_state.next(), last_node});
- },
- }
- }
-}
-
pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
const rule = grammar.non_terminal_by_id(node.state.id).rules()[node.state.rule_index];
if (node.state.is_at_end_of_rule(rule)) {
@@ -143,7 +82,7 @@ pub fn reaches_end_of_entry(grammar: *Grammar, node: *gss.Node(State)) bool {
return node.is_toplevel;
}
- for (rule[node.state.inner_position..]) |character| {
+ for (rule.items[node.state.inner_position..]) |character| {
switch (character) {
.terminal => return false,
.non_terminal => |n| return grammar.non_terminal_by_id(n).follows.is_set(Character.END),
@@ -179,24 +118,64 @@ pub fn check(grammar: *Grammar, input: []const u8, inner_allocator: std.mem.Allo
try processing_queue.append(id);
}
- var next_processing_queue = &queues[1];
-
var node_cache = std.HashMap(State, *gss.Node(State), State.Context, 80).init(allocator);
- defer node_cache.deinit();
+ var forward_queue = std.ArrayList(struct { State, *gss.Node(State) }).init(allocator);
+
+ var next_processing_queue = &queues[1];
for (input) |character| {
- for (processing_queue.items) |node| {
- try forward_and_consume(
- grammar,
- node,
- &graph,
- character,
- next_processing_queue,
- &node_cache,
- allocator,
- );
+ for (processing_queue.items) |base| {
+ try forward_queue.append(.{ base.state, base });
+
+ while (forward_queue.popOrNull()) |checkpoint| {
+ const last_state, const last_node = checkpoint;
+ const rule = grammar.non_terminal_by_id(last_state.id).rules()[last_state.rule_index];
+
+ if (last_state.inner_position == rule.items.len) {
+ for (last_node.parents.items) |parent| {
+ try forward_queue.append(.{parent.state.next(), parent});
+ }
+
+ continue;
+ }
+
+ switch (rule.items[last_state.inner_position]) {
+ .terminal => |t| if (t == character) {
+ const node = try graph.clone(last_node, last_state.next());
+ try next_processing_queue.append(node);
+ },
+ .non_terminal => |n| {
+ const non_terminal = grammar.non_terminal_by_id(n);
+ for (grammar.non_terminal_by_id(n).rules(), 0..) |child_rule, rule_index| {
+ if (child_rule.first.is_set(character)) {
+
+ const state = State {
+ .id = n,
+ .rule_index = rule_index,
+ .inner_position = 0,
+ };
+
+ if (node_cache.get(state)) |parent| {
+ try parent.parents.append(last_node);
+ } else {
+ const parent = try graph.clone(last_node, last_state);
+ const next = try graph.push(parent, state);
+ try node_cache.put(state, next);
+ try forward_queue.append(.{state, next});
+ }
+ } else if (child_rule.first.is_set(Character.EPSILON) and non_terminal.follows.is_set(character)) {
+ @panic("not implemented");
+ }
+ }
+ },
+ .epsilon => {
+ try forward_queue.append(.{last_state.next(), last_node});
+ },
+ }
+ }
node_cache.clearRetainingCapacity();
+ forward_queue.clearRetainingCapacity();
}
const swap = processing_queue;
diff --git a/src/rule.zig b/src/rule.zig
new file mode 100644
index 0000000..5487f68
--- /dev/null
+++ b/src/rule.zig
@@ -0,0 +1,5 @@
+const CharSet = @import("char-set.zig");
+const Character = @import("character.zig").Character;
+
+items: []Character,
+first: CharSet = CharSet {},