aboutsummaryrefslogtreecommitdiff
path: root/src/grammar.zig
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2025-05-06 10:43:09 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2025-05-06 10:43:09 +0200
commiteff19cc15a9bf4df60e7f90c3a7ee525c65266c0 (patch)
treed6034b574e8e2218054d42caadbf25fa17862abf /src/grammar.zig
parente2f01d5df22704bfe62396a0f9a260f86edbde0e (diff)
add full grammar parsing
Diffstat (limited to 'src/grammar.zig')
-rw-r--r--src/grammar.zig248
1 files changed, 151 insertions, 97 deletions
diff --git a/src/grammar.zig b/src/grammar.zig
index ed7eee6..298932c 100644
--- a/src/grammar.zig
+++ b/src/grammar.zig
@@ -5,24 +5,47 @@ const NonTerminal = @import("non-terminal.zig");
const Self = @This();
-non_terminals: [26]NonTerminal,
+non_terminals: []NonTerminal,
+entry_id: usize,
+allocator: std.mem.Allocator,
-pub fn parse(buffer: []const u8, allocator: std.mem.Allocator) !Self {
+pub fn parse(entry: []const u8, buffer: []const u8, allocator: std.mem.Allocator) !Self {
var lines = std.mem.tokenizeScalar(u8, buffer, '\n');
+ var names = std.StringHashMap(usize).init(allocator);
+ defer names.deinit();
- var self: Self = undefined;
- errdefer self.deinit(allocator);
+ while (lines.next()) |line| {
+ const name = try NonTerminal.parse_name(line);
+
+ if (!names.contains(name)) {
+ try names.put(name, names.count());
+ }
+ }
+
+ lines = std.mem.tokenizeScalar(u8, buffer, '\n');
+ var self = Self {
+ .non_terminals = try allocator.alloc(NonTerminal, names.count()),
+ .entry_id = names.get(entry) orelse return error.MissingRule,
+ .allocator = allocator,
+ };
+ errdefer self.deinit();
- for (&self.non_terminals, 'A'..) |*non_terminal, name| {
- non_terminal.* = NonTerminal.init(
- @truncate(name),
- allocator
- );
+ {
+ var iterator = names.iterator();
+ while (iterator.next()) |name_entry| {
+ self.non_terminals[name_entry.value_ptr.*] = try NonTerminal.init(
+ name_entry.key_ptr.*,
+ name_entry.value_ptr.*,
+ allocator
+ );
+ }
}
while (lines.next()) |line| {
- const name, const rule = try NonTerminal.parse_rule(line, allocator);
- try self.non_terminal_by_name(name).add_rule(rule);
+ const name = try NonTerminal.parse_name(line);
+ try self.non_terminal_by_id(
+ names.get(name) orelse return error.MissingRule
+ ).parse_and_add_variants(line, &names);
}
self.generate_first();
@@ -31,20 +54,21 @@ pub fn parse(buffer: []const u8, allocator: std.mem.Allocator) !Self {
return self;
}
-pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
- for (&self.non_terminals) |*non_terminal| {
- non_terminal.deinit(allocator);
+pub fn deinit(self: *Self) void {
+ for (self.non_terminals) |*non_terminal| {
+ non_terminal.deinit();
}
+ self.allocator.free(self.non_terminals);
self.* = undefined;
}
-pub inline fn non_terminal_by_name(self: *Self, name: u8) *NonTerminal {
- return &self.non_terminals[name - 'A'];
+pub inline fn non_terminal_by_id(self: *Self, id: usize) *NonTerminal {
+ return &self.non_terminals[id];
}
pub inline fn entry_point(self: *Self) *NonTerminal {
- return self.non_terminal_by_name('S');
+ return self.non_terminal_by_id(self.entry_id);
}
fn generate_first(self: *Self) void {
@@ -53,7 +77,7 @@ fn generate_first(self: *Self) void {
while (modified) {
modified = false;
- for (&self.non_terminals) |*non_terminal| {
+ for (self.non_terminals) |*non_terminal| {
for (non_terminal.rules()) |rule| {
switch (rule[0]) {
.terminal => |t| {
@@ -62,7 +86,6 @@ fn generate_first(self: *Self) void {
},
.non_terminal, .epsilon => {
- if (non_terminal.first.is_set(Character.EPSILON)) continue;
var insert_epsilon = true;
for (rule) |*char| {
@@ -74,7 +97,7 @@ fn generate_first(self: *Self) void {
break;
},
.non_terminal => |n| {
- const first = self.non_terminal_by_name(n).first;
+ const first = self.non_terminal_by_id(n).first;
for (first.charset, 0..) |child_first, index| {
if (index == Character.EPSILON) continue;
@@ -91,7 +114,7 @@ fn generate_first(self: *Self) void {
}
}
- if (insert_epsilon) {
+ if (insert_epsilon and !non_terminal.first.is_set(Character.EPSILON)) {
modified = true;
non_terminal.first.set(Character.EPSILON);
}
@@ -101,21 +124,20 @@ fn generate_first(self: *Self) void {
}
}
}
-
fn generate_follows(self: *Self) void {
- self.non_terminal_by_name('S').follows.set(Character.END);
+ self.entry_point().follows.set(Character.END);
var modified = true;
while (modified) {
modified = false;
- for (&self.non_terminals) |*non_terminal| {
+ for (self.non_terminals) |*non_terminal| {
for (non_terminal.rules()) |rule| {
for (0..rule.len) |character_index| {
switch (rule[character_index]) {
.non_terminal => |n| {
- const current_non_terminal = self.non_terminal_by_name(n);
+ const current_non_terminal = self.non_terminal_by_id(n);
const ends_with_epsilon = brk: {
for (character_index + 1..rule.len) |peek_index| {
switch (rule[peek_index]) {
@@ -125,7 +147,7 @@ fn generate_follows(self: *Self) void {
break :brk false;
},
.non_terminal => |peek_n| {
- const peek_non_terminal = self.non_terminal_by_name(peek_n);
+ const peek_non_terminal = self.non_terminal_by_id(peek_n);
for (peek_non_terminal.first.charset, 0..) |is_set, index| {
if (Character.EPSILON == index) continue;
@@ -159,112 +181,144 @@ fn generate_follows(self: *Self) void {
}
}
+pub fn format(
+ self: *const Self,
+ comptime fmt: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = fmt;
+ _ = options;
+
+ for (self.non_terminals) |non_terminal| {
+ try writer.print("[{}] ", .{non_terminal});
+ }
+}
+
test "expr" {
const text =
\\S -> B A
- \\A -> + B A
- \\A -> _
+ \\A -> '+' B A
+ \\A -> ''
\\B -> D C
- \\C -> * D C
- \\C -> _
- \\D -> ( S )
- \\D -> a
- \\D -> b
+ \\C -> '*' D C
+ \\C -> ''
+ \\D -> '(' S ')'
+ \\D -> 'a' | 'b'
;
const allocator = std.testing.allocator;
- var grammar = try Self.parse(text, allocator);
- defer grammar.deinit(allocator);
+ var grammar = try Self.parse("S", text, allocator);
+ defer grammar.deinit();
- try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { '(', 'a', 'b' });
- try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { '+', Character.EPSILON });
- try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { '(', 'a', 'b' });
- try grammar.non_terminal_by_name('C').first.expect(&[_]u8 { '*', Character.EPSILON });
- try grammar.non_terminal_by_name('D').first.expect(&[_]u8 { '(', 'a', 'b' });
+ try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { '(', 'a', 'b' });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { '+', Character.EPSILON });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { '(', 'a', 'b' });
+ try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { '*', Character.EPSILON });
+ try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { '(', 'a', 'b' });
- try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { ')', Character.END });
- try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { ')', Character.END });
- try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { '+', ')', Character.END });
- try grammar.non_terminal_by_name('C').follows.expect(&[_]u8 { '+', ')', Character.END });
- try grammar.non_terminal_by_name('D').follows.expect(&[_]u8 { '*', '+', ')', Character.END });
+ try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { ')', Character.END });
+ try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { ')', Character.END });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { '+', ')', Character.END });
+ try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { '+', ')', Character.END });
+ try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { '*', '+', ')', Character.END });
}
test "sample 0" {
const text =
- \\S -> A C B
- \\S -> C b b
- \\S -> B a
- \\A -> d a
- \\A -> B C
- \\B -> g
- \\B -> _
- \\C -> h
- \\C -> _
+ \\S -> A C B | C 'bb' | B 'a'
+ \\A -> 'da' | B C
+ \\B -> 'g' | ''
+ \\C -> 'h' | ''
;
const allocator = std.testing.allocator;
- var grammar = try Self.parse(text, allocator);
- defer grammar.deinit(allocator);
+ var grammar = try Self.parse("S", text, allocator);
+ defer grammar.deinit();
- try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON });
- try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { 'd', 'g', 'h', Character.EPSILON });
- try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { 'g', Character.EPSILON });
- try grammar.non_terminal_by_name('C').first.expect(&[_]u8 { 'h', Character.EPSILON });
+ try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'd', 'g', 'h', Character.EPSILON });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'g', Character.EPSILON });
+ try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'h', Character.EPSILON });
- try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { Character.END });
- try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { 'g', 'h', Character.END });
- try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { 'a', 'g', 'h', Character.END });
- try grammar.non_terminal_by_name('C').follows.expect(&[_]u8 { 'b', 'g', 'h', Character.END });
+ try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'g', 'h', Character.END });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'g', 'h', Character.END });
+ try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'b', 'g', 'h', Character.END });
}
test "sample 1" {
const text =
- \\S -> a A B b
- \\A -> c
- \\A -> _
- \\B -> d
- \\B -> _
+ \\S -> 'a' A B 'b'
+ \\A -> 'c'
+ \\A -> ''
+ \\B -> 'd'
+ \\B -> ''
;
const allocator = std.testing.allocator;
- var grammar = try Self.parse(text, allocator);
- defer grammar.deinit(allocator);
+ var grammar = try Self.parse("S", text, allocator);
+ defer grammar.deinit();
- try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { 'a' });
- try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { 'c', Character.EPSILON });
- try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { 'd', Character.EPSILON });
+ try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a' });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'c', Character.EPSILON });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd', Character.EPSILON });
- try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { Character.END });
- try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { 'b', 'd' });
- try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { 'b' });
+ try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'b', 'd' });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'b' });
}
test "sample 2" {
const text =
\\S -> A B
- \\S -> e D a
- \\A -> a b
- \\A -> c
- \\B -> d C
- \\C -> e C
- \\C -> _
- \\D -> f D
- \\D -> _
+ \\S -> 'e' D 'a'
+ \\A -> 'ab'
+ \\A -> 'c'
+ \\B -> 'd' C
+ \\C -> 'e' C
+ \\C -> ''
+ \\D -> 'f' D
+ \\D -> ''
+ ;
+
+ const allocator = std.testing.allocator;
+ var grammar = try Self.parse("S", text, allocator);
+ defer grammar.deinit();
+
+ try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a', 'c', 'e' });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c' });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd' });
+ try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'e', Character.EPSILON });
+ try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { 'f', Character.EPSILON });
+
+ try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'd' });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { Character.END });
+ try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { 'a' });
+}
+
+test "sample 3" {
+ const text =
+ \\S -> A S 'd'
+ \\S -> B S
+ \\S -> ''
+ \\A -> 'a'
+ \\A -> 'c'
+ \\B -> 'a'
+ \\B -> 'b'
;
const allocator = std.testing.allocator;
- var grammar = try Self.parse(text, allocator);
- defer grammar.deinit(allocator);
+ var grammar = try Self.parse("S", text, allocator);
+ defer grammar.deinit();
- try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { 'a', 'c', 'e' });
- try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { 'a', 'c' });
- try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { 'd' });
- try grammar.non_terminal_by_name('C').first.expect(&[_]u8 { 'e', Character.EPSILON });
- try grammar.non_terminal_by_name('D').first.expect(&[_]u8 { 'f', Character.EPSILON });
+ try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a', 'b', 'c', Character.EPSILON });
+ try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c' });
+ try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'a', 'b' });
- try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { Character.END });
- try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { 'd' });
- try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { Character.END });
- try grammar.non_terminal_by_name('C').follows.expect(&[_]u8 { Character.END });
- try grammar.non_terminal_by_name('D').follows.expect(&[_]u8 { 'a' });
+ try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { 'd', Character.END });
+ try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd' });
+ try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd', Character.END });
}