aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2025-04-24 20:06:50 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2025-04-24 20:06:50 +0200
commitf593da7580f423b1405f4705081368acef0b3c21 (patch)
tree875e25e99da4f531e44e7537c7d8006da9ee0aa5 /src
parent839271627d0cfd2240ec30a603ff60a0165fe6a2 (diff)
fix naming
the struct RuleList is now named NonTerminal and its member rhs is now called Rule.
Diffstat (limited to 'src')
-rw-r--r--src/grammar.zig156
-rw-r--r--src/main.zig36
-rw-r--r--src/non-terminal.zig (renamed from src/rule-list.zig)40
3 files changed, 135 insertions, 97 deletions
diff --git a/src/grammar.zig b/src/grammar.zig
index de29a15..ed7eee6 100644
--- a/src/grammar.zig
+++ b/src/grammar.zig
@@ -1,28 +1,28 @@
const std = @import("std");
const Character = @import("character.zig").Character;
-const RuleList = @import("rule-list.zig");
+const NonTerminal = @import("non-terminal.zig");
const Self = @This();
-rules: [26]RuleList,
+non_terminals: [26]NonTerminal,
-pub fn init(buffer: []const u8, allocator: std.mem.Allocator) !Self {
+pub fn parse(buffer: []const u8, allocator: std.mem.Allocator) !Self {
var lines = std.mem.tokenizeScalar(u8, buffer, '\n');
var self: Self = undefined;
errdefer self.deinit(allocator);
- for (&self.rules, 'A'..) |*rule, name| {
- rule.* = RuleList.init(
+ for (&self.non_terminals, 'A'..) |*non_terminal, name| {
+ non_terminal.* = NonTerminal.init(
@truncate(name),
allocator
);
}
while (lines.next()) |line| {
- const name, const rhs = try RuleList.parse_rule(line, allocator);
- try self.rule_by_name(name).add_rule(rhs);
+ const name, const rule = try NonTerminal.parse_rule(line, allocator);
+ try self.non_terminal_by_name(name).add_rule(rule);
}
self.generate_first();
@@ -32,19 +32,19 @@ pub fn init(buffer: []const u8, allocator: std.mem.Allocator) !Self {
}
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
- for (&self.rules) |*rule| {
- rule.deinit(allocator);
+ for (&self.non_terminals) |*non_terminal| {
+ non_terminal.deinit(allocator);
}
self.* = undefined;
}
-pub inline fn rule_by_name(self: *Self, name: u8) *RuleList {
- return &self.rules[name - 'A'];
+pub inline fn non_terminal_by_name(self: *Self, name: u8) *NonTerminal {
+ return &self.non_terminals[name - 'A'];
}
-pub inline fn entry_point(self: *Self) *RuleList {
- return self.rule('S');
+pub inline fn entry_point(self: *Self) *NonTerminal {
+ return self.non_terminal_by_name('S');
}
fn generate_first(self: *Self) void {
@@ -53,33 +53,33 @@ fn generate_first(self: *Self) void {
while (modified) {
modified = false;
- for (&self.rules) |*rule| {
- for (rule.rhs.items) |rhs| {
- switch (rhs[0]) {
+ for (&self.non_terminals) |*non_terminal| {
+ for (non_terminal.rules()) |rule| {
+ switch (rule[0]) {
.terminal => |t| {
- modified = modified or !rule.first.is_set(t);
- rule.first.set(t);
+ modified = modified or !non_terminal.first.is_set(t);
+ non_terminal.first.set(t);
},
.non_terminal, .epsilon => {
- if (rule.first.is_set(Character.EPSILON)) continue;
+ if (non_terminal.first.is_set(Character.EPSILON)) continue;
var insert_epsilon = true;
- for (rhs) |*char| {
+ for (rule) |*char| {
switch (char.*) {
.terminal => |t| {
- modified = modified or !rule.first.is_set(t);
- rule.first.set(t);
+ modified = modified or !non_terminal.first.is_set(t);
+ non_terminal.first.set(t);
insert_epsilon = false;
break;
},
.non_terminal => |n| {
- const first = self.rule_by_name(n).first;
+ const first = self.non_terminal_by_name(n).first;
for (first.charset, 0..) |child_first, index| {
if (index == Character.EPSILON) continue;
- modified = modified or (!rule.first.is_set(index) and child_first);
- rule.first.set_if(index, child_first);
+ modified = modified or (!non_terminal.first.is_set(index) and child_first);
+ non_terminal.first.set_if(index, child_first);
}
if (!first.is_set(Character.EPSILON)) {
@@ -93,7 +93,7 @@ fn generate_first(self: *Self) void {
if (insert_epsilon) {
modified = true;
- rule.first.set(Character.EPSILON);
+ non_terminal.first.set(Character.EPSILON);
}
},
}
@@ -103,37 +103,37 @@ fn generate_first(self: *Self) void {
}
fn generate_follows(self: *Self) void {
- self.rule_by_name('S').follows.set(Character.END);
+ self.non_terminal_by_name('S').follows.set(Character.END);
var modified = true;
while (modified) {
modified = false;
- for (&self.rules) |*parent_rules| {
- for (parent_rules.rhs.items) |rule| {
+ for (&self.non_terminals) |*non_terminal| {
+ for (non_terminal.rules()) |rule| {
for (0..rule.len) |character_index| {
switch (rule[character_index]) {
.non_terminal => |n| {
- const current_rule = self.rule_by_name(n);
+ const current_non_terminal = self.non_terminal_by_name(n);
const ends_with_epsilon = brk: {
for (character_index + 1..rule.len) |peek_index| {
switch (rule[peek_index]) {
.terminal => |t| {
- modified = modified or !current_rule.follows.is_set(t);
- current_rule.follows.set(t);
+ modified = modified or !current_non_terminal.follows.is_set(t);
+ current_non_terminal.follows.set(t);
break :brk false;
},
.non_terminal => |peek_n| {
- const peek_rule = self.rule_by_name(peek_n);
- for (peek_rule.first.charset, 0..) |is_set, index| {
+ const peek_non_terminal = self.non_terminal_by_name(peek_n);
+ for (peek_non_terminal.first.charset, 0..) |is_set, index| {
if (Character.EPSILON == index) continue;
- modified = modified or (!current_rule.follows.is_set(index) and is_set);
- current_rule.follows.set_if(index, is_set);
+ modified = modified or (!current_non_terminal.follows.is_set(index) and is_set);
+ current_non_terminal.follows.set_if(index, is_set);
}
- if (!peek_rule.first.is_set(Character.EPSILON)) {
+ if (!peek_non_terminal.first.is_set(Character.EPSILON)) {
break :brk false;
}
},
@@ -144,9 +144,9 @@ fn generate_follows(self: *Self) void {
};
if (ends_with_epsilon) {
- for (parent_rules.follows.charset, 0..) |is_set, char_index| {
- modified = modified or (!current_rule.follows.is_set(char_index) and is_set);
- current_rule.follows.set_if(char_index, is_set);
+ for (non_terminal.follows.charset, 0..) |is_set, char_index| {
+ modified = modified or (!current_non_terminal.follows.is_set(char_index) and is_set);
+ current_non_terminal.follows.set_if(char_index, is_set);
}
}
},
@@ -173,20 +173,20 @@ test "expr" {
;
const allocator = std.testing.allocator;
- var grammar = try Self.init(text, allocator);
+ var grammar = try Self.parse(text, allocator);
defer grammar.deinit(allocator);
- try grammar.rule_by_name('S').first.expect(&[_]u8 { '(', 'a', 'b' });
- try grammar.rule_by_name('A').first.expect(&[_]u8 { '+', Character.EPSILON });
- try grammar.rule_by_name('B').first.expect(&[_]u8 { '(', 'a', 'b' });
- try grammar.rule_by_name('C').first.expect(&[_]u8 { '*', Character.EPSILON });
- try grammar.rule_by_name('D').first.expect(&[_]u8 { '(', 'a', 'b' });
+ try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { '(', 'a', 'b' });
+ try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { '+', Character.EPSILON });
+ try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { '(', 'a', 'b' });
+ try grammar.non_terminal_by_name('C').first.expect(&[_]u8 { '*', Character.EPSILON });
+ try grammar.non_terminal_by_name('D').first.expect(&[_]u8 { '(', 'a', 'b' });
- try grammar.rule_by_name('S').follows.expect(&[_]u8 { ')', Character.END });
- try grammar.rule_by_name('A').follows.expect(&[_]u8 { ')', Character.END });
- try grammar.rule_by_name('B').follows.expect(&[_]u8 { '+', ')', Character.END });
- try grammar.rule_by_name('C').follows.expect(&[_]u8 { '+', ')', Character.END });
- try grammar.rule_by_name('D').follows.expect(&[_]u8 { '*', '+', ')', Character.END });
+ try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { ')', Character.END });
+ try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { ')', Character.END });
+ try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { '+', ')', Character.END });
+ try grammar.non_terminal_by_name('C').follows.expect(&[_]u8 { '+', ')', Character.END });
+ try grammar.non_terminal_by_name('D').follows.expect(&[_]u8 { '*', '+', ')', Character.END });
}
test "sample 0" {
@@ -203,18 +203,18 @@ test "sample 0" {
;
const allocator = std.testing.allocator;
- var grammar = try Self.init(text, allocator);
+ var grammar = try Self.parse(text, allocator);
defer grammar.deinit(allocator);
- try grammar.rule_by_name('S').first.expect(&[_]u8 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON });
- try grammar.rule_by_name('A').first.expect(&[_]u8 { 'd', 'g', 'h', Character.EPSILON });
- try grammar.rule_by_name('B').first.expect(&[_]u8 { 'g', Character.EPSILON });
- try grammar.rule_by_name('C').first.expect(&[_]u8 { 'h', Character.EPSILON });
+ try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON });
+ try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { 'd', 'g', 'h', Character.EPSILON });
+ try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { 'g', Character.EPSILON });
+ try grammar.non_terminal_by_name('C').first.expect(&[_]u8 { 'h', Character.EPSILON });
- try grammar.rule_by_name('S').follows.expect(&[_]u8 { Character.END });
- try grammar.rule_by_name('A').follows.expect(&[_]u8 { 'g', 'h', Character.END });
- try grammar.rule_by_name('B').follows.expect(&[_]u8 { 'a', 'g', 'h', Character.END });
- try grammar.rule_by_name('C').follows.expect(&[_]u8 { 'b', 'g', 'h', Character.END });
+ try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { Character.END });
+ try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { 'g', 'h', Character.END });
+ try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { 'a', 'g', 'h', Character.END });
+ try grammar.non_terminal_by_name('C').follows.expect(&[_]u8 { 'b', 'g', 'h', Character.END });
}
test "sample 1" {
@@ -227,16 +227,16 @@ test "sample 1" {
;
const allocator = std.testing.allocator;
- var grammar = try Self.init(text, allocator);
+ var grammar = try Self.parse(text, allocator);
defer grammar.deinit(allocator);
- try grammar.rule_by_name('S').first.expect(&[_]u8 { 'a' });
- try grammar.rule_by_name('A').first.expect(&[_]u8 { 'c', Character.EPSILON });
- try grammar.rule_by_name('B').first.expect(&[_]u8 { 'd', Character.EPSILON });
+ try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { 'a' });
+ try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { 'c', Character.EPSILON });
+ try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { 'd', Character.EPSILON });
- try grammar.rule_by_name('S').follows.expect(&[_]u8 { Character.END });
- try grammar.rule_by_name('A').follows.expect(&[_]u8 { 'b', 'd' });
- try grammar.rule_by_name('B').follows.expect(&[_]u8 { 'b' });
+ try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { Character.END });
+ try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { 'b', 'd' });
+ try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { 'b' });
}
test "sample 2" {
@@ -253,18 +253,18 @@ test "sample 2" {
;
const allocator = std.testing.allocator;
- var grammar = try Self.init(text, allocator);
+ var grammar = try Self.parse(text, allocator);
defer grammar.deinit(allocator);
- try grammar.rule_by_name('S').first.expect(&[_]u8 { 'a', 'c', 'e' });
- try grammar.rule_by_name('A').first.expect(&[_]u8 { 'a', 'c' });
- try grammar.rule_by_name('B').first.expect(&[_]u8 { 'd' });
- try grammar.rule_by_name('C').first.expect(&[_]u8 { 'e', Character.EPSILON });
- try grammar.rule_by_name('D').first.expect(&[_]u8 { 'f', Character.EPSILON });
+ try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { 'a', 'c', 'e' });
+ try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { 'a', 'c' });
+ try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { 'd' });
+ try grammar.non_terminal_by_name('C').first.expect(&[_]u8 { 'e', Character.EPSILON });
+ try grammar.non_terminal_by_name('D').first.expect(&[_]u8 { 'f', Character.EPSILON });
- try grammar.rule_by_name('S').follows.expect(&[_]u8 { Character.END });
- try grammar.rule_by_name('A').follows.expect(&[_]u8 { 'd' });
- try grammar.rule_by_name('B').follows.expect(&[_]u8 { Character.END });
- try grammar.rule_by_name('C').follows.expect(&[_]u8 { Character.END });
- try grammar.rule_by_name('D').follows.expect(&[_]u8 { 'a' });
+ try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { Character.END });
+ try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { 'd' });
+ try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { Character.END });
+ try grammar.non_terminal_by_name('C').follows.expect(&[_]u8 { Character.END });
+ try grammar.non_terminal_by_name('D').follows.expect(&[_]u8 { 'a' });
}
diff --git a/src/main.zig b/src/main.zig
index 62877cb..7a0513e 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,8 +1,42 @@
const std = @import("std");
+pub const Grammar = @import("grammar.zig");
+pub const recognizer = @import("recognizer.zig");
-pub const grammar = @import("grammar.zig");
+fn help() !noreturn {
+ const stderr = std.io.getStdErr().writer();
+ try stderr.print("gll [grammar] [input]\n", .{});
+ std.process.exit(1);
+}
+
+fn read_whole_file(path: []const u8, allocator: std.mem.Allocator) ![]const u8 {
+ const file = try std.fs.cwd().openFile(path, .{});
+ defer file.close();
+ const stat = try file.stat();
+ return file.readToEndAlloc(allocator, stat.size);
+}
pub fn main() !void {
+ var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+ const allocator = gpa.allocator();
+ defer {
+ if (gpa.deinit() == .leak) {
+ @panic("memory leak detected");
+ }
+ }
+
+ var args = std.process.args();
+ _ = args.next();
+
+ const grammar_path = args.next() orelse try help();
+ const input_path = args.next() orelse try help();
+
+ const grammar_text = try read_whole_file(grammar_path, allocator);
+ defer allocator.free(grammar_text);
+ const input = try read_whole_file(input_path, allocator);
+ defer allocator.free(input);
+
+ var grammar = try Grammar.parse(grammar_text, allocator);
+ defer grammar.deinit(allocator);
}
test {
diff --git a/src/rule-list.zig b/src/non-terminal.zig
index a438750..097c603 100644
--- a/src/rule-list.zig
+++ b/src/non-terminal.zig
@@ -3,41 +3,45 @@ const CharSet = @import("char-set.zig");
const Character = @import("character.zig").Character;
const Self = @This();
-const Rhs = []Character;
+pub const Rule = []Character;
name: u8,
-rhs: std.ArrayList(Rhs),
+rule_list: std.ArrayList(Rule),
first: CharSet,
follows: CharSet,
pub fn init(name: u8, allocator: std.mem.Allocator) Self {
return Self {
.name = name,
- .rhs = std.ArrayList(Rhs).init(allocator),
+ .rule_list = std.ArrayList(Rule).init(allocator),
.first = CharSet {},
.follows = CharSet {},
};
}
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
- for (self.rhs.items) |item| {
- allocator.free(item);
+ for (self.rules()) |rule| {
+ allocator.free(rule);
}
- self.rhs.deinit();
+ self.rule_list.deinit();
}
-pub fn add_rule(self: *Self, rhs: Rhs) !void {
- try self.rhs.append(rhs);
+pub inline fn rules(self: *Self) []Rule {
+ return self.rule_list.items;
+}
+
+pub fn add_rule(self: *Self, rule: Rule) !void {
+ try self.rule_list.append(rule);
}
pub fn parse_rule(
buffer: []const u8,
allocator: std.mem.Allocator
-) !struct { u8, Rhs } {
+) !struct { u8, Rule } {
- var rhs = std.ArrayList(Character).init(allocator);
- errdefer rhs.deinit();
+ var rule = std.ArrayList(Character).init(allocator);
+ errdefer rule.deinit();
var tokens = std.mem.tokenizeAny(u8, buffer, &std.ascii.whitespace);
@@ -49,18 +53,18 @@ pub fn parse_rule(
while (tokens.next()) |token| {
if (token.len > 1) return error.InvalidCharacter;
- try rhs.append(Character.from_u8(token[0]));
+ try rule.append(Character.from_u8(token[0]));
}
- if (rhs.items.len == 0) {
+ if (rule.items.len == 0) {
return error.EmptyProduction;
}
- return .{ lhs[0], try rhs.toOwnedSlice() };
+ return .{ lhs[0], try rule.toOwnedSlice() };
}
pub inline fn is_empty(self: *const Self) bool {
- return self.rhs.items.len == 0;
+ return self.rules().len == 0;
}
pub fn format(
@@ -74,12 +78,12 @@ pub fn format(
try writer.print("[{c} -> ", .{ self.name });
- if (self.rhs.items.len > 0) {
- for (self.rhs.items[0]) |c| {
+ if (self.rules().len > 0) {
+ for (self.rules()[0]) |c| {
try writer.print("{}", .{c});
}
- for (self.rhs.items[1..]) |r| {
+ for (self.rules()[1..]) |r| {
try writer.print(" | ", .{});
for (r) |c| {
try writer.print("{}", .{c});