diff options
| -rw-r--r-- | src/grammar.zig | 156 | ||||
| -rw-r--r-- | src/main.zig | 36 | ||||
| -rw-r--r-- | src/non-terminal.zig (renamed from src/rule-list.zig) | 40 |
3 files changed, 135 insertions, 97 deletions
diff --git a/src/grammar.zig b/src/grammar.zig index de29a15..ed7eee6 100644 --- a/src/grammar.zig +++ b/src/grammar.zig @@ -1,28 +1,28 @@ const std = @import("std"); const Character = @import("character.zig").Character; -const RuleList = @import("rule-list.zig"); +const NonTerminal = @import("non-terminal.zig"); const Self = @This(); -rules: [26]RuleList, +non_terminals: [26]NonTerminal, -pub fn init(buffer: []const u8, allocator: std.mem.Allocator) !Self { +pub fn parse(buffer: []const u8, allocator: std.mem.Allocator) !Self { var lines = std.mem.tokenizeScalar(u8, buffer, '\n'); var self: Self = undefined; errdefer self.deinit(allocator); - for (&self.rules, 'A'..) |*rule, name| { - rule.* = RuleList.init( + for (&self.non_terminals, 'A'..) |*non_terminal, name| { + non_terminal.* = NonTerminal.init( @truncate(name), allocator ); } while (lines.next()) |line| { - const name, const rhs = try RuleList.parse_rule(line, allocator); - try self.rule_by_name(name).add_rule(rhs); + const name, const rule = try NonTerminal.parse_rule(line, allocator); + try self.non_terminal_by_name(name).add_rule(rule); } self.generate_first(); @@ -32,19 +32,19 @@ pub fn init(buffer: []const u8, allocator: std.mem.Allocator) !Self { } pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { - for (&self.rules) |*rule| { - rule.deinit(allocator); + for (&self.non_terminals) |*non_terminal| { + non_terminal.deinit(allocator); } self.* = undefined; } -pub inline fn rule_by_name(self: *Self, name: u8) *RuleList { - return &self.rules[name - 'A']; +pub inline fn non_terminal_by_name(self: *Self, name: u8) *NonTerminal { + return &self.non_terminals[name - 'A']; } -pub inline fn entry_point(self: *Self) *RuleList { - return self.rule('S'); +pub inline fn entry_point(self: *Self) *NonTerminal { + return self.non_terminal_by_name('S'); } fn generate_first(self: *Self) void { @@ -53,33 +53,33 @@ fn generate_first(self: *Self) void { while (modified) { modified = false; - for (&self.rules) |*rule| { - for (rule.rhs.items) |rhs| { - switch (rhs[0]) { + for (&self.non_terminals) |*non_terminal| { + for (non_terminal.rules()) |rule| { + switch (rule[0]) { .terminal => |t| { - modified = modified or !rule.first.is_set(t); - rule.first.set(t); + modified = modified or !non_terminal.first.is_set(t); + non_terminal.first.set(t); }, .non_terminal, .epsilon => { - if (rule.first.is_set(Character.EPSILON)) continue; + if (non_terminal.first.is_set(Character.EPSILON)) continue; var insert_epsilon = true; - for (rhs) |*char| { + for (rule) |*char| { switch (char.*) { .terminal => |t| { - modified = modified or !rule.first.is_set(t); - rule.first.set(t); + modified = modified or !non_terminal.first.is_set(t); + non_terminal.first.set(t); insert_epsilon = false; break; }, .non_terminal => |n| { - const first = self.rule_by_name(n).first; + const first = self.non_terminal_by_name(n).first; for (first.charset, 0..) |child_first, index| { if (index == Character.EPSILON) continue; - modified = modified or (!rule.first.is_set(index) and child_first); - rule.first.set_if(index, child_first); + modified = modified or (!non_terminal.first.is_set(index) and child_first); + non_terminal.first.set_if(index, child_first); } if (!first.is_set(Character.EPSILON)) { @@ -93,7 +93,7 @@ fn generate_first(self: *Self) void { if (insert_epsilon) { modified = true; - rule.first.set(Character.EPSILON); + non_terminal.first.set(Character.EPSILON); } }, } @@ -103,37 +103,37 @@ fn generate_first(self: *Self) void { } fn generate_follows(self: *Self) void { - self.rule_by_name('S').follows.set(Character.END); + self.non_terminal_by_name('S').follows.set(Character.END); var modified = true; while (modified) { modified = false; - for (&self.rules) |*parent_rules| { - for (parent_rules.rhs.items) |rule| { + for (&self.non_terminals) |*non_terminal| { + for (non_terminal.rules()) |rule| { for (0..rule.len) |character_index| { switch (rule[character_index]) { .non_terminal => |n| { - const current_rule = self.rule_by_name(n); + const current_non_terminal = self.non_terminal_by_name(n); const ends_with_epsilon = brk: { for (character_index + 1..rule.len) |peek_index| { switch (rule[peek_index]) { .terminal => |t| { - modified = modified or !current_rule.follows.is_set(t); - current_rule.follows.set(t); + modified = modified or !current_non_terminal.follows.is_set(t); + current_non_terminal.follows.set(t); break :brk false; }, .non_terminal => |peek_n| { - const peek_rule = self.rule_by_name(peek_n); - for (peek_rule.first.charset, 0..) |is_set, index| { + const peek_non_terminal = self.non_terminal_by_name(peek_n); + for (peek_non_terminal.first.charset, 0..) |is_set, index| { if (Character.EPSILON == index) continue; - modified = modified or (!current_rule.follows.is_set(index) and is_set); - current_rule.follows.set_if(index, is_set); + modified = modified or (!current_non_terminal.follows.is_set(index) and is_set); + current_non_terminal.follows.set_if(index, is_set); } - if (!peek_rule.first.is_set(Character.EPSILON)) { + if (!peek_non_terminal.first.is_set(Character.EPSILON)) { break :brk false; } }, @@ -144,9 +144,9 @@ fn generate_follows(self: *Self) void { }; if (ends_with_epsilon) { - for (parent_rules.follows.charset, 0..) |is_set, char_index| { - modified = modified or (!current_rule.follows.is_set(char_index) and is_set); - current_rule.follows.set_if(char_index, is_set); + for (non_terminal.follows.charset, 0..) |is_set, char_index| { + modified = modified or (!current_non_terminal.follows.is_set(char_index) and is_set); + current_non_terminal.follows.set_if(char_index, is_set); } } }, @@ -173,20 +173,20 @@ test "expr" { ; const allocator = std.testing.allocator; - var grammar = try Self.init(text, allocator); + var grammar = try Self.parse(text, allocator); defer grammar.deinit(allocator); - try grammar.rule_by_name('S').first.expect(&[_]u8 { '(', 'a', 'b' }); - try grammar.rule_by_name('A').first.expect(&[_]u8 { '+', Character.EPSILON }); - try grammar.rule_by_name('B').first.expect(&[_]u8 { '(', 'a', 'b' }); - try grammar.rule_by_name('C').first.expect(&[_]u8 { '*', Character.EPSILON }); - try grammar.rule_by_name('D').first.expect(&[_]u8 { '(', 'a', 'b' }); + try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { '(', 'a', 'b' }); + try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { '+', Character.EPSILON }); + try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { '(', 'a', 'b' }); + try grammar.non_terminal_by_name('C').first.expect(&[_]u8 { '*', Character.EPSILON }); + try grammar.non_terminal_by_name('D').first.expect(&[_]u8 { '(', 'a', 'b' }); - try grammar.rule_by_name('S').follows.expect(&[_]u8 { ')', Character.END }); - try grammar.rule_by_name('A').follows.expect(&[_]u8 { ')', Character.END }); - try grammar.rule_by_name('B').follows.expect(&[_]u8 { '+', ')', Character.END }); - try grammar.rule_by_name('C').follows.expect(&[_]u8 { '+', ')', Character.END }); - try grammar.rule_by_name('D').follows.expect(&[_]u8 { '*', '+', ')', Character.END }); + try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { ')', Character.END }); + try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { ')', Character.END }); + try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { '+', ')', Character.END }); + try grammar.non_terminal_by_name('C').follows.expect(&[_]u8 { '+', ')', Character.END }); + try grammar.non_terminal_by_name('D').follows.expect(&[_]u8 { '*', '+', ')', Character.END }); } test "sample 0" { @@ -203,18 +203,18 @@ test "sample 0" { ; const allocator = std.testing.allocator; - var grammar = try Self.init(text, allocator); + var grammar = try Self.parse(text, allocator); defer grammar.deinit(allocator); - try grammar.rule_by_name('S').first.expect(&[_]u8 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON }); - try grammar.rule_by_name('A').first.expect(&[_]u8 { 'd', 'g', 'h', Character.EPSILON }); - try grammar.rule_by_name('B').first.expect(&[_]u8 { 'g', Character.EPSILON }); - try grammar.rule_by_name('C').first.expect(&[_]u8 { 'h', Character.EPSILON }); + try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON }); + try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { 'd', 'g', 'h', Character.EPSILON }); + try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { 'g', Character.EPSILON }); + try grammar.non_terminal_by_name('C').first.expect(&[_]u8 { 'h', Character.EPSILON }); - try grammar.rule_by_name('S').follows.expect(&[_]u8 { Character.END }); - try grammar.rule_by_name('A').follows.expect(&[_]u8 { 'g', 'h', Character.END }); - try grammar.rule_by_name('B').follows.expect(&[_]u8 { 'a', 'g', 'h', Character.END }); - try grammar.rule_by_name('C').follows.expect(&[_]u8 { 'b', 'g', 'h', Character.END }); + try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { Character.END }); + try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { 'g', 'h', Character.END }); + try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { 'a', 'g', 'h', Character.END }); + try grammar.non_terminal_by_name('C').follows.expect(&[_]u8 { 'b', 'g', 'h', Character.END }); } test "sample 1" { @@ -227,16 +227,16 @@ test "sample 1" { ; const allocator = std.testing.allocator; - var grammar = try Self.init(text, allocator); + var grammar = try Self.parse(text, allocator); defer grammar.deinit(allocator); - try grammar.rule_by_name('S').first.expect(&[_]u8 { 'a' }); - try grammar.rule_by_name('A').first.expect(&[_]u8 { 'c', Character.EPSILON }); - try grammar.rule_by_name('B').first.expect(&[_]u8 { 'd', Character.EPSILON }); + try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { 'a' }); + try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { 'c', Character.EPSILON }); + try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { 'd', Character.EPSILON }); - try grammar.rule_by_name('S').follows.expect(&[_]u8 { Character.END }); - try grammar.rule_by_name('A').follows.expect(&[_]u8 { 'b', 'd' }); - try grammar.rule_by_name('B').follows.expect(&[_]u8 { 'b' }); + try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { Character.END }); + try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { 'b', 'd' }); + try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { 'b' }); } test "sample 2" { @@ -253,18 +253,18 @@ test "sample 2" { ; const allocator = std.testing.allocator; - var grammar = try Self.init(text, allocator); + var grammar = try Self.parse(text, allocator); defer grammar.deinit(allocator); - try grammar.rule_by_name('S').first.expect(&[_]u8 { 'a', 'c', 'e' }); - try grammar.rule_by_name('A').first.expect(&[_]u8 { 'a', 'c' }); - try grammar.rule_by_name('B').first.expect(&[_]u8 { 'd' }); - try grammar.rule_by_name('C').first.expect(&[_]u8 { 'e', Character.EPSILON }); - try grammar.rule_by_name('D').first.expect(&[_]u8 { 'f', Character.EPSILON }); + try grammar.non_terminal_by_name('S').first.expect(&[_]u8 { 'a', 'c', 'e' }); + try grammar.non_terminal_by_name('A').first.expect(&[_]u8 { 'a', 'c' }); + try grammar.non_terminal_by_name('B').first.expect(&[_]u8 { 'd' }); + try grammar.non_terminal_by_name('C').first.expect(&[_]u8 { 'e', Character.EPSILON }); + try grammar.non_terminal_by_name('D').first.expect(&[_]u8 { 'f', Character.EPSILON }); - try grammar.rule_by_name('S').follows.expect(&[_]u8 { Character.END }); - try grammar.rule_by_name('A').follows.expect(&[_]u8 { 'd' }); - try grammar.rule_by_name('B').follows.expect(&[_]u8 { Character.END }); - try grammar.rule_by_name('C').follows.expect(&[_]u8 { Character.END }); - try grammar.rule_by_name('D').follows.expect(&[_]u8 { 'a' }); + try grammar.non_terminal_by_name('S').follows.expect(&[_]u8 { Character.END }); + try grammar.non_terminal_by_name('A').follows.expect(&[_]u8 { 'd' }); + try grammar.non_terminal_by_name('B').follows.expect(&[_]u8 { Character.END }); + try grammar.non_terminal_by_name('C').follows.expect(&[_]u8 { Character.END }); + try grammar.non_terminal_by_name('D').follows.expect(&[_]u8 { 'a' }); } diff --git a/src/main.zig b/src/main.zig index 62877cb..7a0513e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,8 +1,42 @@ const std = @import("std"); +pub const Grammar = @import("grammar.zig"); +pub const recognizer = @import("recognizer.zig"); -pub const grammar = @import("grammar.zig"); +fn help() !noreturn { + const stderr = std.io.getStdErr().writer(); + try stderr.print("gll [grammar] [input]\n", .{}); + std.process.exit(1); +} + +fn read_whole_file(path: []const u8, allocator: std.mem.Allocator) ![]const u8 { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + const stat = try file.stat(); + return file.readToEndAlloc(allocator, stat.size); +} pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + const allocator = gpa.allocator(); + defer { + if (gpa.deinit() == .leak) { + @panic("memory leak detected"); + } + } + + var args = std.process.args(); + _ = args.next(); + + const grammar_path = args.next() orelse try help(); + const input_path = args.next() orelse try help(); + + const grammar_text = try read_whole_file(grammar_path, allocator); + defer allocator.free(grammar_text); + const input = try read_whole_file(input_path, allocator); + defer allocator.free(input); + + var grammar = try Grammar.parse(grammar_text, allocator); + defer grammar.deinit(allocator); } test { diff --git a/src/rule-list.zig b/src/non-terminal.zig index a438750..097c603 100644 --- a/src/rule-list.zig +++ b/src/non-terminal.zig @@ -3,41 +3,45 @@ const CharSet = @import("char-set.zig"); const Character = @import("character.zig").Character; const Self = @This(); -const Rhs = []Character; +pub const Rule = []Character; name: u8, -rhs: std.ArrayList(Rhs), +rule_list: std.ArrayList(Rule), first: CharSet, follows: CharSet, pub fn init(name: u8, allocator: std.mem.Allocator) Self { return Self { .name = name, - .rhs = std.ArrayList(Rhs).init(allocator), + .rule_list = std.ArrayList(Rule).init(allocator), .first = CharSet {}, .follows = CharSet {}, }; } pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { - for (self.rhs.items) |item| { - allocator.free(item); + for (self.rules()) |rule| { + allocator.free(rule); } - self.rhs.deinit(); + self.rule_list.deinit(); } -pub fn add_rule(self: *Self, rhs: Rhs) !void { - try self.rhs.append(rhs); +pub inline fn rules(self: *Self) []Rule { + return self.rule_list.items; +} + +pub fn add_rule(self: *Self, rule: Rule) !void { + try self.rule_list.append(rule); } pub fn parse_rule( buffer: []const u8, allocator: std.mem.Allocator -) !struct { u8, Rhs } { +) !struct { u8, Rule } { - var rhs = std.ArrayList(Character).init(allocator); - errdefer rhs.deinit(); + var rule = std.ArrayList(Character).init(allocator); + errdefer rule.deinit(); var tokens = std.mem.tokenizeAny(u8, buffer, &std.ascii.whitespace); @@ -49,18 +53,18 @@ pub fn parse_rule( while (tokens.next()) |token| { if (token.len > 1) return error.InvalidCharacter; - try rhs.append(Character.from_u8(token[0])); + try rule.append(Character.from_u8(token[0])); } - if (rhs.items.len == 0) { + if (rule.items.len == 0) { return error.EmptyProduction; } - return .{ lhs[0], try rhs.toOwnedSlice() }; + return .{ lhs[0], try rule.toOwnedSlice() }; } pub inline fn is_empty(self: *const Self) bool { - return self.rhs.items.len == 0; + return self.rules().len == 0; } pub fn format( @@ -74,12 +78,12 @@ pub fn format( try writer.print("[{c} -> ", .{ self.name }); - if (self.rhs.items.len > 0) { - for (self.rhs.items[0]) |c| { + if (self.rules().len > 0) { + for (self.rules()[0]) |c| { try writer.print("{}", .{c}); } - for (self.rhs.items[1..]) |r| { + for (self.rules()[1..]) |r| { try writer.print(" | ", .{}); for (r) |c| { try writer.print("{}", .{c}); |