const std = @import("std"); const Character = @import("character.zig").Character; const RuleList = @import("rule-list.zig"); const Grammar = struct { const Self = @This(); rules: [26]RuleList, pub fn init(buffer: []const u8, allocator: std.mem.Allocator) !Self { var lines = std.mem.tokenizeScalar(u8, buffer, '\n'); var self: Self = undefined; errdefer self.deinit(allocator); for (&self.rules, 'A'..) |*rule, name| { rule.* = RuleList.init( @truncate(name), allocator ); } while (lines.next()) |line| { const name, const rhs = try RuleList.parse_rule(line, allocator); try self.rule_by_name(name).add_rule(rhs); } self.generate_first(); self.generate_follows(); return self; } pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { for (&self.rules) |*rule| { rule.deinit(allocator); } self.* = undefined; } pub inline fn rule_by_name(self: *Self, name: u8) *RuleList { return &self.rules[name - 'A']; } pub inline fn entry_point(self: *Self) *RuleList { return self.rule('S'); } fn generate_first(self: *Self) void { var modified = true; while (modified) { modified = false; for (&self.rules) |*rule| { for (rule.rhs.items) |rhs| { switch (rhs[0]) { .terminal => |t| { modified = modified or !rule.first.is_set(t); rule.first.set(t); }, .non_terminal, .epsilon => { if (rule.first.is_set(Character.EPSILON)) continue; var insert_epsilon = true; for (rhs) |*char| { switch (char.*) { .terminal => |t| { modified = modified or !rule.first.is_set(t); rule.first.set(t); insert_epsilon = false; break; }, .non_terminal => |n| { const first = self.rule_by_name(n).first; for (first.charset, 0..) |child_first, index| { if (index == Character.EPSILON) continue; modified = modified or (!rule.first.is_set(index) and child_first); rule.first.set_if(index, child_first); } if (!first.is_set(Character.EPSILON)) { insert_epsilon = false; break; } }, .epsilon => {}, } } if (insert_epsilon) { modified = true; rule.first.set(Character.EPSILON); } }, } } } } } fn generate_follows(self: *Self) void { self.rule_by_name('S').follows.set(Character.END); var modified = true; while (modified) { modified = false; for (&self.rules) |*parent_rules| { for (parent_rules.rhs.items) |rule| { for (0..rule.len) |character_index| { switch (rule[character_index]) { .non_terminal => |n| { const current_rule = self.rule_by_name(n); const ends_with_epsilon = brk: { for (character_index + 1..rule.len) |peek_index| { switch (rule[peek_index]) { .terminal => |t| { modified = modified or !current_rule.follows.is_set(t); current_rule.follows.set(t); break :brk false; }, .non_terminal => |peek_n| { const peek_rule = self.rule_by_name(peek_n); for (peek_rule.first.charset, 0..) |is_set, index| { if (Character.EPSILON == index) continue; modified = modified or (!current_rule.follows.is_set(index) and is_set); current_rule.follows.set_if(index, is_set); } if (!peek_rule.first.is_set(Character.EPSILON)) { break :brk false; } }, .epsilon => {}, } } break :brk true; }; if (ends_with_epsilon) { for (parent_rules.follows.charset, 0..) |is_set, char_index| { modified = modified or (!current_rule.follows.is_set(char_index) and is_set); current_rule.follows.set_if(char_index, is_set); } } }, else => {} } } } } } } }; test "expr" { const text = \\S -> B A \\A -> + B A \\A -> _ \\B -> D C \\C -> * D C \\C -> _ \\D -> ( S ) \\D -> a \\D -> b ; const allocator = std.testing.allocator; var grammar = try Grammar.init(text, allocator); defer grammar.deinit(allocator); try grammar.rule_by_name('S').first.expect(&[_]u8 { '(', 'a', 'b' }); try grammar.rule_by_name('A').first.expect(&[_]u8 { '+', Character.EPSILON }); try grammar.rule_by_name('B').first.expect(&[_]u8 { '(', 'a', 'b' }); try grammar.rule_by_name('C').first.expect(&[_]u8 { '*', Character.EPSILON }); try grammar.rule_by_name('D').first.expect(&[_]u8 { '(', 'a', 'b' }); try grammar.rule_by_name('S').follows.expect(&[_]u8 { ')', Character.END }); try grammar.rule_by_name('A').follows.expect(&[_]u8 { ')', Character.END }); try grammar.rule_by_name('B').follows.expect(&[_]u8 { '+', ')', Character.END }); try grammar.rule_by_name('C').follows.expect(&[_]u8 { '+', ')', Character.END }); try grammar.rule_by_name('D').follows.expect(&[_]u8 { '*', '+', ')', Character.END }); } test "sample 0" { const text = \\S -> A C B \\S -> C b b \\S -> B a \\A -> d a \\A -> B C \\B -> g \\B -> _ \\C -> h \\C -> _ ; const allocator = std.testing.allocator; var grammar = try Grammar.init(text, allocator); defer grammar.deinit(allocator); try grammar.rule_by_name('S').first.expect(&[_]u8 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON }); try grammar.rule_by_name('A').first.expect(&[_]u8 { 'd', 'g', 'h', Character.EPSILON }); try grammar.rule_by_name('B').first.expect(&[_]u8 { 'g', Character.EPSILON }); try grammar.rule_by_name('C').first.expect(&[_]u8 { 'h', Character.EPSILON }); try grammar.rule_by_name('S').follows.expect(&[_]u8 { Character.END }); try grammar.rule_by_name('A').follows.expect(&[_]u8 { 'g', 'h', Character.END }); try grammar.rule_by_name('B').follows.expect(&[_]u8 { 'a', 'g', 'h', Character.END }); try grammar.rule_by_name('C').follows.expect(&[_]u8 { 'b', 'g', 'h', Character.END }); } test "sample 1" { const text = \\S -> a A B b \\A -> c \\A -> _ \\B -> d \\B -> _ ; const allocator = std.testing.allocator; var grammar = try Grammar.init(text, allocator); defer grammar.deinit(allocator); try grammar.rule_by_name('S').first.expect(&[_]u8 { 'a' }); try grammar.rule_by_name('A').first.expect(&[_]u8 { 'c', Character.EPSILON }); try grammar.rule_by_name('B').first.expect(&[_]u8 { 'd', Character.EPSILON }); try grammar.rule_by_name('S').follows.expect(&[_]u8 { Character.END }); try grammar.rule_by_name('A').follows.expect(&[_]u8 { 'b', 'd' }); try grammar.rule_by_name('B').follows.expect(&[_]u8 { 'b' }); } test "sample 2" { const text = \\S -> A B \\S -> e D a \\A -> a b \\A -> c \\B -> d C \\C -> e C \\C -> _ \\D -> f D \\D -> _ ; const allocator = std.testing.allocator; var grammar = try Grammar.init(text, allocator); defer grammar.deinit(allocator); try grammar.rule_by_name('S').first.expect(&[_]u8 { 'a', 'c', 'e' }); try grammar.rule_by_name('A').first.expect(&[_]u8 { 'a', 'c' }); try grammar.rule_by_name('B').first.expect(&[_]u8 { 'd' }); try grammar.rule_by_name('C').first.expect(&[_]u8 { 'e', Character.EPSILON }); try grammar.rule_by_name('D').first.expect(&[_]u8 { 'f', Character.EPSILON }); try grammar.rule_by_name('S').follows.expect(&[_]u8 { Character.END }); try grammar.rule_by_name('A').follows.expect(&[_]u8 { 'd' }); try grammar.rule_by_name('B').follows.expect(&[_]u8 { Character.END }); try grammar.rule_by_name('C').follows.expect(&[_]u8 { Character.END }); try grammar.rule_by_name('D').follows.expect(&[_]u8 { 'a' }); }