const std = @import("std"); const Character = @import("character.zig").Character; const NonTerminal = @import("non-terminal.zig"); const Self = @This(); non_terminals: []NonTerminal, entry_id: usize, allocator: std.mem.Allocator, pub fn parse(entry: []const u8, buffer: []const u8, allocator: std.mem.Allocator) !Self { var lines = std.mem.tokenizeScalar(u8, buffer, '\n'); var names = std.StringHashMap(usize).init(allocator); defer names.deinit(); while (lines.next()) |line| { const name = try NonTerminal.parse_name(line); if (!names.contains(name)) { try names.put(name, names.count()); } } lines = std.mem.tokenizeScalar(u8, buffer, '\n'); var self = Self { .non_terminals = try allocator.alloc(NonTerminal, names.count()), .entry_id = names.get(entry) orelse return error.MissingRule, .allocator = allocator, }; errdefer self.deinit(); { var iterator = names.iterator(); while (iterator.next()) |name_entry| { self.non_terminals[name_entry.value_ptr.*] = try NonTerminal.init( name_entry.key_ptr.*, name_entry.value_ptr.*, allocator ); } } while (lines.next()) |line| { const name = try NonTerminal.parse_name(line); try self.non_terminal_by_id( names.get(name) orelse return error.MissingRule ).parse_and_add_variants(line, &names); } self.generate_first(); self.generate_follows(); std.debug.print("{}\n", .{self}); return self; } pub fn deinit(self: *Self) void { for (self.non_terminals) |*non_terminal| { non_terminal.deinit(); } self.allocator.free(self.non_terminals); self.* = undefined; } pub inline fn non_terminal_by_id(self: *Self, id: usize) *NonTerminal { return &self.non_terminals[id]; } pub inline fn entry_point(self: *Self) *NonTerminal { return self.non_terminal_by_id(self.entry_id); } fn generate_first(self: *Self) void { var modified = true; while (modified) { modified = false; for (self.non_terminals) |*non_terminal| { for (non_terminal.rules()) |*rule| { switch (rule.items[0]) { .terminal => |t| { modified = modified or !non_terminal.first.is_set(t); non_terminal.first.set(t); rule.first.set(t); }, .non_terminal, .epsilon => { var insert_epsilon = true; for (rule.items) |*char| { switch (char.*) { .terminal => |t| { modified = modified or !non_terminal.first.is_set(t); non_terminal.first.set(t); rule.first.set(t); insert_epsilon = false; break; }, .non_terminal => |n| { const first = self.non_terminal_by_id(n).first; for (first.charset, 0..) |child_first, index| { if (index == Character.EPSILON) continue; modified = modified or (!non_terminal.first.is_set(index) and child_first); non_terminal.first.set_if(index, child_first); rule.first.set_if(index, child_first); } if (!first.is_set(Character.EPSILON)) { insert_epsilon = false; break; } }, .epsilon => {}, } } if (insert_epsilon and !non_terminal.first.is_set(Character.EPSILON)) { modified = true; non_terminal.first.set(Character.EPSILON); rule.first.set(Character.EPSILON); } }, } } } } } fn generate_follows(self: *Self) void { self.entry_point().follows.set(Character.END); var modified = true; while (modified) { modified = false; for (self.non_terminals) |*non_terminal| { for (non_terminal.rules()) |rule| { for (0..rule.items.len) |character_index| { switch (rule.items[character_index]) { .non_terminal => |n| { const current_non_terminal = self.non_terminal_by_id(n); const ends_with_epsilon = brk: { for (character_index + 1..rule.items.len) |peek_index| { switch (rule.items[peek_index]) { .terminal => |t| { modified = modified or !current_non_terminal.follows.is_set(t); current_non_terminal.follows.set(t); break :brk false; }, .non_terminal => |peek_n| { const peek_non_terminal = self.non_terminal_by_id(peek_n); for (peek_non_terminal.first.charset, 0..) |is_set, index| { if (Character.EPSILON == index) continue; modified = modified or (!current_non_terminal.follows.is_set(index) and is_set); current_non_terminal.follows.set_if(index, is_set); } if (!peek_non_terminal.first.is_set(Character.EPSILON)) { break :brk false; } }, .epsilon => {}, } } break :brk true; }; if (ends_with_epsilon) { for (non_terminal.follows.charset, 0..) |is_set, char_index| { modified = modified or (!current_non_terminal.follows.is_set(char_index) and is_set); current_non_terminal.follows.set_if(char_index, is_set); } } }, else => {} } } } } } } pub fn is_next_char(self: *const Self, rule_slice: []Character, current: usize, char: u8) bool { var is_char_in_first = false; var is_epsilon_in_first = true; for (rule_slice) |character| { switch(character) { .terminal => |c| { is_char_in_first = c == char; break; }, .non_terminal => |n| { is_char_in_first = self.non_terminals[n].first.is_set(char); if (is_char_in_first or !self.non_terminals[n].first.is_set(Character.EPSILON)) { break; } }, else => {}, } } for (rule_slice) |character| { switch(character) { .terminal => is_epsilon_in_first = false, .non_terminal => |n| { if (!self.non_terminals[n].first.is_set(Character.EPSILON)) { is_epsilon_in_first = false; break; } }, else => {}, } } const result = is_char_in_first or (is_epsilon_in_first and self.non_terminals[current].follows.is_set(char)); return result; } pub fn format( self: *const Self, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype, ) !void { _ = fmt; _ = options; for (self.non_terminals) |non_terminal| { try writer.print("[{}] ", .{non_terminal}); } } test "expr" { const text = \\S -> B A \\A -> '+' B A \\A -> '' \\B -> D C \\C -> '*' D C \\C -> '' \\D -> '(' S ')' \\D -> 'a' | 'b' ; const allocator = std.testing.allocator; var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { '(', 'a', 'b' }); try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { '+', Character.EPSILON }); try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { '(', 'a', 'b' }); try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { '*', Character.EPSILON }); try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { '(', 'a', 'b' }); try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { ')', Character.END }); try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { ')', Character.END }); try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { '+', ')', Character.END }); try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { '+', ')', Character.END }); try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { '*', '+', ')', Character.END }); } test "sample 0" { const text = \\S -> A C B | C 'bb' | B 'a' \\A -> 'da' | B C \\B -> 'g' | '' \\C -> 'h' | '' ; const allocator = std.testing.allocator; var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON }); try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'd', 'g', 'h', Character.EPSILON }); try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'g', Character.EPSILON }); try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'h', Character.EPSILON }); try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END }); try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'g', 'h', Character.END }); try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'g', 'h', Character.END }); try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { 'b', 'g', 'h', Character.END }); } test "sample 1" { const text = \\S -> 'a' A B 'b' \\A -> 'c' \\A -> '' \\B -> 'd' \\B -> '' ; const allocator = std.testing.allocator; var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a' }); try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'c', Character.EPSILON }); try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd', Character.EPSILON }); try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END }); try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'b', 'd' }); try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'b' }); } test "sample 2" { const text = \\S -> A B \\S -> 'e' D 'a' \\A -> 'ab' \\A -> 'c' \\B -> 'd' C \\C -> 'e' C \\C -> '' \\D -> 'f' D \\D -> '' ; const allocator = std.testing.allocator; var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a', 'c', 'e' }); try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c' }); try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'd' }); try grammar.non_terminal_by_id(3).first.expect(&[_]u9 { 'e', Character.EPSILON }); try grammar.non_terminal_by_id(4).first.expect(&[_]u9 { 'f', Character.EPSILON }); try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { Character.END }); try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'd' }); try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { Character.END }); try grammar.non_terminal_by_id(3).follows.expect(&[_]u9 { Character.END }); try grammar.non_terminal_by_id(4).follows.expect(&[_]u9 { 'a' }); } test "sample 3" { const text = \\S -> A S 'd' \\S -> B S \\S -> '' \\A -> 'a' \\A -> 'c' \\B -> 'a' \\B -> 'b' ; const allocator = std.testing.allocator; var grammar = try Self.parse("S", text, allocator); defer grammar.deinit(); try grammar.non_terminal_by_id(0).first.expect(&[_]u9 { 'a', 'b', 'c', Character.EPSILON }); try grammar.non_terminal_by_id(1).first.expect(&[_]u9 { 'a', 'c' }); try grammar.non_terminal_by_id(2).first.expect(&[_]u9 { 'a', 'b' }); try grammar.non_terminal_by_id(0).follows.expect(&[_]u9 { 'd', Character.END }); try grammar.non_terminal_by_id(1).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd' }); try grammar.non_terminal_by_id(2).follows.expect(&[_]u9 { 'a', 'b', 'c', 'd', Character.END }); }