From 839271627d0cfd2240ec30a603ff60a0165fe6a2 Mon Sep 17 00:00:00 2001 From: Nathan Reiner Date: Thu, 24 Apr 2025 16:24:35 +0200 Subject: change Grammar to in-file struct --- src/grammar.zig | 252 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 125 insertions(+), 127 deletions(-) diff --git a/src/grammar.zig b/src/grammar.zig index 0f25095..de29a15 100644 --- a/src/grammar.zig +++ b/src/grammar.zig @@ -3,163 +3,161 @@ const std = @import("std"); const Character = @import("character.zig").Character; const RuleList = @import("rule-list.zig"); -const Grammar = struct { - const Self = @This(); +const Self = @This(); - rules: [26]RuleList, +rules: [26]RuleList, - pub fn init(buffer: []const u8, allocator: std.mem.Allocator) !Self { - var lines = std.mem.tokenizeScalar(u8, buffer, '\n'); +pub fn init(buffer: []const u8, allocator: std.mem.Allocator) !Self { + var lines = std.mem.tokenizeScalar(u8, buffer, '\n'); - var self: Self = undefined; - errdefer self.deinit(allocator); + var self: Self = undefined; + errdefer self.deinit(allocator); - for (&self.rules, 'A'..) |*rule, name| { - rule.* = RuleList.init( - @truncate(name), - allocator - ); - } - - while (lines.next()) |line| { - const name, const rhs = try RuleList.parse_rule(line, allocator); - try self.rule_by_name(name).add_rule(rhs); - } - - self.generate_first(); - self.generate_follows(); + for (&self.rules, 'A'..) |*rule, name| { + rule.* = RuleList.init( + @truncate(name), + allocator + ); + } - return self; + while (lines.next()) |line| { + const name, const rhs = try RuleList.parse_rule(line, allocator); + try self.rule_by_name(name).add_rule(rhs); } - pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { - for (&self.rules) |*rule| { - rule.deinit(allocator); - } + self.generate_first(); + self.generate_follows(); - self.* = undefined; - } + return self; +} - pub inline fn rule_by_name(self: *Self, name: u8) *RuleList { - return &self.rules[name - 'A']; +pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { + for (&self.rules) |*rule| { + rule.deinit(allocator); } - pub inline fn entry_point(self: *Self) *RuleList { - return self.rule('S'); - } + self.* = undefined; +} + +pub inline fn rule_by_name(self: *Self, name: u8) *RuleList { + return &self.rules[name - 'A']; +} - fn generate_first(self: *Self) void { - var modified = true; +pub inline fn entry_point(self: *Self) *RuleList { + return self.rule('S'); +} - while (modified) { - modified = false; +fn generate_first(self: *Self) void { + var modified = true; - for (&self.rules) |*rule| { - for (rule.rhs.items) |rhs| { - switch (rhs[0]) { - .terminal => |t| { - modified = modified or !rule.first.is_set(t); - rule.first.set(t); - }, + while (modified) { + modified = false; - .non_terminal, .epsilon => { - if (rule.first.is_set(Character.EPSILON)) continue; - var insert_epsilon = true; + for (&self.rules) |*rule| { + for (rule.rhs.items) |rhs| { + switch (rhs[0]) { + .terminal => |t| { + modified = modified or !rule.first.is_set(t); + rule.first.set(t); + }, + + .non_terminal, .epsilon => { + if (rule.first.is_set(Character.EPSILON)) continue; + var insert_epsilon = true; + + for (rhs) |*char| { + switch (char.*) { + .terminal => |t| { + modified = modified or !rule.first.is_set(t); + rule.first.set(t); + insert_epsilon = false; + break; + }, + .non_terminal => |n| { + const first = self.rule_by_name(n).first; + for (first.charset, 0..) |child_first, index| { + if (index == Character.EPSILON) continue; + + modified = modified or (!rule.first.is_set(index) and child_first); + rule.first.set_if(index, child_first); + } - for (rhs) |*char| { - switch (char.*) { - .terminal => |t| { - modified = modified or !rule.first.is_set(t); - rule.first.set(t); + if (!first.is_set(Character.EPSILON)) { insert_epsilon = false; break; - }, - .non_terminal => |n| { - const first = self.rule_by_name(n).first; - for (first.charset, 0..) |child_first, index| { - if (index == Character.EPSILON) continue; - - modified = modified or (!rule.first.is_set(index) and child_first); - rule.first.set_if(index, child_first); - } - - if (!first.is_set(Character.EPSILON)) { - insert_epsilon = false; - break; - } - }, - .epsilon => {}, - } + } + }, + .epsilon => {}, } + } - if (insert_epsilon) { - modified = true; - rule.first.set(Character.EPSILON); - } - }, - } + if (insert_epsilon) { + modified = true; + rule.first.set(Character.EPSILON); + } + }, } } } } +} - fn generate_follows(self: *Self) void { - self.rule_by_name('S').follows.set(Character.END); - var modified = true; - - while (modified) { - modified = false; - - for (&self.rules) |*parent_rules| { - for (parent_rules.rhs.items) |rule| { - for (0..rule.len) |character_index| { - - switch (rule[character_index]) { - .non_terminal => |n| { - const current_rule = self.rule_by_name(n); - const ends_with_epsilon = brk: { - for (character_index + 1..rule.len) |peek_index| { - switch (rule[peek_index]) { - .terminal => |t| { - modified = modified or !current_rule.follows.is_set(t); - current_rule.follows.set(t); +fn generate_follows(self: *Self) void { + self.rule_by_name('S').follows.set(Character.END); + var modified = true; + + while (modified) { + modified = false; + + for (&self.rules) |*parent_rules| { + for (parent_rules.rhs.items) |rule| { + for (0..rule.len) |character_index| { + + switch (rule[character_index]) { + .non_terminal => |n| { + const current_rule = self.rule_by_name(n); + const ends_with_epsilon = brk: { + for (character_index + 1..rule.len) |peek_index| { + switch (rule[peek_index]) { + .terminal => |t| { + modified = modified or !current_rule.follows.is_set(t); + current_rule.follows.set(t); + break :brk false; + }, + .non_terminal => |peek_n| { + const peek_rule = self.rule_by_name(peek_n); + for (peek_rule.first.charset, 0..) |is_set, index| { + if (Character.EPSILON == index) continue; + + modified = modified or (!current_rule.follows.is_set(index) and is_set); + current_rule.follows.set_if(index, is_set); + } + + if (!peek_rule.first.is_set(Character.EPSILON)) { break :brk false; - }, - .non_terminal => |peek_n| { - const peek_rule = self.rule_by_name(peek_n); - for (peek_rule.first.charset, 0..) |is_set, index| { - if (Character.EPSILON == index) continue; - - modified = modified or (!current_rule.follows.is_set(index) and is_set); - current_rule.follows.set_if(index, is_set); - } - - if (!peek_rule.first.is_set(Character.EPSILON)) { - break :brk false; - } - }, - .epsilon => {}, - } - } - break :brk true; - }; - - if (ends_with_epsilon) { - for (parent_rules.follows.charset, 0..) |is_set, char_index| { - modified = modified or (!current_rule.follows.is_set(char_index) and is_set); - current_rule.follows.set_if(char_index, is_set); + } + }, + .epsilon => {}, } } - }, - else => {} - } + break :brk true; + }; + if (ends_with_epsilon) { + for (parent_rules.follows.charset, 0..) |is_set, char_index| { + modified = modified or (!current_rule.follows.is_set(char_index) and is_set); + current_rule.follows.set_if(char_index, is_set); + } + } + }, + else => {} } + } } } } -}; +} test "expr" { const text = @@ -175,7 +173,7 @@ test "expr" { ; const allocator = std.testing.allocator; - var grammar = try Grammar.init(text, allocator); + var grammar = try Self.init(text, allocator); defer grammar.deinit(allocator); try grammar.rule_by_name('S').first.expect(&[_]u8 { '(', 'a', 'b' }); @@ -205,7 +203,7 @@ test "sample 0" { ; const allocator = std.testing.allocator; - var grammar = try Grammar.init(text, allocator); + var grammar = try Self.init(text, allocator); defer grammar.deinit(allocator); try grammar.rule_by_name('S').first.expect(&[_]u8 { 'd', 'g', 'h', 'b', 'a', Character.EPSILON }); @@ -229,7 +227,7 @@ test "sample 1" { ; const allocator = std.testing.allocator; - var grammar = try Grammar.init(text, allocator); + var grammar = try Self.init(text, allocator); defer grammar.deinit(allocator); try grammar.rule_by_name('S').first.expect(&[_]u8 { 'a' }); @@ -255,7 +253,7 @@ test "sample 2" { ; const allocator = std.testing.allocator; - var grammar = try Grammar.init(text, allocator); + var grammar = try Self.init(text, allocator); defer grammar.deinit(allocator); try grammar.rule_by_name('S').first.expect(&[_]u8 { 'a', 'c', 'e' }); -- cgit v1.2.3-70-g09d2