From ae10b7d764d9587ab92a682781f8479107e8dff0 Mon Sep 17 00:00:00 2001 From: Nathan Reiner Date: Sat, 19 Jul 2025 20:18:15 +0200 Subject: add pex --- src/pex/block.zig | 90 +++++++++++++++++++++++++ src/pex/instruction.zig | 9 +++ src/pex/node.zig | 36 ++++++++++ src/pex/optimizer/root.zig | 38 +++++++++++ src/pex/root.zig | 163 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 336 insertions(+) create mode 100644 src/pex/block.zig create mode 100644 src/pex/instruction.zig create mode 100644 src/pex/node.zig create mode 100644 src/pex/optimizer/root.zig create mode 100644 src/pex/root.zig (limited to 'src/pex') diff --git a/src/pex/block.zig b/src/pex/block.zig new file mode 100644 index 0000000..3ae690e --- /dev/null +++ b/src/pex/block.zig @@ -0,0 +1,90 @@ +const std = @import("std"); +const NonTerminal = @import("../non-terminal.zig"); +const Rule = @import("../rule.zig"); +const Node = @import("node.zig"); + +const Self = @This(); + +id: usize, +heads: std.ArrayListUnmanaged(*Node) = .empty, + +pub fn compile( + non_terminal: *NonTerminal, + node_pool: *std.heap.MemoryPool(Node), + allocator: std.mem.Allocator +) !Self { + var self: Self = .{ .id = non_terminal.id }; + + for (non_terminal.rules()) |*rule| { + try self.heads.append(allocator, try compile_variant(rule, node_pool)); + } + + return self; +} + +pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { + for (self.heads.items) |head| { + head.deinit(); + } + self.heads.deinit(allocator); + self.* = undefined; +} + +pub fn clone( + self: *const Self, + allocator: std.mem.Allocator, + node_pool: *std.heap.MemoryPool(Node), +) !Self { + var self_clone = Self{ + .id = self.id, + .heads = try .initCapacity(allocator, self.heads.items.len), + }; + errdefer self_clone.deinit(allocator); + + for (self.heads.items) |head| { + try self_clone.heads.append(allocator, try head.clone(node_pool)); + } + + return self_clone; +} + +fn compile_variant( + rule: *Rule, + node_pool: *std.heap.MemoryPool(Node), +) !*Node { + const first = blk: { + for (rule.items, 0..) |item, index| { + switch (item) { + .terminal, .non_terminal => break :blk index, + .epsilon => {}, + } + } + + break :blk rule.items.len; + }; + + if (first == rule.items.len) { + return try Node.create(.{ .@"return" = void{} }, node_pool); + } + + const head = switch (rule.items[0]) { + .terminal => |c| try Node.create(.{ .@"check" = c }, node_pool), + .non_terminal => |id| try Node.create(.{ .@"call" = id }, node_pool), + .epsilon => unreachable, + }; + + var current = head; + for (rule.items[1..]) |item| { + const node = switch (item) { + .terminal => |c| try Node.create(.{ .@"check" = c }, node_pool), + .non_terminal => |id| try Node.create(.{ .@"call" = id }, node_pool), + .epsilon => continue, + }; + current.next = node; + current = node; + } + + current.next = try Node.create(.{ .@"return" = void{} }, node_pool); + + return head; +} diff --git a/src/pex/instruction.zig b/src/pex/instruction.zig new file mode 100644 index 0000000..5edffef --- /dev/null +++ b/src/pex/instruction.zig @@ -0,0 +1,9 @@ +const std = @import("std"); +const Node = @import("node.zig"); + +pub const Instruction = union(enum) { + check: u8, + call: usize, + jump: *std.ArrayListUnmanaged(*Node), + @"return": void, +}; diff --git a/src/pex/node.zig b/src/pex/node.zig new file mode 100644 index 0000000..7abb274 --- /dev/null +++ b/src/pex/node.zig @@ -0,0 +1,36 @@ +const std = @import("std"); +const Instruction = @import("instruction.zig").Instruction; + +const Self = @This(); + +instruction: Instruction, +next: ?*Self = null, + +pub fn create(instruction: Instruction, node_pool: *std.heap.MemoryPool(Self)) !*Self { + const node = try node_pool.create(); + node.* = .{ .instruction = instruction }; + return node; +} + +pub fn deinit(self: *Self) void { + if (self.next) |next| { + next.deinit(); + } + self.* = undefined; +} + +pub fn clone( + self: *Self, + node_pool: *std.heap.MemoryPool(Self), +) !*Self { + const self_clone = try node_pool.create(); + self_clone.instruction = self.instruction; + + if (self.next) |next| { + self_clone.next = try next.clone(node_pool); + } else { + self_clone.next = null; + } + + return self_clone; +} diff --git a/src/pex/optimizer/root.zig b/src/pex/optimizer/root.zig new file mode 100644 index 0000000..68ce968 --- /dev/null +++ b/src/pex/optimizer/root.zig @@ -0,0 +1,38 @@ +const std = @import("std"); +const Block = @import("../block.zig"); +const Node = @import("../node.zig"); +const Rule = @import("../../rule.zig"); + +pub fn optimize_block( + blocks: []Block, + raw: *Block, + target: *Block, + node_pool: *std.heap.MemoryPool(Node), + allocator: std.mem.Allocator, +) !void { + target.* = try raw.clone(allocator, node_pool); + _ = blocks; + + for (target.heads.items) |head| { + try hard_link_right_recursion(target, head); + } +} + +pub fn hard_link_right_recursion( + block: *Block, + current: *Node, +) !void { + switch (current.instruction) { + .call => |id| if (id == block.id) { + if (current.next.?.instruction == .@"return") { + current.instruction = .{ .jump = &block.heads }; + return; + } + }, + else => {}, + } + + if (current.next) |next| { + try hard_link_right_recursion(block, next); + } +} diff --git a/src/pex/root.zig b/src/pex/root.zig new file mode 100644 index 0000000..c653ca5 --- /dev/null +++ b/src/pex/root.zig @@ -0,0 +1,163 @@ +const std = @import("std"); +pub const Block = @import("block.zig"); +pub const Instruction = @import("instruction.zig").Instruction; +pub const Node = @import("node.zig"); +pub const Grammar = @import("../grammar.zig"); +const optimizer = @import("optimizer/root.zig"); + +const Self = @This(); + +grammar: *Grammar, +blocks: []Block, +node_pool: std.heap.MemoryPool(Node), + +pub fn compile(grammar: *Grammar, allocator: std.mem.Allocator) !Self { + const raw_blocks = try allocator.alloc(Block, grammar.non_terminals.len); + defer { + for (raw_blocks) |*block| { + block.deinit(allocator); + } + allocator.free(raw_blocks); + } + + var raw_node_pool: std.heap.MemoryPool(Node) = .init(allocator); + defer raw_node_pool.deinit(); + + for (raw_blocks, grammar.non_terminals) |*block, *non_terminal| { + block.* = try .compile(non_terminal, &raw_node_pool, allocator); + } + + var self: Self = .{ + .grammar = grammar, + .blocks = try allocator.alloc(Block, grammar.non_terminals.len), + .node_pool = .init(allocator), + }; + + for (self.blocks, raw_blocks) |*optimized, *raw| { + try optimizer.optimize_block(raw_blocks, raw, optimized, &self.node_pool, allocator); + } + + return self; +} + +pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { + for (self.blocks) |*block| { + block.deinit(allocator); + } + + allocator.free(self.blocks); + self.node_pool.deinit(); + self.* = undefined; +} + +pub fn entry_point(self: *const Self) *Block { + return &self.blocks[0]; +} + +test "terminals-single" { + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse("S", "S -> 'a'", allocator); + defer grammar.deinit(); + + var pex = try Self.compile(&grammar, allocator); + defer pex.deinit(allocator); + + try std.testing.expectEqual(2, pex.blocks.len); + try std.testing.expectEqual(1, pex.blocks[1].heads.items.len); + try std.testing.expectEqual( + Instruction{ .check = 'a' }, + pex.blocks[1].heads.items[0].instruction + ); + try std.testing.expectEqual(1, pex.blocks[1].heads.items[0].next.items.len); + try std.testing.expectEqual( + Instruction{ .@"return" = void{} }, + pex.blocks[1].heads.items[0].next.items[0].instruction + ); +} + +test "terminals-single-thread" { + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse("S", "S -> 'abc'", allocator); + defer grammar.deinit(); + + var pex = try Self.compile(&grammar, allocator); + defer pex.deinit(allocator); + + try std.testing.expectEqual(2, pex.blocks.len); + try std.testing.expectEqual(1, pex.blocks[1].heads.items.len); + + var current = pex.blocks[1].heads.items[0]; + for ("abc") |c| { + try std.testing.expectEqual( + Instruction{ .check = c }, + current.instruction, + ); + try std.testing.expectEqual(1, current.next.items.len); + current = current.next.items[0]; + } + + try std.testing.expectEqual( + Instruction{ .@"return" = void{} }, + current.instruction, + ); +} + +test "direct-left-recursion" { + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse("S", "S -> 'a' | S 'a'", allocator); + defer grammar.deinit(); + + var pex = try Self.compile(&grammar, allocator); + defer pex.deinit(allocator); + + try std.testing.expectEqual(2, pex.blocks.len); + try std.testing.expectEqual(1, pex.blocks[1].heads.items.len); + + try std.testing.expectEqual( + Instruction{ .check = 'a' }, + pex.blocks[1].heads.items[0].instruction, + ); + + try std.testing.expectEqual( + Instruction{ .@"return" = void{} }, + pex.blocks[1].heads.items[0].next.items[0].instruction, + ); + + try std.testing.expectEqual( + Instruction{ .check = 'a' }, + pex.blocks[1].heads.items[0].next.items[1].next.items[0].instruction, + ); + + try std.testing.expectEqual( + Instruction{ .@"return" = void{} }, + pex.blocks[1] + .heads.items[0] + .next.items[1] + .next.items[0] + .next.items[0] + .instruction, + ); + + try std.testing.expect( + pex.blocks[1] + .heads.items[0] + .next.items[1] + .next.items[0] + .next.items[1] + .instruction == .jump + ); + + try std.testing.expectEqual( + Instruction{ .@"return" = void{} }, + pex.blocks[1] + .heads.items[0] + .next.items[1] + .next.items[0] + .next.items[1] + .next.items[0] + .instruction, + ); +} -- cgit v1.2.3-70-g09d2