aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2025-04-26 14:23:28 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2025-04-26 14:23:28 +0200
commitcf4d53c3eb35028839e6b267230c23df68b1e94a (patch)
treeac564add1e8b0ee1b9d111a7692ec2ab7fc0499e /src
parentf593da7580f423b1405f4705081368acef0b3c21 (diff)
first working implementation (unoptimized)
Diffstat (limited to 'src')
-rw-r--r--src/argument.zig197
-rw-r--r--src/generator.zig82
-rw-r--r--src/gss.zig30
-rw-r--r--src/main.zig125
-rw-r--r--src/non-terminal.zig6
-rw-r--r--src/recognizer.zig240
6 files changed, 656 insertions, 24 deletions
diff --git a/src/argument.zig b/src/argument.zig
new file mode 100644
index 0000000..fa9612d
--- /dev/null
+++ b/src/argument.zig
@@ -0,0 +1,197 @@
+const std = @import("std");
+pub const Grammar = @import("grammar.zig");
+
+fn help(err: ?anyerror) noreturn {
+ const stderr = std.io.getStdErr().writer();
+
+ if (err) |e| {
+ stderr.print("error: {s}\n", .{@errorName(e)}) catch unreachable;
+ }
+
+ stderr.writeAll(
+ \\mry [command] [options]
+ \\
+ \\Commands:
+ \\
+ \\ generate [grammar] [options]
+ \\ Options:
+ \\ -o, --output entry Output string to file
+ \\ By default stdout will be used.
+ \\
+ \\ -c, --count n Number of texts to generate. (default: 1)
+ \\
+ \\ -n, --non-empty Only output texts which are non-empty.
+ \\
+ \\ recognize [grammar] [options]
+ \\ Options:
+ \\ -i, --input entry Specify input source, if the path
+ \\ points to a directory it will scan
+ \\ all files in it. By default stdin will
+ \\ be used as input.
+ \\
+ \\General Options
+ \\ -h, --help Print usage
+ \\
+ ) catch unreachable;
+ std.process.exit(@intFromBool(err != null));
+}
+
+fn check(arg_or_null: ?[]const u8) []const u8 {
+ const arg = arg_or_null orelse help(error.MissingArgument);
+
+ if (std.mem.eql(u8, arg, "-h") or std.mem.eql(u8, arg, "--help")) {
+ help(null);
+ }
+ return arg;
+}
+
+fn parse_enum(T: type, arg: []const u8) T {
+ return std.meta.stringToEnum(T, arg) orelse help(error.InvalidArgument);
+}
+
+fn parse_int(n: []const u8) usize {
+ return std.fmt.parseInt(usize, n, 10) catch |e| help(e);
+}
+
+fn check_flags(arg: []const u8, comptime flags: []const []const u8) bool {
+ inline for (flags) |flag| {
+ if (std.mem.eql(u8, arg, flag)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+pub const Mode = enum {
+ recognize,
+ generate,
+};
+
+pub const Entry = struct {
+ const Self = @This();
+
+ name: []const u8,
+ file: std.fs.File,
+
+ pub fn open(path: []const u8) Self {
+ var cwd = std.fs.cwd();
+
+ return Self {
+ .name = path,
+ .file = cwd.openFile(path, .{}) catch |e| help(e)
+ };
+ }
+
+ pub fn read_file(path: []const u8, allocator: std.mem.Allocator) []const u8 {
+ var cwd = std.fs.cwd();
+
+ var file = cwd.openFile(path, .{}) catch |e| help(e);
+ defer file.close();
+ const stat = file.stat() catch |e| help(e);
+
+ return file.readToEndAlloc(allocator, stat.size) catch |e| help(e);
+ }
+
+ pub fn stdin() Self {
+ return Self {
+ .name = "[stdin]",
+ .file = std.io.getStdIn(),
+ };
+ }
+
+ pub fn stdout() Self {
+ return Self {
+ .name = "[stdout]",
+ .file = std.io.getStdOut(),
+ };
+ }
+};
+
+pub const RecognizeArgs = struct {
+ input: Entry,
+ grammar: Grammar,
+};
+
+pub const GenerateArgs = struct {
+ count: usize,
+ empty: bool,
+ output: Entry,
+ grammar: Grammar,
+};
+
+pub const Args = union(Mode) {
+ const Self = @This();
+
+ recognize: RecognizeArgs,
+ generate: GenerateArgs,
+
+ pub fn parse(allocator: std.mem.Allocator) Self {
+ var args = std.process.args();
+ _ = args.next();
+
+ const mode = parse_enum(Mode, check(args.next()));
+
+ const text = Entry.read_file(check(args.next()), allocator);
+ defer allocator.free(text);
+ const grammar = Grammar.parse(
+ text,
+ allocator
+ ) catch |e| help(e);
+
+ switch (mode) {
+ .recognize => {
+ var input: ?Entry = null;
+
+ while (args.next()) |arg| {
+ if (check_flags(arg, &[_][]const u8 { "-i", "--input" })) {
+ input = Entry.open(check(args.next()));
+ } else help(error.InvalidArgument);
+ }
+
+ return Self {
+ .recognize = .{
+ .input = input orelse Entry.stdin(),
+ .grammar = grammar,
+ },
+ };
+ },
+
+ .generate => {
+ var count: usize = 1;
+ var output: ?Entry = null;
+ var empty: bool = true;
+
+ while (args.next()) |arg| {
+ if (check_flags(arg, &[_][]const u8 { "-o", "--output" })) {
+ output = Entry.open(check(args.next()));
+ } else if (check_flags(arg, &[_][]const u8 { "-c", "--count" })) {
+ count = parse_int(check(args.next()));
+ } else if (check_flags(arg, &[_][]const u8 { "-n", "--non-empty" })) {
+ empty = false;
+ } else help(error.InvalidArgument);
+ }
+
+ return Self {
+ .generate = .{
+ .count = count,
+ .empty = empty,
+ .output = output orelse Entry.stdout(),
+ .grammar = grammar,
+ },
+ };
+ },
+ }
+ }
+
+ pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
+ switch (self.*) {
+ .recognize => |*rec| {
+ rec.grammar.deinit(allocator);
+ },
+ .generate => |*gen| {
+ gen.grammar.deinit(allocator);
+ }
+ }
+ }
+};
diff --git a/src/generator.zig b/src/generator.zig
new file mode 100644
index 0000000..8d0c888
--- /dev/null
+++ b/src/generator.zig
@@ -0,0 +1,82 @@
+const std = @import("std");
+
+const Grammar = @import("grammar.zig");
+
+pub fn Generator(T: type) type {
+ const GenError = error { MaxDepth } || std.mem.Allocator.Error;
+
+ return struct {
+ const Self = @This();
+
+ inner: T = .{},
+
+ pub fn sentential_from_grammar(
+ self: *Self,
+ grammar: *Grammar,
+ max_depth: usize,
+ allocator: std.mem.Allocator
+ ) GenError![]u8 {
+ var string = std.ArrayList(u8).init(allocator);
+
+ while (true) {
+ self.generate_sentential_recursive(
+ grammar,
+ &string,
+ grammar.entry_point().name,
+ max_depth
+ ) catch |err| switch (err) {
+ error.MaxDepth => {
+ string.clearAndFree();
+ continue;
+ },
+ else => return err,
+ };
+ break;
+ }
+
+ return string.toOwnedSlice();
+ }
+
+ fn generate_sentential_recursive(
+ self: *Self,
+ grammar: *Grammar,
+ string: *std.ArrayList(u8),
+ n: u8,
+ max_depth: usize
+ ) GenError!void {
+ if (max_depth == 0) {
+ return error.MaxDepth;
+ }
+
+ const rules = grammar.non_terminal_by_name(n).rules();
+ const index = self.inner.next(rules.len);
+ const rule = rules[index];
+
+ const success = blk: {
+ for (rule) |character| {
+ switch (character) {
+ .terminal => |t| try string.append(t),
+ .non_terminal => |next_n| {
+ self.generate_sentential_recursive(
+ grammar,
+ string,
+ next_n,
+ max_depth - 1
+ ) catch |err| switch (err) {
+ error.MaxDepth => break :blk false,
+ else => return err,
+ };
+ },
+ else => {},
+ }
+ }
+
+ break :blk true;
+ };
+
+ if (success) return;
+
+ return error.MaxDepth;
+ }
+ };
+}
diff --git a/src/gss.zig b/src/gss.zig
new file mode 100644
index 0000000..ba8542d
--- /dev/null
+++ b/src/gss.zig
@@ -0,0 +1,30 @@
+const std = @import("std");
+
+pub fn Node(T: type) type {
+ return struct {
+ const Self = @This();
+
+ parent: ?*Node = null,
+ state: T,
+
+ pub fn init(state: T) Self {
+ return Self { .state = state };
+ }
+
+ pub fn push(self: *Self, state: T, allocator: std.mem.Allocator) !*Self {
+ const node = try allocator.create(Self);
+ node.parent = self;
+ node.state = state;
+ return node;
+ }
+
+ pub fn pop(self: *Self, allocator: std.mem.Allocator) struct { T, *Self } {
+ const parent = self.parent;
+ const state = self.state;
+
+ allocator.destroy(self);
+
+ return .{ state, parent };
+ }
+ };
+}
diff --git a/src/main.zig b/src/main.zig
index 7a0513e..4dec1f1 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,18 +1,107 @@
const std = @import("std");
pub const Grammar = @import("grammar.zig");
+pub const gss = @import("gss.zig");
pub const recognizer = @import("recognizer.zig");
+pub const argument = @import("argument.zig");
+pub const Generator = @import("generator.zig").Generator;
-fn help() !noreturn {
- const stderr = std.io.getStdErr().writer();
- try stderr.print("gll [grammar] [input]\n", .{});
- std.process.exit(1);
+const Args = argument.Args;
+const RecognizeArgs = argument.RecognizeArgs;
+const GenerateArgs = argument.GenerateArgs;
+
+fn write_result(
+ writer: anytype,
+ is_tty: bool,
+ name: []const u8,
+ index: usize,
+ input: []const u8,
+ accepted: bool,
+) !void {
+ if (is_tty) {
+ try writer.print("{s}[{}] {s}\x1b[0m: \"\x1b[3m{s}\x1b[0m\"\n", .{
+ name,
+ index,
+ if (accepted) "\x1b[32maccept"
+ else "\x1b[31mreject",
+ input,
+ });
+ } else {
+ try writer.print("{s}[{}] {s}: \"{s}\"\n", .{
+ name,
+ index,
+ if (accepted) "accept"
+ else "reject",
+ input,
+ });
+ }
}
-fn read_whole_file(path: []const u8, allocator: std.mem.Allocator) ![]const u8 {
- const file = try std.fs.cwd().openFile(path, .{});
- defer file.close();
- const stat = try file.stat();
- return file.readToEndAlloc(allocator, stat.size);
+fn recognize(args: *RecognizeArgs, allocator: std.mem.Allocator) !void {
+ const stdout = std.io.getStdOut();
+ const writer = std.io.getStdOut().writer();
+
+ var reader = args.input.file.reader();
+ var index: usize = 0;
+ const stderr = std.io.getStdErr();
+
+ if (args.input.file.isTty()) {
+ try stderr.writeAll("> ");
+ }
+
+ var read_arena = std.heap.ArenaAllocator.init(allocator);
+ defer read_arena.deinit();
+
+ while (try reader.readUntilDelimiterOrEofAlloc(
+ read_arena.allocator(),
+ '\n',
+ std.math.maxInt(usize)
+ )) |buffer| {
+
+ const trimmed = std.mem.trim(u8, buffer, &std.ascii.whitespace);
+ var arena = std.heap.ArenaAllocator.init(allocator);
+ defer arena.deinit();
+
+ try write_result(
+ writer,
+ stdout.isTty(),
+ args.input.name,
+ index,
+ trimmed,
+ try recognizer.check(
+ &args.grammar,
+ trimmed,
+ arena.allocator()
+ ));
+
+ index += 1;
+
+ if (args.input.file.isTty()) {
+ try stderr.writeAll("> ");
+ }
+ }
+}
+
+fn generate(args: *GenerateArgs, allocator: std.mem.Allocator) !void {
+ var writer = args.output.file.writer();
+ var count: usize = 0;
+
+ var generator = Generator(struct {
+ const Self = @This();
+
+ pub fn next(_: *Self, n: usize) usize {
+ return std.crypto.random.uintLessThan(usize, n);
+ }
+ }){};
+
+ while (count < args.count) {
+ const text = try generator.sentential_from_grammar(&args.grammar, 1000, allocator);
+ defer allocator.free(text);
+
+ if (args.empty or text.len > 0) {
+ try writer.print("{s}\n", .{text});
+ count += 1;
+ }
+ }
}
pub fn main() !void {
@@ -23,20 +112,14 @@ pub fn main() !void {
@panic("memory leak detected");
}
}
-
- var args = std.process.args();
- _ = args.next();
-
- const grammar_path = args.next() orelse try help();
- const input_path = args.next() orelse try help();
- const grammar_text = try read_whole_file(grammar_path, allocator);
- defer allocator.free(grammar_text);
- const input = try read_whole_file(input_path, allocator);
- defer allocator.free(input);
+ var arguments = Args.parse(allocator);
+ defer arguments.deinit(allocator);
- var grammar = try Grammar.parse(grammar_text, allocator);
- defer grammar.deinit(allocator);
+ try switch(arguments) {
+ .recognize => |*args| recognize(args, allocator),
+ .generate => |*args| generate(args, allocator),
+ };
}
test {
diff --git a/src/non-terminal.zig b/src/non-terminal.zig
index 097c603..eed30e3 100644
--- a/src/non-terminal.zig
+++ b/src/non-terminal.zig
@@ -78,12 +78,12 @@ pub fn format(
try writer.print("[{c} -> ", .{ self.name });
- if (self.rules().len > 0) {
- for (self.rules()[0]) |c| {
+ if (self.rule_list.items.len > 0) {
+ for (self.rule_list.items[0]) |c| {
try writer.print("{}", .{c});
}
- for (self.rules()[1..]) |r| {
+ for (self.rule_list.items[1..]) |r| {
try writer.print(" | ", .{});
for (r) |c| {
try writer.print("{}", .{c});
diff --git a/src/recognizer.zig b/src/recognizer.zig
new file mode 100644
index 0000000..3adb7a4
--- /dev/null
+++ b/src/recognizer.zig
@@ -0,0 +1,240 @@
+const std = @import("std");
+const Grammar = @import("grammar.zig");
+const NonTerminal = @import("non-terminal.zig");
+const Character = @import("character.zig").Character;
+const Generator = @import("generator.zig").Generator;
+const gss = @import("gss.zig");
+
+const State = struct {
+ const Self = @This();
+
+ name: u8,
+ rule: NonTerminal.Rule,
+ inner_position: usize,
+ input_position: usize,
+
+ pub fn format(
+ self: *const Self,
+ comptime fmt: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = fmt;
+ _ = options;
+
+ try writer.print("[ {c}, {s}, {}, {} ]", .{
+ self.name,
+ self.rule,
+ self.inner_position,
+ self.input_position,
+ });
+ }
+};
+
+pub fn check(grammar: *Grammar, input: []const u8, allocator: std.mem.Allocator) !bool {
+ var stacks = std.ArrayList(std.ArrayList(State)).init(allocator);
+ defer {
+ for (stacks.items) |stack| {
+ stack.deinit();
+ }
+ stacks.deinit();
+ }
+
+ const entry = grammar.entry_point();
+
+ for (entry.rules()) |rule| {
+ const state = State {
+ .name = entry.name,
+ .rule = rule,
+ .inner_position = 0,
+ .input_position = 0,
+ };
+
+ var stack = std.ArrayList(State).init(allocator);
+ try stack.append(state);
+ try stacks.append(stack);
+ }
+
+ var remove_queue = std.ArrayList(usize).init(allocator);
+ defer remove_queue.deinit();
+ var add_queue = std.ArrayList(std.ArrayList(State)).init(allocator);
+ defer add_queue.deinit();
+
+ while (stacks.items.len > 0) {
+ for (stacks.items, 0..) |*stack, stack_index| {
+ var state: *State = &stack.items[stack.items.len - 1];
+
+ if (state.inner_position == state.rule.len) {
+ if (
+ stack.items.len == 1 and
+ state.name == entry.name and
+ state.input_position == input.len
+ ) {
+ return true;
+ }
+
+ const old_state = stack.pop();
+ if (stack.items.len > 0) {
+ state = &stack.items[stack.items.len - 1];
+ state.inner_position += 1;
+ state.input_position = old_state.input_position;
+ } else {
+ try remove_queue.append(stack_index);
+ }
+ continue;
+ }
+
+ const epsilon_only = state.input_position == input.len;
+
+ switch (state.rule[state.inner_position]) {
+ .terminal => |t| {
+ if (!epsilon_only and input[state.input_position] == t) {
+ state.inner_position += 1;
+ state.input_position += 1;
+ } else {
+ try remove_queue.append(stack_index);
+ }
+ },
+ .non_terminal => |n| {
+ const non_terminal = grammar.non_terminal_by_name(n);
+ const rules = non_terminal.rules();
+
+ if (!epsilon_only and non_terminal.first.is_set(input[state.input_position])) {
+ for (rules) |rule| {
+ const next_state = State {
+ .name = non_terminal.name,
+ .rule = rule,
+ .inner_position = 0,
+ .input_position = state.input_position,
+ };
+ var new_stack = try stack.clone();
+ try new_stack.append(next_state);
+ try add_queue.append(new_stack);
+ }
+ }
+
+ if (non_terminal.first.is_set(Character.EPSILON)) {
+ state.inner_position += 1;
+ } else {
+ try remove_queue.append(stack_index);
+ }
+ },
+ .epsilon => {
+ state.inner_position += 1;
+ },
+ }
+ }
+
+ for (remove_queue.items, 0..) |index, offset| {
+ const stack = stacks.orderedRemove(index - offset);
+ stack.deinit();
+ }
+
+ for (add_queue.items) |stack| {
+ try stacks.append(stack);
+ }
+
+ remove_queue.clearAndFree();
+ add_queue.clearAndFree();
+ }
+
+ return false;
+}
+
+test "expr" {
+ const text =
+ \\S -> B A
+ \\A -> + B A
+ \\A -> _
+ \\B -> D C
+ \\C -> * D C
+ \\C -> _
+ \\D -> ( S )
+ \\D -> a
+ \\D -> b
+ ;
+
+ const input = "b+a*b";
+
+ const allocator = std.testing.allocator;
+
+ var grammar = try Grammar.parse(text, allocator);
+ defer grammar.deinit(allocator);
+
+ try std.testing.expect(try check(&grammar, input, allocator));
+}
+
+test "simple 0 - success" {
+ const text =
+ \\S -> A S d
+ \\S -> B S
+ \\S -> _
+ \\A -> a
+ \\A -> c
+ \\B -> a
+ \\B -> b
+ ;
+
+ const input = "aad";
+
+ const allocator = std.testing.allocator;
+
+ var grammar = try Grammar.parse(text, allocator);
+ defer grammar.deinit(allocator);
+
+ try std.testing.expect(try check(&grammar, input, allocator));
+}
+
+test "simple 0 - fail" {
+ const text =
+ \\S -> A S d
+ \\S -> B S
+ \\S -> _
+ \\A -> a
+ \\A -> c
+ \\B -> a
+ \\B -> b
+ ;
+
+ const input = "accd";
+
+ const allocator = std.testing.allocator;
+
+ var grammar = try Grammar.parse(text, allocator);
+ defer grammar.deinit(allocator);
+
+ try std.testing.expect(!try check(&grammar, input, allocator));
+}
+
+test "simple 0 - fuzzy" {
+ const text =
+ \\S -> A S d
+ \\S -> B S
+ \\S -> _
+ \\A -> a
+ \\A -> c
+ \\B -> a
+ \\B -> b
+ ;
+
+ const allocator = std.testing.allocator;
+
+ var grammar = try Grammar.parse(text, allocator);
+ defer grammar.deinit(allocator);
+
+ var generator = Generator(struct {
+ const Self = @This();
+
+ pub fn next(_: *Self, n: usize) usize {
+ return std.crypto.random.uintLessThan(usize, n);
+ }
+ }){};
+
+ for (0..100) |_| {
+ const input = try generator.sentential_from_grammar(&grammar, 1000, allocator);
+ defer allocator.free(input);
+
+ try std.testing.expect(try check(&grammar, input, allocator));
+ }
+
+}