diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/argument.zig | 197 | ||||
| -rw-r--r-- | src/generator.zig | 82 | ||||
| -rw-r--r-- | src/gss.zig | 30 | ||||
| -rw-r--r-- | src/main.zig | 125 | ||||
| -rw-r--r-- | src/non-terminal.zig | 6 | ||||
| -rw-r--r-- | src/recognizer.zig | 240 |
6 files changed, 656 insertions, 24 deletions
diff --git a/src/argument.zig b/src/argument.zig new file mode 100644 index 0000000..fa9612d --- /dev/null +++ b/src/argument.zig @@ -0,0 +1,197 @@ +const std = @import("std"); +pub const Grammar = @import("grammar.zig"); + +fn help(err: ?anyerror) noreturn { + const stderr = std.io.getStdErr().writer(); + + if (err) |e| { + stderr.print("error: {s}\n", .{@errorName(e)}) catch unreachable; + } + + stderr.writeAll( + \\mry [command] [options] + \\ + \\Commands: + \\ + \\ generate [grammar] [options] + \\ Options: + \\ -o, --output entry Output string to file + \\ By default stdout will be used. + \\ + \\ -c, --count n Number of texts to generate. (default: 1) + \\ + \\ -n, --non-empty Only output texts which are non-empty. + \\ + \\ recognize [grammar] [options] + \\ Options: + \\ -i, --input entry Specify input source, if the path + \\ points to a directory it will scan + \\ all files in it. By default stdin will + \\ be used as input. + \\ + \\General Options + \\ -h, --help Print usage + \\ + ) catch unreachable; + std.process.exit(@intFromBool(err != null)); +} + +fn check(arg_or_null: ?[]const u8) []const u8 { + const arg = arg_or_null orelse help(error.MissingArgument); + + if (std.mem.eql(u8, arg, "-h") or std.mem.eql(u8, arg, "--help")) { + help(null); + } + return arg; +} + +fn parse_enum(T: type, arg: []const u8) T { + return std.meta.stringToEnum(T, arg) orelse help(error.InvalidArgument); +} + +fn parse_int(n: []const u8) usize { + return std.fmt.parseInt(usize, n, 10) catch |e| help(e); +} + +fn check_flags(arg: []const u8, comptime flags: []const []const u8) bool { + inline for (flags) |flag| { + if (std.mem.eql(u8, arg, flag)) { + return true; + } + } + + return false; +} + +pub const Mode = enum { + recognize, + generate, +}; + +pub const Entry = struct { + const Self = @This(); + + name: []const u8, + file: std.fs.File, + + pub fn open(path: []const u8) Self { + var cwd = std.fs.cwd(); + + return Self { + .name = path, + .file = cwd.openFile(path, .{}) catch |e| help(e) + }; + } + + pub fn read_file(path: []const u8, allocator: std.mem.Allocator) []const u8 { + var cwd = std.fs.cwd(); + + var file = cwd.openFile(path, .{}) catch |e| help(e); + defer file.close(); + const stat = file.stat() catch |e| help(e); + + return file.readToEndAlloc(allocator, stat.size) catch |e| help(e); + } + + pub fn stdin() Self { + return Self { + .name = "[stdin]", + .file = std.io.getStdIn(), + }; + } + + pub fn stdout() Self { + return Self { + .name = "[stdout]", + .file = std.io.getStdOut(), + }; + } +}; + +pub const RecognizeArgs = struct { + input: Entry, + grammar: Grammar, +}; + +pub const GenerateArgs = struct { + count: usize, + empty: bool, + output: Entry, + grammar: Grammar, +}; + +pub const Args = union(Mode) { + const Self = @This(); + + recognize: RecognizeArgs, + generate: GenerateArgs, + + pub fn parse(allocator: std.mem.Allocator) Self { + var args = std.process.args(); + _ = args.next(); + + const mode = parse_enum(Mode, check(args.next())); + + const text = Entry.read_file(check(args.next()), allocator); + defer allocator.free(text); + const grammar = Grammar.parse( + text, + allocator + ) catch |e| help(e); + + switch (mode) { + .recognize => { + var input: ?Entry = null; + + while (args.next()) |arg| { + if (check_flags(arg, &[_][]const u8 { "-i", "--input" })) { + input = Entry.open(check(args.next())); + } else help(error.InvalidArgument); + } + + return Self { + .recognize = .{ + .input = input orelse Entry.stdin(), + .grammar = grammar, + }, + }; + }, + + .generate => { + var count: usize = 1; + var output: ?Entry = null; + var empty: bool = true; + + while (args.next()) |arg| { + if (check_flags(arg, &[_][]const u8 { "-o", "--output" })) { + output = Entry.open(check(args.next())); + } else if (check_flags(arg, &[_][]const u8 { "-c", "--count" })) { + count = parse_int(check(args.next())); + } else if (check_flags(arg, &[_][]const u8 { "-n", "--non-empty" })) { + empty = false; + } else help(error.InvalidArgument); + } + + return Self { + .generate = .{ + .count = count, + .empty = empty, + .output = output orelse Entry.stdout(), + .grammar = grammar, + }, + }; + }, + } + } + + pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { + switch (self.*) { + .recognize => |*rec| { + rec.grammar.deinit(allocator); + }, + .generate => |*gen| { + gen.grammar.deinit(allocator); + } + } + } +}; diff --git a/src/generator.zig b/src/generator.zig new file mode 100644 index 0000000..8d0c888 --- /dev/null +++ b/src/generator.zig @@ -0,0 +1,82 @@ +const std = @import("std"); + +const Grammar = @import("grammar.zig"); + +pub fn Generator(T: type) type { + const GenError = error { MaxDepth } || std.mem.Allocator.Error; + + return struct { + const Self = @This(); + + inner: T = .{}, + + pub fn sentential_from_grammar( + self: *Self, + grammar: *Grammar, + max_depth: usize, + allocator: std.mem.Allocator + ) GenError![]u8 { + var string = std.ArrayList(u8).init(allocator); + + while (true) { + self.generate_sentential_recursive( + grammar, + &string, + grammar.entry_point().name, + max_depth + ) catch |err| switch (err) { + error.MaxDepth => { + string.clearAndFree(); + continue; + }, + else => return err, + }; + break; + } + + return string.toOwnedSlice(); + } + + fn generate_sentential_recursive( + self: *Self, + grammar: *Grammar, + string: *std.ArrayList(u8), + n: u8, + max_depth: usize + ) GenError!void { + if (max_depth == 0) { + return error.MaxDepth; + } + + const rules = grammar.non_terminal_by_name(n).rules(); + const index = self.inner.next(rules.len); + const rule = rules[index]; + + const success = blk: { + for (rule) |character| { + switch (character) { + .terminal => |t| try string.append(t), + .non_terminal => |next_n| { + self.generate_sentential_recursive( + grammar, + string, + next_n, + max_depth - 1 + ) catch |err| switch (err) { + error.MaxDepth => break :blk false, + else => return err, + }; + }, + else => {}, + } + } + + break :blk true; + }; + + if (success) return; + + return error.MaxDepth; + } + }; +} diff --git a/src/gss.zig b/src/gss.zig new file mode 100644 index 0000000..ba8542d --- /dev/null +++ b/src/gss.zig @@ -0,0 +1,30 @@ +const std = @import("std"); + +pub fn Node(T: type) type { + return struct { + const Self = @This(); + + parent: ?*Node = null, + state: T, + + pub fn init(state: T) Self { + return Self { .state = state }; + } + + pub fn push(self: *Self, state: T, allocator: std.mem.Allocator) !*Self { + const node = try allocator.create(Self); + node.parent = self; + node.state = state; + return node; + } + + pub fn pop(self: *Self, allocator: std.mem.Allocator) struct { T, *Self } { + const parent = self.parent; + const state = self.state; + + allocator.destroy(self); + + return .{ state, parent }; + } + }; +} diff --git a/src/main.zig b/src/main.zig index 7a0513e..4dec1f1 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,18 +1,107 @@ const std = @import("std"); pub const Grammar = @import("grammar.zig"); +pub const gss = @import("gss.zig"); pub const recognizer = @import("recognizer.zig"); +pub const argument = @import("argument.zig"); +pub const Generator = @import("generator.zig").Generator; -fn help() !noreturn { - const stderr = std.io.getStdErr().writer(); - try stderr.print("gll [grammar] [input]\n", .{}); - std.process.exit(1); +const Args = argument.Args; +const RecognizeArgs = argument.RecognizeArgs; +const GenerateArgs = argument.GenerateArgs; + +fn write_result( + writer: anytype, + is_tty: bool, + name: []const u8, + index: usize, + input: []const u8, + accepted: bool, +) !void { + if (is_tty) { + try writer.print("{s}[{}] {s}\x1b[0m: \"\x1b[3m{s}\x1b[0m\"\n", .{ + name, + index, + if (accepted) "\x1b[32maccept" + else "\x1b[31mreject", + input, + }); + } else { + try writer.print("{s}[{}] {s}: \"{s}\"\n", .{ + name, + index, + if (accepted) "accept" + else "reject", + input, + }); + } } -fn read_whole_file(path: []const u8, allocator: std.mem.Allocator) ![]const u8 { - const file = try std.fs.cwd().openFile(path, .{}); - defer file.close(); - const stat = try file.stat(); - return file.readToEndAlloc(allocator, stat.size); +fn recognize(args: *RecognizeArgs, allocator: std.mem.Allocator) !void { + const stdout = std.io.getStdOut(); + const writer = std.io.getStdOut().writer(); + + var reader = args.input.file.reader(); + var index: usize = 0; + const stderr = std.io.getStdErr(); + + if (args.input.file.isTty()) { + try stderr.writeAll("> "); + } + + var read_arena = std.heap.ArenaAllocator.init(allocator); + defer read_arena.deinit(); + + while (try reader.readUntilDelimiterOrEofAlloc( + read_arena.allocator(), + '\n', + std.math.maxInt(usize) + )) |buffer| { + + const trimmed = std.mem.trim(u8, buffer, &std.ascii.whitespace); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try write_result( + writer, + stdout.isTty(), + args.input.name, + index, + trimmed, + try recognizer.check( + &args.grammar, + trimmed, + arena.allocator() + )); + + index += 1; + + if (args.input.file.isTty()) { + try stderr.writeAll("> "); + } + } +} + +fn generate(args: *GenerateArgs, allocator: std.mem.Allocator) !void { + var writer = args.output.file.writer(); + var count: usize = 0; + + var generator = Generator(struct { + const Self = @This(); + + pub fn next(_: *Self, n: usize) usize { + return std.crypto.random.uintLessThan(usize, n); + } + }){}; + + while (count < args.count) { + const text = try generator.sentential_from_grammar(&args.grammar, 1000, allocator); + defer allocator.free(text); + + if (args.empty or text.len > 0) { + try writer.print("{s}\n", .{text}); + count += 1; + } + } } pub fn main() !void { @@ -23,20 +112,14 @@ pub fn main() !void { @panic("memory leak detected"); } } - - var args = std.process.args(); - _ = args.next(); - - const grammar_path = args.next() orelse try help(); - const input_path = args.next() orelse try help(); - const grammar_text = try read_whole_file(grammar_path, allocator); - defer allocator.free(grammar_text); - const input = try read_whole_file(input_path, allocator); - defer allocator.free(input); + var arguments = Args.parse(allocator); + defer arguments.deinit(allocator); - var grammar = try Grammar.parse(grammar_text, allocator); - defer grammar.deinit(allocator); + try switch(arguments) { + .recognize => |*args| recognize(args, allocator), + .generate => |*args| generate(args, allocator), + }; } test { diff --git a/src/non-terminal.zig b/src/non-terminal.zig index 097c603..eed30e3 100644 --- a/src/non-terminal.zig +++ b/src/non-terminal.zig @@ -78,12 +78,12 @@ pub fn format( try writer.print("[{c} -> ", .{ self.name }); - if (self.rules().len > 0) { - for (self.rules()[0]) |c| { + if (self.rule_list.items.len > 0) { + for (self.rule_list.items[0]) |c| { try writer.print("{}", .{c}); } - for (self.rules()[1..]) |r| { + for (self.rule_list.items[1..]) |r| { try writer.print(" | ", .{}); for (r) |c| { try writer.print("{}", .{c}); diff --git a/src/recognizer.zig b/src/recognizer.zig new file mode 100644 index 0000000..3adb7a4 --- /dev/null +++ b/src/recognizer.zig @@ -0,0 +1,240 @@ +const std = @import("std"); +const Grammar = @import("grammar.zig"); +const NonTerminal = @import("non-terminal.zig"); +const Character = @import("character.zig").Character; +const Generator = @import("generator.zig").Generator; +const gss = @import("gss.zig"); + +const State = struct { + const Self = @This(); + + name: u8, + rule: NonTerminal.Rule, + inner_position: usize, + input_position: usize, + + pub fn format( + self: *const Self, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + + try writer.print("[ {c}, {s}, {}, {} ]", .{ + self.name, + self.rule, + self.inner_position, + self.input_position, + }); + } +}; + +pub fn check(grammar: *Grammar, input: []const u8, allocator: std.mem.Allocator) !bool { + var stacks = std.ArrayList(std.ArrayList(State)).init(allocator); + defer { + for (stacks.items) |stack| { + stack.deinit(); + } + stacks.deinit(); + } + + const entry = grammar.entry_point(); + + for (entry.rules()) |rule| { + const state = State { + .name = entry.name, + .rule = rule, + .inner_position = 0, + .input_position = 0, + }; + + var stack = std.ArrayList(State).init(allocator); + try stack.append(state); + try stacks.append(stack); + } + + var remove_queue = std.ArrayList(usize).init(allocator); + defer remove_queue.deinit(); + var add_queue = std.ArrayList(std.ArrayList(State)).init(allocator); + defer add_queue.deinit(); + + while (stacks.items.len > 0) { + for (stacks.items, 0..) |*stack, stack_index| { + var state: *State = &stack.items[stack.items.len - 1]; + + if (state.inner_position == state.rule.len) { + if ( + stack.items.len == 1 and + state.name == entry.name and + state.input_position == input.len + ) { + return true; + } + + const old_state = stack.pop(); + if (stack.items.len > 0) { + state = &stack.items[stack.items.len - 1]; + state.inner_position += 1; + state.input_position = old_state.input_position; + } else { + try remove_queue.append(stack_index); + } + continue; + } + + const epsilon_only = state.input_position == input.len; + + switch (state.rule[state.inner_position]) { + .terminal => |t| { + if (!epsilon_only and input[state.input_position] == t) { + state.inner_position += 1; + state.input_position += 1; + } else { + try remove_queue.append(stack_index); + } + }, + .non_terminal => |n| { + const non_terminal = grammar.non_terminal_by_name(n); + const rules = non_terminal.rules(); + + if (!epsilon_only and non_terminal.first.is_set(input[state.input_position])) { + for (rules) |rule| { + const next_state = State { + .name = non_terminal.name, + .rule = rule, + .inner_position = 0, + .input_position = state.input_position, + }; + var new_stack = try stack.clone(); + try new_stack.append(next_state); + try add_queue.append(new_stack); + } + } + + if (non_terminal.first.is_set(Character.EPSILON)) { + state.inner_position += 1; + } else { + try remove_queue.append(stack_index); + } + }, + .epsilon => { + state.inner_position += 1; + }, + } + } + + for (remove_queue.items, 0..) |index, offset| { + const stack = stacks.orderedRemove(index - offset); + stack.deinit(); + } + + for (add_queue.items) |stack| { + try stacks.append(stack); + } + + remove_queue.clearAndFree(); + add_queue.clearAndFree(); + } + + return false; +} + +test "expr" { + const text = + \\S -> B A + \\A -> + B A + \\A -> _ + \\B -> D C + \\C -> * D C + \\C -> _ + \\D -> ( S ) + \\D -> a + \\D -> b + ; + + const input = "b+a*b"; + + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse(text, allocator); + defer grammar.deinit(allocator); + + try std.testing.expect(try check(&grammar, input, allocator)); +} + +test "simple 0 - success" { + const text = + \\S -> A S d + \\S -> B S + \\S -> _ + \\A -> a + \\A -> c + \\B -> a + \\B -> b + ; + + const input = "aad"; + + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse(text, allocator); + defer grammar.deinit(allocator); + + try std.testing.expect(try check(&grammar, input, allocator)); +} + +test "simple 0 - fail" { + const text = + \\S -> A S d + \\S -> B S + \\S -> _ + \\A -> a + \\A -> c + \\B -> a + \\B -> b + ; + + const input = "accd"; + + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse(text, allocator); + defer grammar.deinit(allocator); + + try std.testing.expect(!try check(&grammar, input, allocator)); +} + +test "simple 0 - fuzzy" { + const text = + \\S -> A S d + \\S -> B S + \\S -> _ + \\A -> a + \\A -> c + \\B -> a + \\B -> b + ; + + const allocator = std.testing.allocator; + + var grammar = try Grammar.parse(text, allocator); + defer grammar.deinit(allocator); + + var generator = Generator(struct { + const Self = @This(); + + pub fn next(_: *Self, n: usize) usize { + return std.crypto.random.uintLessThan(usize, n); + } + }){}; + + for (0..100) |_| { + const input = try generator.sentential_from_grammar(&grammar, 1000, allocator); + defer allocator.free(input); + + try std.testing.expect(try check(&grammar, input, allocator)); + } + +} |