diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/compile.zig | 290 | ||||
-rw-r--r-- | src/main.zig | 26 | ||||
-rw-r--r-- | src/parse.zig | 36 |
3 files changed, 266 insertions, 86 deletions
diff --git a/src/compile.zig b/src/compile.zig index 8cf61ca..c0ba77d 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -5,8 +5,30 @@ const Token = root.Lexer.Token; const parse = root.parse; const Location = root.Lexer.Location; +/// Virtual register pub const VReg = enum(u32) { _ }; pub const BlockRef = enum(u32) { _ }; +/// Id for local variables in a procedure +pub const LVar = enum(u32) { _ }; + +fn IdCounter(Id: type) type { + const Inner = @typeInfo(Id).@"enum".tag_type; + std.debug.assert(@typeInfo(Inner) == .int); + return struct { + counter: Inner, + + const init: Self = .{ .counter = 0 }; + + fn get(self: *Self) Id { + const id: Id = @enumFromInt(self.counter); + self.counter = std.math.add(Inner, self.counter, 1) catch + std.debug.panic("IdCounter({}) ran out of ids!", .{Id}); + return id; + } + + const Self = @This(); + }; +} pub const Instr = struct { loc: Location, @@ -63,23 +85,25 @@ pub const Instr = struct { pub const Branch = struct { cond: VReg, - true: BlockRef, - false: BlockRef, + true: Jump, + false: Jump, pub const may_end_block = {}; - pub fn sources(self: Branch) Sources { - return Sources.fromSlice(&.{self.cond}) catch unreachable; + pub fn sources(_: Branch) Sources { + @panic("Sources not implemented for Branch!!!"); + // return Sources.fromSlice(&.{self.cond}) catch unreachable; } }; pub const Jump = struct { to: BlockRef, + args: std.ArrayListUnmanaged(VReg) = .empty, pub const may_end_block = {}; - pub fn sources(_: Jump) Sources { - return Sources.init(0) catch unreachable; + pub fn sources(self: Jump) Sources { + return Sources.fromSlice(self.args) catch unreachable; } }; @@ -131,15 +155,28 @@ pub const Instr = struct { @intFromEnum(proc_call.arg), }, ), - .branch => |branch| try writer.print( - "branch %{} ? ${} : ${}", - .{ + .branch => |branch| { + try writer.print("branch %{} ? ${}(", .{ @intFromEnum(branch.cond), - @intFromEnum(branch.true), - @intFromEnum(branch.false), - }, - ), - .jump => |jump| try writer.print("jump ${}", .{@intFromEnum(jump.to)}), + @intFromEnum(branch.true.to), + }); + for (branch.true.args.items, 0..) |arg, i| { + try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) }); + } + try writer.print(") : ${}(", .{@intFromEnum(branch.false.to)}); + for (branch.false.args.items, 0..) |arg, i| { + try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) }); + } + try writer.print(")", .{}); + }, + .jump => |jump| { + try writer.print("jump ${}(", .{@intFromEnum(jump.to)}); + for (jump.args.items, 0..) |arg, i| { + try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) }); + } + try writer.print(")", .{}); + }, + .exit => |_| try writer.print("exit", .{}), } } @@ -148,21 +185,34 @@ pub const Instr = struct { pub const Procedure = struct { blocks: []BasicBlock, - fn init(allocator: Allocator, blocks: []BasicBlock) !Procedure { - for (blocks) |*block| { - try block.finalize(allocator); - } + fn init(blocks: []BasicBlock) !Procedure { + // for (blocks) |*block| { + // try block.finalize(allocator); + // } + return .{ .blocks = blocks }; } + + pub fn format(self: Procedure, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = .{ fmt, options }; + for (self.blocks, 0..) |block, i| { + try writer.print("${}{}", .{ i, block }); + } + } }; pub const BasicBlock = struct { - // arguments: []Reg, + params: std.AutoArrayHashMapUnmanaged(LVar, VReg) = .empty, instrs: std.ArrayListUnmanaged(Instr) = .empty, vreg_last_use: std.AutoHashMapUnmanaged(VReg, usize) = .empty, fn finalize(self: *BasicBlock, allocator: Allocator) !void { + std.debug.assert(self.instrs.items > 0); + std.debug.assert(switch (self.instrs.getLast().type) { + inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"), + }); + self.vreg_last_use = .empty; for (0.., self.instrs.items) |i, instr| { for (instr.sources().slice()) |src| @@ -170,29 +220,64 @@ pub const BasicBlock = struct { if (instr.dest()) |dest| try self.vreg_last_use.put(allocator, dest, i); } - std.debug.assert(switch (self.instrs.items[self.instrs.items.len - 1].type) { - inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"), - }); } + + fn immediate_successors(self: *BasicBlock) std.BoundedArray(BlockRef, 2) { + return switch (self.instrs.getLast().type) { + .branch => |branch| std.BoundedArray(BlockRef, 2).fromSlice(&.{ branch.true.to, branch.false.to }) catch unreachable, + .jump => |jump| std.BoundedArray(BlockRef, 2).fromSlice(&.{jump.to}) catch unreachable, + else => .{}, + }; + } + + pub fn format(self: BasicBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = .{ fmt, options }; + try writer.print("(", .{}); + { + var it = self.params.iterator(); + var first = true; + while (it.next()) |ent| { + try writer.print("{s}%{}", .{ if (first) "" else ", ", @intFromEnum(ent.value_ptr.*) }); + first = false; + } + } + try writer.print("):\n", .{}); + for (self.instrs.items) |instr| { + try writer.print(" {}\n", .{instr}); + } + } + + const Info = struct { + local_current_vreg: std.AutoArrayHashMapUnmanaged(LVar, VReg) = .empty, + }; }; pub fn compile(allocator: Allocator, source: []const u8, block: parse.Block) !Procedure { var ctx: CompileContext = .{ .allocator = allocator, .source = source, - .register_counter = 0, + .vreg_ctr = .init, + .lvar_ctr = .init, .scope = .{ .locals = .empty, .parent = null }, .blocks = .empty, + .block_infos = .empty, .current_block = @enumFromInt(0), }; + _ = try ctx.switchToNewBlock(); try ctx.compileBlock(block); try ctx.addInstr(.{ .loc = .{ .start = 0, .end = 0 }, .type = .{ .exit = .{} }, }); - return try .init(allocator, try ctx.blocks.toOwnedSlice(allocator)); + try ctx.assignArguments(); + return try .init(try ctx.blocks.toOwnedSlice(allocator)); } +const Scope = struct { + locals: std.StringHashMapUnmanaged(LVar), + parent: ?*Scope, +}; + const CompileError = error{ OutOfMemory, CanOnlyCallIdentifiers, @@ -203,20 +288,17 @@ const CompileError = error{ const CompileContext = struct { allocator: Allocator, source: []const u8, - register_counter: u32, + + vreg_ctr: IdCounter(VReg), + lvar_ctr: IdCounter(LVar), + scope: Scope, + blocks: std.ArrayListUnmanaged(BasicBlock), + block_infos: std.ArrayListUnmanaged(BasicBlock.Info), current_block: BlockRef, - // instrs: std.ArrayListUnmanaged(Instr), - - const Scope = struct { - locals: std.StringHashMapUnmanaged(VReg), - parent: ?*Scope, - }; fn compileBlock(self: *CompileContext, block: parse.Block) !void { - _ = try self.switchToNewBlock(); - var parent = self.scope; self.scope = .{ .locals = .empty, @@ -235,32 +317,31 @@ const CompileContext = struct { switch (stmt.type) { .expr => |expr| _ = try self.compileExpr(expr), .block => |block| { - const curr = self.current_block; - const after = try self.switchToNewBlock(); try self.compileBlock(block); - const b = self.current_block; - try self.addInstr(.{ - .loc = stmt.loc, - .type = .{ .jump = .{ .to = after } }, - }); - self.current_block = curr; - try self.addInstr(.{ - .loc = stmt.loc, - .type = .{ .jump = .{ .to = b } }, - }); - self.current_block = after; }, - .declare_var => |declare_var| { - const val = try self.compileExpr(declare_var.value); - const name = declare_var.ident.getIdent(self.source); - try self.scope.locals.put(self.allocator, name, val); + .assign_var => |assign_var| { + const local = if (assign_var.is_decl) blk: { + const local = self.lvar_ctr.get(); + const name = assign_var.ident.getIdent(self.source); + try self.scope.locals.put(self.allocator, name, local); + break :blk local; + } else blk: { + var scope: ?*Scope = &self.scope; + while (scope) |s| : (scope = s.parent) { + if (s.locals.get(assign_var.ident.getIdent(self.source))) |local| + break :blk local; + } else return error.UnknownVariable; + }; + + const val = try self.compileExpr(assign_var.value); + try self.assignLocalVar(local, val); }, } } fn compileExpr(self: *CompileContext, expr: *const parse.Expr) !VReg { // This is not used by all expression types, but creating an unused virtual register is not a problem. - const dest = self.register(); + const dest = self.vreg_ctr.get(); switch (expr.type) { .integer_literal => try self.addInstr(.{ .loc = expr.loc, @@ -301,8 +382,8 @@ const CompileContext = struct { .identifier => { var scope: ?*Scope = &self.scope; while (scope) |s| : (scope = s.parent) { - if (s.locals.get(expr.loc.getIdent(self.source))) |reg| { - return reg; + if (s.locals.get(expr.loc.getIdent(self.source))) |local| { + return self.getLocalVar(local); } } return error.UnknownVariable; @@ -313,20 +394,21 @@ const CompileContext = struct { const after = try self.switchToNewBlock(); + const t = try self.switchToNewBlock(); try self.compileBlock(@"if".then); try self.addInstr(.{ .loc = expr.loc, .type = .{ .jump = .{ .to = after } }, }); - const t = self.current_block; const f = if (@"if".@"else") |@"else"| blk: { + const f = try self.switchToNewBlock(); try self.compileBlock(@"else"); try self.addInstr(.{ .loc = expr.loc, .type = .{ .jump = .{ .to = after } }, }); - break :blk self.current_block; + break :blk f; } else after; self.current_block = curr; @@ -334,8 +416,8 @@ const CompileContext = struct { .loc = expr.loc, .type = .{ .branch = .{ .cond = cond, - .true = t, - .false = f, + .true = .{ .to = t }, + .false = .{ .to = f }, } }, }); @@ -345,9 +427,83 @@ const CompileContext = struct { return dest; } + fn assignArguments(self: *CompileContext) !void { + var immediate_predecessors: std.AutoHashMapUnmanaged(usize, std.ArrayListUnmanaged(usize)) = .empty; + defer immediate_predecessors.deinit(self.allocator); + defer { + var it = immediate_predecessors.valueIterator(); + while (it.next()) |val| val.deinit(self.allocator); + } + + for (self.blocks.items, 0..) |block, i| { + switch (block.instrs.getLast().type) { + .branch => |branch| { + const pt = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(branch.true.to)); + if (!pt.found_existing) { + pt.value_ptr.* = .empty; + } + try pt.value_ptr.append(self.allocator, i); + + const pf = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(branch.false.to)); + if (!pf.found_existing) { + pf.value_ptr.* = .empty; + } + try pf.value_ptr.append(self.allocator, i); + }, + .jump => |jump| { + const p = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(jump.to)); + if (!p.found_existing) { + p.value_ptr.* = .empty; + } + try p.value_ptr.append(self.allocator, i); + }, + else => {}, + } + } + + var block_ids: std.ArrayListUnmanaged(usize) = .empty; + for (0..self.blocks.items.len) |i| try block_ids.append(self.allocator, i); + while (block_ids.pop()) |ref| { + const block = &self.blocks.items[ref]; + const info = &self.block_infos.items[ref]; + const ty = &block.instrs.items[block.instrs.items.len - 1].type; + const got_new_params = switch (ty.*) { + .branch => blk: { + const t = try self.assignArgumentsInner(block, info, &ty.branch.true); + const f = try self.assignArgumentsInner(block, info, &ty.branch.false); + break :blk t or f; + }, + .jump => try self.assignArgumentsInner(block, info, &ty.jump), + else => false, + }; + if (got_new_params) { + try block_ids.appendSlice(self.allocator, immediate_predecessors.get(ref).?.items); + } + } + } + + fn assignArgumentsInner(self: *CompileContext, block: *BasicBlock, info: *BasicBlock.Info, jump: *Instr.Jump) !bool { + var got_new_params = false; + var it = self.blocks.items[@intFromEnum(jump.to)].params.iterator(); + // Skip over the arguments that we've already handled in an earlier call + for (0..jump.args.items.len) |_| _ = it.next(); + while (it.next()) |ent| { + try jump.args.append(self.allocator, info.local_current_vreg.get(ent.key_ptr.*) orelse blk: { + const reg = self.vreg_ctr.get(); + try block.params.put(self.allocator, ent.key_ptr.*, reg); + try info.local_current_vreg.put(self.allocator, ent.key_ptr.*, reg); + got_new_params = true; + break :blk reg; + }); + } + + return got_new_params; + } + fn switchToNewBlock(self: *CompileContext) !BlockRef { const ref: BlockRef = @enumFromInt(self.blocks.items.len); try self.blocks.append(self.allocator, .{}); + try self.block_infos.append(self.allocator, .{}); self.current_block = ref; return ref; } @@ -357,9 +513,21 @@ const CompileContext = struct { .instrs.append(self.allocator, instr); } - fn register(self: *CompileContext) VReg { - const reg: VReg = @enumFromInt(self.register_counter); - self.register_counter += 1; - return reg; + fn getLocalVar(self: *CompileContext, local: LVar) !VReg { + if (self.block_infos.items[@intFromEnum(self.current_block)] + .local_current_vreg.get(local)) |vreg| return vreg; + + const vreg = self.vreg_ctr.get(); + try self.blocks.items[@intFromEnum(self.current_block)] + .params.put(self.allocator, local, vreg); + try self.block_infos.items[@intFromEnum(self.current_block)] + .local_current_vreg.put(self.allocator, local, vreg); + + return vreg; + } + + fn assignLocalVar(self: *CompileContext, local: LVar, vreg: VReg) !void { + try self.block_infos.items[@intFromEnum(self.current_block)] + .local_current_vreg.put(self.allocator, local, vreg); } }; diff --git a/src/main.zig b/src/main.zig index 48637c2..314459b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -21,8 +21,6 @@ pub fn main() !void { std.io.getStdOut(); const run = if (args.next()) |arg| std.mem.eql(u8, arg, "run") else false; - const output = out_file.writer(); - // var br = std.io.bufferedReader(std.io.getStdIn().reader()); // const stdin = br.reader(); // @@ -40,11 +38,11 @@ pub fn main() !void { \\{ \\ let x = 10 \\ if x { - \\ let x = read_int(0) + \\ # let x = read_int(0) \\ # print(18446744073709551615) - \\ # print(x + x) + \\ x = x + x \\ } else { - \\ print(10) + \\ x = 69 \\ } \\ print(x) \\} @@ -63,16 +61,16 @@ pub fn main() !void { std.debug.print("Unexpected token {}, expected end of file\n", .{lexer.next()}); } const procedure = try compile.compile(allocator, source, ast); - std.debug.print("Bytecode instructions:\n", .{}); - for (procedure.blocks, 0..) |block, i| { - std.debug.print(" ${}:\n", .{i}); - for (block.instrs.items) |instr| { - std.debug.print(" {}\n", .{instr}); - } - } + std.debug.print("Bytecode instructions:\n{}", .{procedure}); + // for (procedure.blocks, 0..) |block, i| { + // std.debug.print(" ${}:\n", .{i}); + // for (block.instrs.items) |instr| { + // std.debug.print(" {}\n", .{instr}); + // } + // } // std.debug.print("Last use of each virtual register: {}\n", .{fmtHashMap(block.vreg_last_use)}); - const elf = try codegen.create_elf(allocator, procedure); - try output.writeAll(elf); + // const elf = try codegen.create_elf(allocator, procedure); + // try out_file.writer().writeAll(elf); std.debug.print("Run output:\n", .{}); if (run) { out_file.close(); diff --git a/src/parse.zig b/src/parse.zig index 92fb219..8347687 100644 --- a/src/parse.zig +++ b/src/parse.zig @@ -43,11 +43,12 @@ pub const Stmt = struct { pub const Type = union(enum) { expr: *const Expr, - declare_var: DeclareVar, + assign_var: AssignVar, block: Block, - pub const DeclareVar = struct { + pub const AssignVar = struct { ident: Lexer.Location, + is_decl: bool, value: *const Expr, }; }; @@ -57,9 +58,10 @@ pub const Stmt = struct { return switch (self.type) { .expr => |expr| writer.print("{}", .{fmt(expr, source, indent)}), .block => |b| writer.print("{}", .{fmt(b, source, indent)}), - .declare_var => |declare_var| writer.print("let {s} = {}", .{ - declare_var.ident.getIdent(source), - fmt(declare_var.value, source, indent), + .assign_var => |assign_var| writer.print("{s}{s} = {}", .{ + if (assign_var.is_decl) "let " else "", + assign_var.ident.getIdent(source), + fmt(assign_var.value, source, indent), }), }; } @@ -129,7 +131,7 @@ pub const Expr = struct { } }; -const ParseError = error{ OutOfMemory, ExpectedRightParen, UnexpectedToken }; +const ParseError = error{ OutOfMemory, ExpectedRightParen, UnexpectedToken, InvalidAssignTarget }; pub fn block(allocator: Allocator, lexer: *Lexer) !Block { const left_curly = try mustEat(lexer, .left_curly); @@ -153,7 +155,7 @@ pub fn statement(allocator: Allocator, lexer: *Lexer) ParseError!Stmt { const value = try expression(allocator, lexer); return .{ .loc = let.loc.combine(value.loc), - .type = .{ .declare_var = .{ .ident = ident.loc, .value = value } }, + .type = .{ .assign_var = .{ .ident = ident.loc, .is_decl = true, .value = value } }, }; }, .left_curly => { @@ -164,10 +166,22 @@ pub fn statement(allocator: Allocator, lexer: *Lexer) ParseError!Stmt { }; }, else => { - const expr = try expression(allocator, lexer); + const lhs = try expression(allocator, lexer); + if (lexer.peek().type == .equal) { + _ = try mustEat(lexer, .equal); + const value = try expression(allocator, lexer); + if (lhs.type != .identifier) { + std.debug.print("Invalid assign target. Found '{s}', expected an identifier.", .{@tagName(lhs.type)}); + return error.InvalidAssignTarget; + } + return .{ + .loc = lhs.loc, + .type = .{ .assign_var = .{ .ident = lhs.loc, .is_decl = false, .value = value } }, + }; + } return .{ - .loc = expr.loc, - .type = .{ .expr = expr }, + .loc = lhs.loc, + .type = .{ .expr = lhs }, }; }, } @@ -247,7 +261,7 @@ pub fn parsePrimaryExpr(allocator: Allocator, lexer: *Lexer) !*Expr { .integer_literal => .{ .loc = token.loc, .type = .integer_literal }, .identifier => .{ .loc = token.loc, .type = .identifier }, else => |t| { - std.debug.print("Expected '(', integer literal, or identifier. Got {}\n", .{t}); + std.debug.print("Expected '(', integer literal, or identifier. Got '{s}'\n", .{@tagName(t)}); return error.UnexpectedToken; }, }); |