diff options
author | Mathias Magnusson <mathias@magnusson.space> | 2025-06-07 00:05:22 +0200 |
---|---|---|
committer | Mathias Magnusson <mathias@magnusson.space> | 2025-06-07 00:57:32 +0200 |
commit | 1ed87d6c58cab383f18590093f651ef35c4fa671 (patch) | |
tree | c76bc3b8797f6e9973439462565b015da1dcbd80 /src/compile.zig | |
parent | 8c9a56311ac2774bf83eda4ea0aa9bde0aec125f (diff) | |
download | huginn-1ed87d6c58cab383f18590093f651ef35c4fa671.tar.gz |
add variable reassignments and basic block arguments
Diffstat (limited to 'src/compile.zig')
-rw-r--r-- | src/compile.zig | 290 |
1 files changed, 229 insertions, 61 deletions
diff --git a/src/compile.zig b/src/compile.zig index 8cf61ca..c0ba77d 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -5,8 +5,30 @@ const Token = root.Lexer.Token; const parse = root.parse; const Location = root.Lexer.Location; +/// Virtual register pub const VReg = enum(u32) { _ }; pub const BlockRef = enum(u32) { _ }; +/// Id for local variables in a procedure +pub const LVar = enum(u32) { _ }; + +fn IdCounter(Id: type) type { + const Inner = @typeInfo(Id).@"enum".tag_type; + std.debug.assert(@typeInfo(Inner) == .int); + return struct { + counter: Inner, + + const init: Self = .{ .counter = 0 }; + + fn get(self: *Self) Id { + const id: Id = @enumFromInt(self.counter); + self.counter = std.math.add(Inner, self.counter, 1) catch + std.debug.panic("IdCounter({}) ran out of ids!", .{Id}); + return id; + } + + const Self = @This(); + }; +} pub const Instr = struct { loc: Location, @@ -63,23 +85,25 @@ pub const Instr = struct { pub const Branch = struct { cond: VReg, - true: BlockRef, - false: BlockRef, + true: Jump, + false: Jump, pub const may_end_block = {}; - pub fn sources(self: Branch) Sources { - return Sources.fromSlice(&.{self.cond}) catch unreachable; + pub fn sources(_: Branch) Sources { + @panic("Sources not implemented for Branch!!!"); + // return Sources.fromSlice(&.{self.cond}) catch unreachable; } }; pub const Jump = struct { to: BlockRef, + args: std.ArrayListUnmanaged(VReg) = .empty, pub const may_end_block = {}; - pub fn sources(_: Jump) Sources { - return Sources.init(0) catch unreachable; + pub fn sources(self: Jump) Sources { + return Sources.fromSlice(self.args) catch unreachable; } }; @@ -131,15 +155,28 @@ pub const Instr = struct { @intFromEnum(proc_call.arg), }, ), - .branch => |branch| try writer.print( - "branch %{} ? ${} : ${}", - .{ + .branch => |branch| { + try writer.print("branch %{} ? ${}(", .{ @intFromEnum(branch.cond), - @intFromEnum(branch.true), - @intFromEnum(branch.false), - }, - ), - .jump => |jump| try writer.print("jump ${}", .{@intFromEnum(jump.to)}), + @intFromEnum(branch.true.to), + }); + for (branch.true.args.items, 0..) |arg, i| { + try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) }); + } + try writer.print(") : ${}(", .{@intFromEnum(branch.false.to)}); + for (branch.false.args.items, 0..) |arg, i| { + try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) }); + } + try writer.print(")", .{}); + }, + .jump => |jump| { + try writer.print("jump ${}(", .{@intFromEnum(jump.to)}); + for (jump.args.items, 0..) |arg, i| { + try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) }); + } + try writer.print(")", .{}); + }, + .exit => |_| try writer.print("exit", .{}), } } @@ -148,21 +185,34 @@ pub const Instr = struct { pub const Procedure = struct { blocks: []BasicBlock, - fn init(allocator: Allocator, blocks: []BasicBlock) !Procedure { - for (blocks) |*block| { - try block.finalize(allocator); - } + fn init(blocks: []BasicBlock) !Procedure { + // for (blocks) |*block| { + // try block.finalize(allocator); + // } + return .{ .blocks = blocks }; } + + pub fn format(self: Procedure, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = .{ fmt, options }; + for (self.blocks, 0..) |block, i| { + try writer.print("${}{}", .{ i, block }); + } + } }; pub const BasicBlock = struct { - // arguments: []Reg, + params: std.AutoArrayHashMapUnmanaged(LVar, VReg) = .empty, instrs: std.ArrayListUnmanaged(Instr) = .empty, vreg_last_use: std.AutoHashMapUnmanaged(VReg, usize) = .empty, fn finalize(self: *BasicBlock, allocator: Allocator) !void { + std.debug.assert(self.instrs.items > 0); + std.debug.assert(switch (self.instrs.getLast().type) { + inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"), + }); + self.vreg_last_use = .empty; for (0.., self.instrs.items) |i, instr| { for (instr.sources().slice()) |src| @@ -170,29 +220,64 @@ pub const BasicBlock = struct { if (instr.dest()) |dest| try self.vreg_last_use.put(allocator, dest, i); } - std.debug.assert(switch (self.instrs.items[self.instrs.items.len - 1].type) { - inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"), - }); } + + fn immediate_successors(self: *BasicBlock) std.BoundedArray(BlockRef, 2) { + return switch (self.instrs.getLast().type) { + .branch => |branch| std.BoundedArray(BlockRef, 2).fromSlice(&.{ branch.true.to, branch.false.to }) catch unreachable, + .jump => |jump| std.BoundedArray(BlockRef, 2).fromSlice(&.{jump.to}) catch unreachable, + else => .{}, + }; + } + + pub fn format(self: BasicBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = .{ fmt, options }; + try writer.print("(", .{}); + { + var it = self.params.iterator(); + var first = true; + while (it.next()) |ent| { + try writer.print("{s}%{}", .{ if (first) "" else ", ", @intFromEnum(ent.value_ptr.*) }); + first = false; + } + } + try writer.print("):\n", .{}); + for (self.instrs.items) |instr| { + try writer.print(" {}\n", .{instr}); + } + } + + const Info = struct { + local_current_vreg: std.AutoArrayHashMapUnmanaged(LVar, VReg) = .empty, + }; }; pub fn compile(allocator: Allocator, source: []const u8, block: parse.Block) !Procedure { var ctx: CompileContext = .{ .allocator = allocator, .source = source, - .register_counter = 0, + .vreg_ctr = .init, + .lvar_ctr = .init, .scope = .{ .locals = .empty, .parent = null }, .blocks = .empty, + .block_infos = .empty, .current_block = @enumFromInt(0), }; + _ = try ctx.switchToNewBlock(); try ctx.compileBlock(block); try ctx.addInstr(.{ .loc = .{ .start = 0, .end = 0 }, .type = .{ .exit = .{} }, }); - return try .init(allocator, try ctx.blocks.toOwnedSlice(allocator)); + try ctx.assignArguments(); + return try .init(try ctx.blocks.toOwnedSlice(allocator)); } +const Scope = struct { + locals: std.StringHashMapUnmanaged(LVar), + parent: ?*Scope, +}; + const CompileError = error{ OutOfMemory, CanOnlyCallIdentifiers, @@ -203,20 +288,17 @@ const CompileError = error{ const CompileContext = struct { allocator: Allocator, source: []const u8, - register_counter: u32, + + vreg_ctr: IdCounter(VReg), + lvar_ctr: IdCounter(LVar), + scope: Scope, + blocks: std.ArrayListUnmanaged(BasicBlock), + block_infos: std.ArrayListUnmanaged(BasicBlock.Info), current_block: BlockRef, - // instrs: std.ArrayListUnmanaged(Instr), - - const Scope = struct { - locals: std.StringHashMapUnmanaged(VReg), - parent: ?*Scope, - }; fn compileBlock(self: *CompileContext, block: parse.Block) !void { - _ = try self.switchToNewBlock(); - var parent = self.scope; self.scope = .{ .locals = .empty, @@ -235,32 +317,31 @@ const CompileContext = struct { switch (stmt.type) { .expr => |expr| _ = try self.compileExpr(expr), .block => |block| { - const curr = self.current_block; - const after = try self.switchToNewBlock(); try self.compileBlock(block); - const b = self.current_block; - try self.addInstr(.{ - .loc = stmt.loc, - .type = .{ .jump = .{ .to = after } }, - }); - self.current_block = curr; - try self.addInstr(.{ - .loc = stmt.loc, - .type = .{ .jump = .{ .to = b } }, - }); - self.current_block = after; }, - .declare_var => |declare_var| { - const val = try self.compileExpr(declare_var.value); - const name = declare_var.ident.getIdent(self.source); - try self.scope.locals.put(self.allocator, name, val); + .assign_var => |assign_var| { + const local = if (assign_var.is_decl) blk: { + const local = self.lvar_ctr.get(); + const name = assign_var.ident.getIdent(self.source); + try self.scope.locals.put(self.allocator, name, local); + break :blk local; + } else blk: { + var scope: ?*Scope = &self.scope; + while (scope) |s| : (scope = s.parent) { + if (s.locals.get(assign_var.ident.getIdent(self.source))) |local| + break :blk local; + } else return error.UnknownVariable; + }; + + const val = try self.compileExpr(assign_var.value); + try self.assignLocalVar(local, val); }, } } fn compileExpr(self: *CompileContext, expr: *const parse.Expr) !VReg { // This is not used by all expression types, but creating an unused virtual register is not a problem. - const dest = self.register(); + const dest = self.vreg_ctr.get(); switch (expr.type) { .integer_literal => try self.addInstr(.{ .loc = expr.loc, @@ -301,8 +382,8 @@ const CompileContext = struct { .identifier => { var scope: ?*Scope = &self.scope; while (scope) |s| : (scope = s.parent) { - if (s.locals.get(expr.loc.getIdent(self.source))) |reg| { - return reg; + if (s.locals.get(expr.loc.getIdent(self.source))) |local| { + return self.getLocalVar(local); } } return error.UnknownVariable; @@ -313,20 +394,21 @@ const CompileContext = struct { const after = try self.switchToNewBlock(); + const t = try self.switchToNewBlock(); try self.compileBlock(@"if".then); try self.addInstr(.{ .loc = expr.loc, .type = .{ .jump = .{ .to = after } }, }); - const t = self.current_block; const f = if (@"if".@"else") |@"else"| blk: { + const f = try self.switchToNewBlock(); try self.compileBlock(@"else"); try self.addInstr(.{ .loc = expr.loc, .type = .{ .jump = .{ .to = after } }, }); - break :blk self.current_block; + break :blk f; } else after; self.current_block = curr; @@ -334,8 +416,8 @@ const CompileContext = struct { .loc = expr.loc, .type = .{ .branch = .{ .cond = cond, - .true = t, - .false = f, + .true = .{ .to = t }, + .false = .{ .to = f }, } }, }); @@ -345,9 +427,83 @@ const CompileContext = struct { return dest; } + fn assignArguments(self: *CompileContext) !void { + var immediate_predecessors: std.AutoHashMapUnmanaged(usize, std.ArrayListUnmanaged(usize)) = .empty; + defer immediate_predecessors.deinit(self.allocator); + defer { + var it = immediate_predecessors.valueIterator(); + while (it.next()) |val| val.deinit(self.allocator); + } + + for (self.blocks.items, 0..) |block, i| { + switch (block.instrs.getLast().type) { + .branch => |branch| { + const pt = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(branch.true.to)); + if (!pt.found_existing) { + pt.value_ptr.* = .empty; + } + try pt.value_ptr.append(self.allocator, i); + + const pf = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(branch.false.to)); + if (!pf.found_existing) { + pf.value_ptr.* = .empty; + } + try pf.value_ptr.append(self.allocator, i); + }, + .jump => |jump| { + const p = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(jump.to)); + if (!p.found_existing) { + p.value_ptr.* = .empty; + } + try p.value_ptr.append(self.allocator, i); + }, + else => {}, + } + } + + var block_ids: std.ArrayListUnmanaged(usize) = .empty; + for (0..self.blocks.items.len) |i| try block_ids.append(self.allocator, i); + while (block_ids.pop()) |ref| { + const block = &self.blocks.items[ref]; + const info = &self.block_infos.items[ref]; + const ty = &block.instrs.items[block.instrs.items.len - 1].type; + const got_new_params = switch (ty.*) { + .branch => blk: { + const t = try self.assignArgumentsInner(block, info, &ty.branch.true); + const f = try self.assignArgumentsInner(block, info, &ty.branch.false); + break :blk t or f; + }, + .jump => try self.assignArgumentsInner(block, info, &ty.jump), + else => false, + }; + if (got_new_params) { + try block_ids.appendSlice(self.allocator, immediate_predecessors.get(ref).?.items); + } + } + } + + fn assignArgumentsInner(self: *CompileContext, block: *BasicBlock, info: *BasicBlock.Info, jump: *Instr.Jump) !bool { + var got_new_params = false; + var it = self.blocks.items[@intFromEnum(jump.to)].params.iterator(); + // Skip over the arguments that we've already handled in an earlier call + for (0..jump.args.items.len) |_| _ = it.next(); + while (it.next()) |ent| { + try jump.args.append(self.allocator, info.local_current_vreg.get(ent.key_ptr.*) orelse blk: { + const reg = self.vreg_ctr.get(); + try block.params.put(self.allocator, ent.key_ptr.*, reg); + try info.local_current_vreg.put(self.allocator, ent.key_ptr.*, reg); + got_new_params = true; + break :blk reg; + }); + } + + return got_new_params; + } + fn switchToNewBlock(self: *CompileContext) !BlockRef { const ref: BlockRef = @enumFromInt(self.blocks.items.len); try self.blocks.append(self.allocator, .{}); + try self.block_infos.append(self.allocator, .{}); self.current_block = ref; return ref; } @@ -357,9 +513,21 @@ const CompileContext = struct { .instrs.append(self.allocator, instr); } - fn register(self: *CompileContext) VReg { - const reg: VReg = @enumFromInt(self.register_counter); - self.register_counter += 1; - return reg; + fn getLocalVar(self: *CompileContext, local: LVar) !VReg { + if (self.block_infos.items[@intFromEnum(self.current_block)] + .local_current_vreg.get(local)) |vreg| return vreg; + + const vreg = self.vreg_ctr.get(); + try self.blocks.items[@intFromEnum(self.current_block)] + .params.put(self.allocator, local, vreg); + try self.block_infos.items[@intFromEnum(self.current_block)] + .local_current_vreg.put(self.allocator, local, vreg); + + return vreg; + } + + fn assignLocalVar(self: *CompileContext, local: LVar, vreg: VReg) !void { + try self.block_infos.items[@intFromEnum(self.current_block)] + .local_current_vreg.put(self.allocator, local, vreg); } }; |