const std = @import("std"); const Allocator = std.mem.Allocator; const root = @import("root"); const Token = root.Lexer.Token; const parse = root.parse; const Location = root.Lexer.Location; /// Virtual register pub const VReg = enum(u32) { _ }; pub const BlockRef = enum(u32) { _ }; /// Id for local variables in a procedure pub const LVar = enum(u32) { _ }; fn IdCounter(Id: type) type { const Inner = @typeInfo(Id).@"enum".tag_type; std.debug.assert(@typeInfo(Inner) == .int); return struct { counter: Inner, const init: Self = .{ .counter = 0 }; fn get(self: *Self) Id { const id: Id = @enumFromInt(self.counter); self.counter = std.math.add(Inner, self.counter, 1) catch std.debug.panic("IdCounter({}) ran out of ids!", .{Id}); return id; } const Self = @This(); }; } pub const Instr = struct { loc: Location, type: Type, pub const Type = union(enum) { constant: Constant, bin_op: BinOp, proc_call: ProcCall, branch: Branch, jump: Jump, exit: Exit, }; pub const Constant = struct { dest: VReg, value: u64, pub fn sources(_: Constant) Sources { return Sources.init(0) catch unreachable; } }; pub const BinOp = struct { dest: VReg, lhs: VReg, rhs: VReg, op: Op, const Op = enum { add, sub, }; pub fn sources(self: BinOp) Sources { return Sources.fromSlice(&.{ self.lhs, self.rhs }) catch unreachable; } }; pub const ProcCall = struct { dest: VReg, arg: VReg, proc: Proc, const Proc = enum { print, read_int, }; pub fn sources(self: ProcCall) Sources { return Sources.fromSlice(&.{self.arg}) catch unreachable; } }; pub const Branch = struct { cond: VReg, true: Jump, false: Jump, pub const may_end_block = {}; pub fn sources(_: Branch) Sources { @panic("Sources not implemented for Branch!!!"); // return Sources.fromSlice(&.{self.cond}) catch unreachable; } }; pub const Jump = struct { to: BlockRef, args: std.ArrayListUnmanaged(VReg) = .empty, pub const may_end_block = {}; pub fn sources(self: Jump) Sources { return Sources.fromSlice(self.args) catch unreachable; } }; pub const Exit = struct { pub const may_end_block = {}; pub fn sources(_: Exit) Sources { return Sources.init(0) catch unreachable; } }; pub fn sources(self: Instr) Sources { return switch (self.type) { inline else => |instr| instr.sources(), }; } pub fn dest(self: *const Instr) ?VReg { return switch (self.type) { inline .constant, .bin_op, .proc_call => |s| s.dest, .branch, .jump, .exit => null, }; } pub const Sources = std.BoundedArray(VReg, 2); pub fn format(self: Instr, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = .{ fmt, options }; switch (self.type) { .constant => |constant| try writer.print( "%{} = {}", .{ @intFromEnum(constant.dest), constant.value }, ), .bin_op => |bin_op| try writer.print( "%{} = %{} {s} %{}", .{ @intFromEnum(bin_op.dest), @intFromEnum(bin_op.lhs), @tagName(bin_op.op), @intFromEnum(bin_op.rhs), }, ), .proc_call => |proc_call| try writer.print( "%{} = {s} %{}", .{ @intFromEnum(proc_call.dest), @tagName(proc_call.proc), @intFromEnum(proc_call.arg), }, ), .branch => |branch| { try writer.print("branch %{} ? ${}(", .{ @intFromEnum(branch.cond), @intFromEnum(branch.true.to), }); for (branch.true.args.items, 0..) |arg, i| { try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) }); } try writer.print(") : ${}(", .{@intFromEnum(branch.false.to)}); for (branch.false.args.items, 0..) |arg, i| { try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) }); } try writer.print(")", .{}); }, .jump => |jump| { try writer.print("jump ${}(", .{@intFromEnum(jump.to)}); for (jump.args.items, 0..) |arg, i| { try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) }); } try writer.print(")", .{}); }, .exit => |_| try writer.print("exit", .{}), } } }; pub const Procedure = struct { blocks: []BasicBlock, fn init(blocks: []BasicBlock) !Procedure { // for (blocks) |*block| { // try block.finalize(allocator); // } return .{ .blocks = blocks }; } pub fn format(self: Procedure, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = .{ fmt, options }; for (self.blocks, 0..) |block, i| { try writer.print("${}{}", .{ i, block }); } } }; pub const BasicBlock = struct { params: std.AutoArrayHashMapUnmanaged(LVar, VReg) = .empty, instrs: std.ArrayListUnmanaged(Instr) = .empty, vreg_last_use: std.AutoHashMapUnmanaged(VReg, usize) = .empty, fn finalize(self: *BasicBlock, allocator: Allocator) !void { std.debug.assert(self.instrs.items > 0); std.debug.assert(switch (self.instrs.getLast().type) { inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"), }); self.vreg_last_use = .empty; for (0.., self.instrs.items) |i, instr| { for (instr.sources().slice()) |src| try self.vreg_last_use.put(allocator, src, i); if (instr.dest()) |dest| try self.vreg_last_use.put(allocator, dest, i); } } fn immediate_successors(self: *BasicBlock) std.BoundedArray(BlockRef, 2) { return switch (self.instrs.getLast().type) { .branch => |branch| std.BoundedArray(BlockRef, 2).fromSlice(&.{ branch.true.to, branch.false.to }) catch unreachable, .jump => |jump| std.BoundedArray(BlockRef, 2).fromSlice(&.{jump.to}) catch unreachable, else => .{}, }; } pub fn format(self: BasicBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = .{ fmt, options }; try writer.print("(", .{}); { var it = self.params.iterator(); var first = true; while (it.next()) |ent| { try writer.print("{s}%{}", .{ if (first) "" else ", ", @intFromEnum(ent.value_ptr.*) }); first = false; } } try writer.print("):\n", .{}); for (self.instrs.items) |instr| { try writer.print(" {}\n", .{instr}); } } const Info = struct { local_current_vreg: std.AutoArrayHashMapUnmanaged(LVar, VReg) = .empty, }; }; pub fn compile(allocator: Allocator, source: []const u8, block: parse.Block) !Procedure { var ctx: CompileContext = .{ .allocator = allocator, .source = source, .vreg_ctr = .init, .lvar_ctr = .init, .scope = .{ .locals = .empty, .parent = null }, .blocks = .empty, .block_infos = .empty, .current_block = @enumFromInt(0), }; _ = try ctx.switchToNewBlock(); try ctx.compileBlock(block); try ctx.addInstr(.{ .loc = .{ .start = 0, .end = 0 }, .type = .{ .exit = .{} }, }); try ctx.assignArguments(); return try .init(try ctx.blocks.toOwnedSlice(allocator)); } const Scope = struct { locals: std.StringHashMapUnmanaged(LVar), parent: ?*Scope, }; const CompileError = error{ OutOfMemory, CanOnlyCallIdentifiers, UnknownProcedure, UnknownVariable, }; const CompileContext = struct { allocator: Allocator, source: []const u8, vreg_ctr: IdCounter(VReg), lvar_ctr: IdCounter(LVar), scope: Scope, blocks: std.ArrayListUnmanaged(BasicBlock), block_infos: std.ArrayListUnmanaged(BasicBlock.Info), current_block: BlockRef, fn compileBlock(self: *CompileContext, block: parse.Block) !void { var parent = self.scope; self.scope = .{ .locals = .empty, .parent = &parent, }; for (block.stmts) |stmt| { try self.compileStmt(stmt); } self.scope.locals.deinit(self.allocator); self.scope = parent; } fn compileStmt(self: *CompileContext, stmt: parse.Stmt) CompileError!void { switch (stmt.type) { .expr => |expr| _ = try self.compileExpr(expr), .block => |block| { try self.compileBlock(block); }, .assign_var => |assign_var| { const local = if (assign_var.is_decl) blk: { const local = self.lvar_ctr.get(); const name = assign_var.ident.getIdent(self.source); try self.scope.locals.put(self.allocator, name, local); break :blk local; } else blk: { var scope: ?*Scope = &self.scope; while (scope) |s| : (scope = s.parent) { if (s.locals.get(assign_var.ident.getIdent(self.source))) |local| break :blk local; } else return error.UnknownVariable; }; const val = try self.compileExpr(assign_var.value); try self.assignLocalVar(local, val); }, .@"while" => |@"while"| { const curr = self.current_block; const cond_block = try self.switchToNewBlock(); self.current_block = curr; try self.addInstr(.{ .loc = stmt.loc, .type = .{ .jump = .{ .to = cond_block } }, }); const do = try self.switchToNewBlock(); const after = try self.switchToNewBlock(); self.current_block = cond_block; const cond = try self.compileExpr(@"while".cond); try self.addInstr(.{ .loc = stmt.loc, .type = .{ .branch = .{ .cond = cond, .true = .{ .to = do }, .false = .{ .to = after }, } }, }); self.current_block = do; try self.compileBlock(@"while".do); try self.addInstr(.{ .loc = stmt.loc, .type = .{ .jump = .{ .to = cond_block } }, }); self.current_block = after; }, } } fn compileExpr(self: *CompileContext, expr: *const parse.Expr) !VReg { // This is not used by all expression types, but creating an unused virtual register is not a problem. const dest = self.vreg_ctr.get(); switch (expr.type) { .integer_literal => try self.addInstr(.{ .loc = expr.loc, .type = .{ .constant = .{ .dest = dest, .value = expr.loc.getInt(self.source) } }, }), .bin_op => |binop| { const lhs = try self.compileExpr(binop.lhs); const rhs = try self.compileExpr(binop.rhs); try self.addInstr(.{ .loc = expr.loc, .type = .{ .bin_op = .{ .dest = dest, .lhs = lhs, .rhs = rhs, .op = switch (binop.op) { .plus => .add, .minus => .sub, }, }, }, }); }, .call => |call| { if (call.proc.type != .identifier) return error.CanOnlyCallIdentifiers; const proc = std.meta.stringToEnum(Instr.ProcCall.Proc, call.proc.loc.getIdent(self.source)) orelse return error.UnknownProcedure; const arg = try self.compileExpr(call.arg); try self.addInstr(.{ .loc = expr.loc, .type = .{ .proc_call = .{ .dest = dest, .arg = arg, .proc = proc, } }, }); }, .identifier => { var scope: ?*Scope = &self.scope; while (scope) |s| : (scope = s.parent) { if (s.locals.get(expr.loc.getIdent(self.source))) |local| { return self.getLocalVar(local); } } return error.UnknownVariable; }, .@"if" => |@"if"| { const cond = try self.compileExpr(@"if".cond); const curr = self.current_block; const after = try self.switchToNewBlock(); const t = try self.switchToNewBlock(); try self.compileBlock(@"if".then); try self.addInstr(.{ .loc = expr.loc, .type = .{ .jump = .{ .to = after } }, }); const f = if (@"if".@"else") |@"else"| blk: { const f = try self.switchToNewBlock(); try self.compileBlock(@"else"); try self.addInstr(.{ .loc = expr.loc, .type = .{ .jump = .{ .to = after } }, }); break :blk f; } else after; self.current_block = curr; try self.addInstr(.{ .loc = expr.loc, .type = .{ .branch = .{ .cond = cond, .true = .{ .to = t }, .false = .{ .to = f }, } }, }); self.current_block = after; }, } return dest; } fn assignArguments(self: *CompileContext) !void { var immediate_predecessors: std.AutoHashMapUnmanaged(usize, std.ArrayListUnmanaged(usize)) = .empty; defer immediate_predecessors.deinit(self.allocator); defer { var it = immediate_predecessors.valueIterator(); while (it.next()) |val| val.deinit(self.allocator); } for (self.blocks.items, 0..) |block, i| { switch (block.instrs.getLast().type) { .branch => |branch| { const pt = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(branch.true.to)); if (!pt.found_existing) { pt.value_ptr.* = .empty; } try pt.value_ptr.append(self.allocator, i); const pf = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(branch.false.to)); if (!pf.found_existing) { pf.value_ptr.* = .empty; } try pf.value_ptr.append(self.allocator, i); }, .jump => |jump| { const p = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(jump.to)); if (!p.found_existing) { p.value_ptr.* = .empty; } try p.value_ptr.append(self.allocator, i); }, else => {}, } } var block_ids: std.ArrayListUnmanaged(usize) = .empty; for (0..self.blocks.items.len) |i| try block_ids.append(self.allocator, i); while (block_ids.pop()) |ref| { const block = &self.blocks.items[ref]; const info = &self.block_infos.items[ref]; const ty = &block.instrs.items[block.instrs.items.len - 1].type; const got_new_params = switch (ty.*) { .branch => blk: { const t = try self.assignArgumentsInner(block, info, &ty.branch.true); const f = try self.assignArgumentsInner(block, info, &ty.branch.false); break :blk t or f; }, .jump => try self.assignArgumentsInner(block, info, &ty.jump), else => false, }; if (got_new_params) { try block_ids.appendSlice(self.allocator, immediate_predecessors.get(ref).?.items); } } } fn assignArgumentsInner(self: *CompileContext, block: *BasicBlock, info: *BasicBlock.Info, jump: *Instr.Jump) !bool { var got_new_params = false; var it = self.blocks.items[@intFromEnum(jump.to)].params.iterator(); // Skip over the arguments that we've already handled in an earlier call for (0..jump.args.items.len) |_| _ = it.next(); while (it.next()) |ent| { try jump.args.append(self.allocator, info.local_current_vreg.get(ent.key_ptr.*) orelse blk: { const reg = self.vreg_ctr.get(); try block.params.put(self.allocator, ent.key_ptr.*, reg); try info.local_current_vreg.put(self.allocator, ent.key_ptr.*, reg); got_new_params = true; break :blk reg; }); } return got_new_params; } fn switchToNewBlock(self: *CompileContext) !BlockRef { const ref: BlockRef = @enumFromInt(self.blocks.items.len); try self.blocks.append(self.allocator, .{}); try self.block_infos.append(self.allocator, .{}); self.current_block = ref; return ref; } fn addInstr(self: *CompileContext, instr: Instr) !void { try self.blocks.items[@intFromEnum(self.current_block)] .instrs.append(self.allocator, instr); } fn getLocalVar(self: *CompileContext, local: LVar) !VReg { if (self.block_infos.items[@intFromEnum(self.current_block)] .local_current_vreg.get(local)) |vreg| return vreg; const vreg = self.vreg_ctr.get(); try self.blocks.items[@intFromEnum(self.current_block)] .params.put(self.allocator, local, vreg); try self.block_infos.items[@intFromEnum(self.current_block)] .local_current_vreg.put(self.allocator, local, vreg); return vreg; } fn assignLocalVar(self: *CompileContext, local: LVar, vreg: VReg) !void { try self.block_infos.items[@intFromEnum(self.current_block)] .local_current_vreg.put(self.allocator, local, vreg); } };