const std = @import("std"); const Allocator = std.mem.Allocator; const root = @import("root"); const Token = root.Lexer.Token; const parse = root.parse; const Location = root.Lexer.Location; /// Virtual register pub const VReg = enum(u32) { _ }; pub const BlockRef = enum(u32) { _ }; /// Id for local variables in a procedure pub const LVar = enum(u32) { _ }; fn IdCounter(Id: type) type { const Inner = @typeInfo(Id).@"enum".tag_type; std.debug.assert(@typeInfo(Inner) == .int); return struct { counter: Inner, const init: Self = .{ .counter = 0 }; fn get(self: *Self) Id { const id: Id = @enumFromInt(self.counter); self.counter = std.math.add(Inner, self.counter, 1) catch std.debug.panic("IdCounter({}) ran out of ids!", .{Id}); return id; } const Self = @This(); }; } pub const Module = struct { procedures: []Procedure, print_block: BlockRef, read_int_block: BlockRef, exit_block: BlockRef, pub fn format(self: Module, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = .{ fmt, options }; for (self.procedures) |proc| { try writer.print("{}", .{proc}); } for ([_]struct { []const u8, BlockRef }{ .{ "print", self.print_block }, .{ "read_int", self.read_int_block }, .{ "exit", self.exit_block }, }) |builtin| { const name, const block = builtin; try writer.print("{s}:\n ${}: @builtin\n\n", .{ name, @intFromEnum(block) }); } } }; pub const Procedure = struct { name: []const u8, blocks: []BasicBlock, param_lvar: ?LVar, locals: std.AutoHashMap(LVar, void), fn init( allocator: Allocator, name: []const u8, blocks: []BasicBlock, param_lvar: ?LVar, locals: std.AutoHashMap(LVar, void), ) !Procedure { for (blocks) |*block| { try block.finalize(allocator); } return .{ .name = name, .blocks = blocks, .param_lvar = param_lvar, .locals = locals, }; } pub fn format(self: Procedure, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = .{ fmt, options }; try writer.print("{s}(", .{self.name}); if (self.param_lvar) |lvar| try writer.print("%{}", .{@intFromEnum(lvar)}); try writer.print("):\n", .{}); for (self.blocks) |block| { try writer.print("{}", .{block}); } try writer.writeAll("\n"); } }; pub const BasicBlock = struct { ref: BlockRef, instrs: std.ArrayListUnmanaged(Instr) = .empty, vreg_last_use: std.AutoHashMapUnmanaged(VReg, usize) = .empty, fn finalize(self: *BasicBlock, allocator: Allocator) !void { std.debug.assert(self.instrs.items.len > 0); std.debug.assert(switch (self.instrs.getLast().type) { inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"), }); self.vreg_last_use = .empty; for (0.., self.instrs.items) |i, instr| { for (instr.sources().slice()) |src| try self.vreg_last_use.put(allocator, src, i); if (instr.dest()) |dest| try self.vreg_last_use.put(allocator, dest, i); } } fn immediate_successors(self: *BasicBlock) std.BoundedArray(BlockRef, 2) { return switch (self.instrs.getLast().type) { .branch => |branch| std.BoundedArray(BlockRef, 2).fromSlice(&.{ branch.true.to, branch.false.to }) catch unreachable, .jump => |jump| std.BoundedArray(BlockRef, 2).fromSlice(&.{jump.to}) catch unreachable, else => .{}, }; } pub fn format(self: BasicBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = .{ fmt, options }; try writer.print(" ${}:\n", .{@intFromEnum(self.ref)}); for (self.instrs.items) |instr| { try writer.print(" {}\n", .{instr}); } } }; pub const Instr = struct { loc: Location, type: Type, pub const Type = union(enum) { constant: Constant, bin_op: BinOp, call: Call, branch: Branch, jump: Jump, assign_local: AssignLocal, get_local: GetLocal, ret: Return, }; pub const Constant = struct { dest: VReg, value: u64, pub fn sources(_: Constant) Sources { return Sources.init(0) catch unreachable; } }; pub const BinOp = struct { dest: VReg, lhs: VReg, rhs: VReg, op: Op, const Op = enum { add, sub, less_than, greater_than, less_or_equal, greater_or_equal, equals, }; pub fn sources(self: BinOp) Sources { return Sources.fromSlice(&.{ self.lhs, self.rhs }) catch unreachable; } }; pub const Call = struct { dest: VReg, arg: VReg, proc: BlockRef, pub fn sources(self: Call) Sources { return Sources.fromSlice(&.{self.arg}) catch unreachable; } }; pub const Branch = struct { cond: VReg, true: BlockRef, false: BlockRef, pub const may_end_block = {}; pub fn sources(_: Branch) Sources { return Sources.init(0) catch unreachable; } }; pub const Jump = struct { to: BlockRef, pub const may_end_block = {}; pub fn sources(_: Jump) Sources { return Sources.init(0) catch unreachable; } }; pub const AssignLocal = struct { local: LVar, val: VReg, pub fn sources(self: AssignLocal) Sources { return Sources.fromSlice(&.{self.val}) catch unreachable; } }; pub const GetLocal = struct { dest: VReg, local: LVar, pub fn sources(_: GetLocal) Sources { return Sources.init(0) catch unreachable; } }; pub const Return = struct { val: VReg, pub const may_end_block = {}; pub fn sources(self: Return) Sources { return Sources.fromSlice(&.{self.val}) catch unreachable; } }; pub fn sources(self: Instr) Sources { return switch (self.type) { inline else => |instr| instr.sources(), }; } pub fn dest(self: *const Instr) ?VReg { return switch (self.type) { inline .constant, .bin_op, .call, .get_local => |s| s.dest, .branch, .jump, .ret, .assign_local => null, }; } pub const Sources = std.BoundedArray(VReg, 2); pub fn format(self: Instr, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = .{ fmt, options }; switch (self.type) { .constant => |constant| try writer.print( "%{} = {}", .{ @intFromEnum(constant.dest), constant.value }, ), .bin_op => |bin_op| try writer.print( "%{} = %{} {s} %{}", .{ @intFromEnum(bin_op.dest), @intFromEnum(bin_op.lhs), @tagName(bin_op.op), @intFromEnum(bin_op.rhs), }, ), .call => |call| try writer.print( "%{} = call ${}(%{})", .{ @intFromEnum(call.dest), @intFromEnum(call.proc), @intFromEnum(call.arg), }, ), .branch => |branch| { try writer.print("branch %{} ? ${} : ${}", .{ @intFromEnum(branch.cond), @intFromEnum(branch.true), @intFromEnum(branch.false), }); }, .jump => |jump| { try writer.print("jump ${}", .{@intFromEnum(jump.to)}); }, .assign_local => |assign_local| { try writer.print("@{} = %{}", .{ @intFromEnum(assign_local.local), @intFromEnum(assign_local.val) }); }, .get_local => |get_local| { try writer.print("%{} = @{}", .{ @intFromEnum(get_local.dest), @intFromEnum(get_local.local) }); }, .ret => |ret| try writer.print("return %{}", .{@intFromEnum(ret.val)}), } } }; const Scope = struct { locals: std.StringHashMapUnmanaged(LVar), parent: ?*Scope, }; const CompileError = error{ OutOfMemory, CanOnlyCallIdentifiers, UnknownProcedure, UnknownVariable, CannotDefineProcedureHere, }; pub fn compile(allocator: Allocator, source: []const u8, file: parse.File) !Module { var block_ctr: IdCounter(BlockRef) = .init; var procedures: std.StringHashMapUnmanaged(BlockRef) = .empty; const first_blocks: []BlockRef = try allocator.alloc(BlockRef, file.decls.len); for (file.decls, first_blocks) |decl, *first| { const id = block_ctr.get(); try procedures.put(allocator, decl.inner.ident.getIdent(source), id); first.* = id; } const print_block = block_ctr.get(); try procedures.put(allocator, "print", print_block); const read_int_block = block_ctr.get(); try procedures.put(allocator, "read_int", read_int_block); const exit_block = block_ctr.get(); try procedures.put(allocator, "exit", exit_block); var ctx: Context = .{ .allocator = allocator, .source = source, .block_ctr = block_ctr, .procedures = procedures, }; const procs = try allocator.alloc(Procedure, file.decls.len); for (procs, file.decls, first_blocks) |*proc, decl, first_block| { proc.* = try compileProcedure( &ctx, decl.inner.ident, decl.inner.value.type.proc, first_block, ); } return .{ .procedures = procs, .print_block = print_block, .read_int_block = read_int_block, .exit_block = exit_block, }; } const Context = struct { allocator: Allocator, source: []const u8, block_ctr: IdCounter(BlockRef), procedures: std.StringHashMapUnmanaged(BlockRef), }; fn compileProcedure( ctx: *Context, name: Location, proc: parse.Expr.Type.Proc, first_block: BlockRef, ) !Procedure { var pctx: ProcedureContext = .{ .ctx = ctx, .vreg_ctr = .init, .lvar_ctr = .init, .scope = .{ .locals = .empty, .parent = null }, .locals = .empty, .blocks = try .init(ctx.allocator, &.{first_block}, &.{.{ .ref = first_block }}), .current_block = first_block, }; const param_lvar = if (proc.param) |param| blk: { const param_name = param.getIdent(ctx.source); const lvar = pctx.lvar_ctr.get(); try pctx.scope.locals.putNoClobber(ctx.allocator, param_name, lvar); try pctx.locals.putNoClobber(ctx.allocator, lvar, {}); break :blk lvar; } else null; try pctx.compileBlock(proc.body); const proc_res = pctx.vreg_ctr.get(); try pctx.addInstr(.{ .loc = .{ .start = 0, .end = 0 }, .type = .{ .constant = .{ .dest = proc_res, .value = 0 } }, }); try pctx.addInstr(.{ .loc = .{ .start = 0, .end = 0 }, .type = .{ .ret = .{ .val = proc_res } }, }); var blocks = try ctx.allocator.alloc(BasicBlock, pctx.blocks.count()); var it = pctx.blocks.iterator(); var i: usize = 0; while (it.next()) |kv| : (i += 1) { std.debug.assert(kv.key_ptr.* == kv.value_ptr.ref); blocks[i] = kv.value_ptr.*; } return try .init( ctx.allocator, name.getIdent(ctx.source), blocks, param_lvar, pctx.locals.promote(ctx.allocator), ); } const ProcedureContext = struct { ctx: *Context, vreg_ctr: IdCounter(VReg), lvar_ctr: IdCounter(LVar), scope: Scope, locals: std.AutoHashMapUnmanaged(LVar, void), blocks: std.AutoArrayHashMapUnmanaged(BlockRef, BasicBlock), current_block: BlockRef, fn compileBlock(self: *ProcedureContext, block: parse.Block) !void { var parent = self.scope; self.scope = .{ .locals = .empty, .parent = &parent, }; for (block.stmts) |stmt| { try self.compileStmt(stmt); } self.scope.locals.deinit(self.ctx.allocator); self.scope = parent; } fn compileStmt(self: *ProcedureContext, stmt: parse.Stmt) CompileError!void { switch (stmt.type) { .expr => |expr| _ = try self.compileExpr(expr), .block => |block| { try self.compileBlock(block); }, .assign_var => |assign_var| { const local = if (assign_var.is_decl) blk: { const local = self.lvar_ctr.get(); const name = assign_var.ident.getIdent(self.ctx.source); try self.scope.locals.put(self.ctx.allocator, name, local); try self.locals.put(self.ctx.allocator, local, {}); break :blk local; } else blk: { var scope: ?*Scope = &self.scope; while (scope) |s| : (scope = s.parent) { if (s.locals.get(assign_var.ident.getIdent(self.ctx.source))) |local| break :blk local; } else { std.log.debug("{s}", .{assign_var.ident.getIdent(self.ctx.source)}); return error.UnknownVariable; } }; const val = try self.compileExpr(assign_var.value); try self.addInstr(.{ .loc = stmt.loc, .type = .{ .assign_local = .{ .local = local, .val = val, } }, }); }, .@"while" => |@"while"| { const curr = self.current_block; const cond_block = try self.switchToNewBlock(); self.current_block = curr; try self.addInstr(.{ .loc = stmt.loc, .type = .{ .jump = .{ .to = cond_block } }, }); const do = try self.switchToNewBlock(); const after = try self.switchToNewBlock(); self.current_block = cond_block; const cond = try self.compileExpr(@"while".cond); try self.addInstr(.{ .loc = stmt.loc, .type = .{ .branch = .{ .cond = cond, .true = do, .false = after, } }, }); self.current_block = do; try self.compileBlock(@"while".do); try self.addInstr(.{ .loc = stmt.loc, .type = .{ .jump = .{ .to = cond_block } }, }); self.current_block = after; }, } } fn compileExpr(self: *ProcedureContext, expr: *const parse.Expr) !VReg { // This is not used by all expression types, but creating an unused virtual register is not a problem. const dest = self.vreg_ctr.get(); switch (expr.type) { .integer_literal => try self.addInstr(.{ .loc = expr.loc, .type = .{ .constant = .{ .dest = dest, .value = expr.loc.getInt(self.ctx.source) } }, }), .bin_op => |binop| { const lhs = try self.compileExpr(binop.lhs); const rhs = try self.compileExpr(binop.rhs); try self.addInstr(.{ .loc = expr.loc, .type = .{ .bin_op = .{ .dest = dest, .lhs = lhs, .rhs = rhs, .op = switch (binop.op) { .plus => .add, .minus => .sub, .left_angle => .less_than, .right_angle => .greater_than, .left_angle_equal => .less_or_equal, .right_angle_equal => .greater_or_equal, .equal_equal => .equals, }, }, }, }); }, .call => |call| { if (call.proc.type != .identifier) return error.CanOnlyCallIdentifiers; const proc = self.ctx.procedures.get(call.proc.loc.getIdent(self.ctx.source)) orelse return error.UnknownProcedure; const arg = try self.compileExpr(call.arg); try self.addInstr(.{ .loc = expr.loc, .type = .{ .call = .{ .dest = dest, .arg = arg, .proc = proc, } }, }); }, .identifier => { const ident = expr.loc.getIdent(self.ctx.source); var scope: ?*Scope = &self.scope; const local: LVar = blk: { while (scope) |s| : (scope = s.parent) { if (s.locals.get(ident)) |local| { break :blk local; } } std.log.debug("{s}", .{expr.loc.getIdent(self.ctx.source)}); return error.UnknownVariable; }; try self.addInstr(.{ .loc = expr.loc, .type = .{ .get_local = .{ .dest = dest, .local = local, } }, }); }, .@"if" => |@"if"| { const cond = try self.compileExpr(@"if".cond); const curr = self.current_block; const after = try self.switchToNewBlock(); const t = try self.switchToNewBlock(); try self.compileBlock(@"if".then); try self.addInstr(.{ .loc = expr.loc, .type = .{ .jump = .{ .to = after } }, }); const f = if (@"if".@"else") |@"else"| blk: { const f = try self.switchToNewBlock(); try self.compileBlock(@"else"); try self.addInstr(.{ .loc = expr.loc, .type = .{ .jump = .{ .to = after } }, }); break :blk f; } else after; self.current_block = curr; try self.addInstr(.{ .loc = expr.loc, .type = .{ .branch = .{ .cond = cond, .true = t, .false = f, } }, }); self.current_block = after; }, .proc => |proc| { _ = proc; return error.CannotDefineProcedureHere; }, .@"return" => |ret| { const val = try self.compileExpr(ret.value); try self.addInstr(.{ .loc = expr.loc, .type = .{ .ret = .{ .val = val } }, }); }, } return dest; } fn switchToNewBlock(self: *ProcedureContext) !BlockRef { const ref = self.ctx.block_ctr.get(); try self.blocks.putNoClobber(self.ctx.allocator, ref, .{ .ref = ref }); self.current_block = ref; return ref; } fn addInstr(self: *ProcedureContext, instr: Instr) !void { try self.blocks.getPtr(self.current_block).? .instrs.append(self.ctx.allocator, instr); } };