diff options
-rw-r--r-- | src/Lexer.zig | 4 | ||||
-rw-r--r-- | src/codegen.zig | 97 | ||||
-rw-r--r-- | src/compile.zig | 210 | ||||
-rw-r--r-- | src/main.zig | 25 | ||||
-rw-r--r-- | src/parse.zig | 43 |
5 files changed, 316 insertions, 63 deletions
diff --git a/src/Lexer.zig b/src/Lexer.zig index cb146c7..f59737e 100644 --- a/src/Lexer.zig +++ b/src/Lexer.zig @@ -19,6 +19,8 @@ pub const Token = struct { // Keywords let, + @"if", + @"else", }; }; @@ -120,7 +122,7 @@ fn identifierOrKeyword(self: *Self) Token { } const value = self.source[self.start..self.pos]; return self.create(switch (std.meta.stringToEnum(Token.Type, value) orelse .invalid) { - .let => .let, + .let, .@"if", .@"else" => |t| t, else => .identifier, }); } diff --git a/src/codegen.zig b/src/codegen.zig index 6b5fb1f..d30026f 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -544,9 +544,16 @@ const RegisterAllocator = struct { } }; +const Relocation = struct { + instr: usize, + target: compile.BlockRef, +}; + const Context = struct { register_allocator: RegisterAllocator, instructions: std.ArrayList(Instruction), + relocations: std.ArrayList(Relocation), + block_starts: std.ArrayList(usize), // Current stuff that changes often, basically here to avoid prop drilling. block: ?*const compile.BasicBlock = null, @@ -557,6 +564,13 @@ const Context = struct { self.instructions.deinit(); } + fn addRelocation(self: *Context, target: compile.BlockRef) !void { + try self.relocations.append(.{ + .instr = self.instructions.items.len, + .target = target, + }); + } + fn emit(self: *Context, inst: Instruction) !void { try self.instructions.append(inst); } @@ -567,12 +581,14 @@ const Context = struct { /// collide with the sources. Should be called before allocating results to allow for more /// register re-use. fn freeUnusedVRegs(self: *Context) !void { - var it = self.register_allocator.allocated.keyIterator(); - while (it.next()) |vreg| { - if (self.block.?.vreg_last_use.get(vreg.*).? <= self.current_instruction_index.?) { - self.register_allocator.free(vreg.*); - } - } + // TODO: make this do stuff again. + _ = self; + // var it = self.register_allocator.allocated.keyIterator(); + // while (it.next()) |vreg| { + // if (self.block.?.vreg_last_use.get(vreg.*).? <= self.current_instruction_index.?) { + // self.register_allocator.free(vreg.*); + // } + // } } fn genConstantInner(self: *Context, reg: Register, value: u64) !void { @@ -701,6 +717,28 @@ const Context = struct { } } + fn genBranch(self: *Context, branch: compile.Instr.Branch) !void { + const cond = self.register_allocator.get(branch.cond); + try self.freeUnusedVRegs(); + + try self.addRelocation(branch.false); + try self.emit(.beq(cond, .zero, 0)); + + try self.addRelocation(branch.true); + try self.emit(.jal(.zero, 0)); + } + + fn genJump(self: *Context, jump: compile.Instr.Jump) !void { + try self.addRelocation(jump.to); + try self.emit(.jal(.zero, 0)); + } + + fn genExit(self: *Context, _: compile.Instr.Exit) !void { + try self.emit(.addi(.a0, .zero, 0)); + try self.emit(.addi(.a7, .zero, 93)); + try self.emit(.ecall()); + } + fn codegenInstr(self: *Context, instr: compile.Instr) !void { switch (instr.type) { inline else => |ty| { @@ -725,24 +763,51 @@ const Context = struct { fn codegenBlock(self: *Context, block: compile.BasicBlock) !void { self.block = █ defer self.block = null; - for (block.instrs, 0..) |instr, i| { + for (block.instrs.items, 0..) |instr, i| { self.current_instruction_index = i; try self.codegenInstr(instr); } } + + fn codegenProc(self: *Context, proc: compile.Procedure) !void { + for (proc.blocks) |block| { + try self.block_starts.append(self.instructions.items.len); + try self.codegenBlock(block); + } + } }; -pub fn create_elf(allocator: Allocator, block: compile.BasicBlock) ![]u8 { - var ctx: Context = .{ .register_allocator = try .init(allocator), .instructions = .init(allocator) }; +pub fn create_elf(allocator: Allocator, proc: compile.Procedure) ![]u8 { + var ctx: Context = .{ + .register_allocator = try .init(allocator), + .instructions = .init(allocator), + .relocations = .init(allocator), + .block_starts = .init(allocator), + }; defer ctx.deinit(); - try ctx.codegenBlock(block); - - try ctx.instructions.appendSlice(&[_]Instruction{ - .addi(.a0, .zero, 0), - .addi(.a7, .zero, 93), - .ecall(), - }); + try ctx.codegenProc(proc); + + // TODO: make this less sheiße + for (ctx.relocations.items) |relocation| { + const instr = &ctx.instructions.items[relocation.instr]; + const opcode: Instruction.Opcode = @truncate(@as(u32, @bitCast(instr.*))); + const target: isize = @intCast(ctx.block_starts.items[@intFromEnum(relocation.target)]); + const from: isize = @intCast(relocation.instr); + switch (opcode) { + 0b1101111 => { + const jal: Instruction.J = instr.j; + instr.* = .jal(jal.rd, @intCast((target - from) * 4)); + }, + 0b1100011 => { + const b: Instruction.B = instr.b; + if (b.funct3 != 0) + std.debug.panic("Not yet implemented instruction with relocation\n", .{}); + instr.* = .beq(b.rs1, b.rs2, @intCast((target - from) * 4)); + }, + else => std.debug.panic("Not yet implemented instruction with relocation\n", .{}), + } + } std.debug.print("allocated regs: {}\n", .{root.fmtHashMap(ctx.register_allocator.allocated)}); diff --git a/src/compile.zig b/src/compile.zig index 5d57e71..0918d30 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -6,6 +6,7 @@ const parse = root.parse; const Location = root.Lexer.Location; pub const VReg = enum(u32) { _ }; +pub const BlockRef = enum(u32) { _ }; pub const Instr = struct { loc: Location, @@ -15,6 +16,9 @@ pub const Instr = struct { constant: Constant, bin_op: BinOp, proc_call: ProcCall, + branch: Branch, + jump: Jump, + exit: Exit, }; pub const Constant = struct { @@ -57,6 +61,30 @@ pub const Instr = struct { } }; + pub const Branch = struct { + cond: VReg, + true: BlockRef, + false: BlockRef, + + pub fn sources(self: Branch) Sources { + return Sources.fromSlice(&.{self.cond}) catch unreachable; + } + }; + + pub const Jump = struct { + to: BlockRef, + + pub fn sources(_: Jump) Sources { + return Sources.init(0) catch unreachable; + } + }; + + pub const Exit = struct { + pub fn sources(_: Exit) Sources { + return Sources.init(0) catch unreachable; + } + }; + pub fn sources(self: Instr) Sources { return switch (self.type) { inline else => |instr| instr.sources(), @@ -66,43 +94,94 @@ pub const Instr = struct { pub fn dest(self: *const Instr) ?VReg { return switch (self.type) { inline .constant, .bin_op, .proc_call => |s| s.dest, + .branch, .jump, .exit => null, }; } pub const Sources = std.BoundedArray(VReg, 2); + + pub fn format(self: Instr, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = .{ fmt, options }; + + switch (self.type) { + .constant => |constant| try writer.print( + "%{} = {}", + .{ @intFromEnum(constant.dest), constant.value }, + ), + .bin_op => |bin_op| try writer.print( + "%{} = %{} {s} %{}", + .{ + @intFromEnum(bin_op.dest), + @intFromEnum(bin_op.lhs), + @tagName(bin_op.op), + @intFromEnum(bin_op.rhs), + }, + ), + .proc_call => |proc_call| try writer.print( + "%{} = {s} %{}", + .{ + @intFromEnum(proc_call.dest), + @tagName(proc_call.proc), + @intFromEnum(proc_call.arg), + }, + ), + .branch => |branch| try writer.print( + "branch %{} ? ${} : ${}", + .{ + @intFromEnum(branch.cond), + @intFromEnum(branch.true), + @intFromEnum(branch.false), + }, + ), + .jump => |jump| try writer.print("jump ${}", .{@intFromEnum(jump.to)}), + .exit => |_| try writer.print("exit", .{}), + } + } +}; + +pub const Procedure = struct { + blocks: []BasicBlock, + + fn init(allocator: Allocator, blocks: []BasicBlock) !Procedure { + for (blocks) |*block| { + try block.finalize(allocator); + } + return .{ .blocks = blocks }; + } }; pub const BasicBlock = struct { // arguments: []Reg, - instrs: []Instr, - vreg_last_use: std.AutoHashMap(VReg, usize), + instrs: std.ArrayListUnmanaged(Instr) = .empty, - fn init(allocator: Allocator, instrs: []Instr) !BasicBlock { - var vreg_last_use: std.AutoHashMap(VReg, usize) = .init(allocator); - for (0.., instrs) |i, instr| { + vreg_last_use: std.AutoHashMapUnmanaged(VReg, usize) = .empty, + + fn finalize(self: *BasicBlock, allocator: Allocator) !void { + self.vreg_last_use = .empty; + for (0.., self.instrs.items) |i, instr| { for (instr.sources().slice()) |src| - try vreg_last_use.put(src, i); + try self.vreg_last_use.put(allocator, src, i); if (instr.dest()) |dest| - try vreg_last_use.put(dest, i); + try self.vreg_last_use.put(allocator, dest, i); } - return .{ - .instrs = instrs, - .vreg_last_use = vreg_last_use, - }; } }; -pub fn compile(allocator: Allocator, source: []const u8, block: parse.Block) !BasicBlock { - const instrs: std.ArrayListUnmanaged(Instr) = try .initCapacity(allocator, 0); +pub fn compile(allocator: Allocator, source: []const u8, block: parse.Block) !Procedure { var ctx: CompileContext = .{ .allocator = allocator, .source = source, .register_counter = 0, .scope = .{ .locals = .empty, .parent = null }, - .instrs = instrs, + .blocks = .empty, + .current_block = @enumFromInt(0), }; try ctx.compileBlock(block); - return .init(allocator, ctx.instrs.items); + try ctx.addInstr(.{ + .loc = .{ .start = 0, .end = 0 }, + .type = .{ .exit = .{} }, + }); + return try .init(allocator, try ctx.blocks.toOwnedSlice(allocator)); } const CompileError = error{ @@ -117,38 +196,51 @@ const CompileContext = struct { source: []const u8, register_counter: u32, scope: Scope, - instrs: std.ArrayListUnmanaged(Instr), + blocks: std.ArrayListUnmanaged(BasicBlock), + current_block: BlockRef, + // instrs: std.ArrayListUnmanaged(Instr), const Scope = struct { locals: std.StringHashMapUnmanaged(VReg), parent: ?*Scope, }; - const Self = @This(); - - fn addInstr(self: *Self, instr: Instr) !void { - try self.instrs.append(self.allocator, instr); - } + fn compileBlock(self: *CompileContext, block: parse.Block) !void { + _ = try self.switchToNewBlock(); - fn compileBlock(self: *Self, block: parse.Block) !void { - const parent = try self.allocator.create(Scope); - defer self.allocator.destroy(parent); - parent.* = self.scope; + var parent = self.scope; self.scope = .{ .locals = .empty, - .parent = parent, + .parent = &parent, }; + for (block.stmts) |stmt| { try self.compileStmt(stmt); } + self.scope.locals.deinit(self.allocator); - self.scope = parent.*; + self.scope = parent; } - fn compileStmt(self: *Self, stmt: parse.Stmt) CompileError!void { + fn compileStmt(self: *CompileContext, stmt: parse.Stmt) CompileError!void { switch (stmt.type) { .expr => |expr| _ = try self.compileExpr(expr), - .block => |block| try self.compileBlock(block), + .block => |block| { + const curr = self.current_block; + const after = try self.switchToNewBlock(); + try self.compileBlock(block); + const b = self.current_block; + try self.addInstr(.{ + .loc = stmt.loc, + .type = .{ .jump = .{ .to = after } }, + }); + self.current_block = curr; + try self.addInstr(.{ + .loc = stmt.loc, + .type = .{ .jump = .{ .to = b } }, + }); + self.current_block = after; + }, .declare_var => |declare_var| { const val = try self.compileExpr(declare_var.value); const name = declare_var.ident.getIdent(self.source); @@ -157,18 +249,18 @@ const CompileContext = struct { } } - fn compileExpr(self: *Self, expr: *const parse.Expr) !VReg { + fn compileExpr(self: *CompileContext, expr: *const parse.Expr) !VReg { // This is not used by all expression types, but creating an unused virtual register is not a problem. const dest = self.register(); switch (expr.type) { - .integer_literal => try addInstr(self, .{ + .integer_literal => try self.addInstr(.{ .loc = expr.loc, .type = .{ .constant = .{ .dest = dest, .value = expr.loc.getInt(self.source) } }, }), .bin_op => |binop| { const lhs = try self.compileExpr(binop.lhs); const rhs = try self.compileExpr(binop.rhs); - try addInstr(self, .{ + try self.addInstr(.{ .loc = expr.loc, .type = .{ .bin_op = .{ @@ -190,7 +282,11 @@ const CompileContext = struct { const arg = try self.compileExpr(call.arg); try self.addInstr(.{ .loc = expr.loc, - .type = .{ .proc_call = .{ .dest = dest, .arg = arg, .proc = proc } }, + .type = .{ .proc_call = .{ + .dest = dest, + .arg = arg, + .proc = proc, + } }, }); }, .identifier => { @@ -202,11 +298,57 @@ const CompileContext = struct { } return error.UnknownVariable; }, + .@"if" => |@"if"| { + const cond = try self.compileExpr(@"if".cond); + const curr = self.current_block; + + const after = try self.switchToNewBlock(); + + try self.compileBlock(@"if".then); + try self.addInstr(.{ + .loc = expr.loc, + .type = .{ .jump = .{ .to = after } }, + }); + const t = self.current_block; + + const f = if (@"if".@"else") |@"else"| blk: { + try self.compileBlock(@"else"); + try self.addInstr(.{ + .loc = expr.loc, + .type = .{ .jump = .{ .to = after } }, + }); + break :blk self.current_block; + } else after; + + self.current_block = curr; + try self.addInstr(.{ + .loc = expr.loc, + .type = .{ .branch = .{ + .cond = cond, + .true = t, + .false = f, + } }, + }); + + self.current_block = after; + }, } return dest; } - fn register(self: *Self) VReg { + fn switchToNewBlock(self: *CompileContext) !BlockRef { + const ref: BlockRef = @enumFromInt(self.blocks.items.len); + try self.blocks.append(self.allocator, .{}); + self.current_block = ref; + return ref; + } + + fn addInstr(self: *CompileContext, instr: Instr) !void { + try self.blocks.items[@intFromEnum(self.current_block)] + .instrs.append(self.allocator, instr); + } + + fn register(self: *CompileContext) VReg { const reg: VReg = @enumFromInt(self.register_counter); self.register_counter += 1; return reg; diff --git a/src/main.zig b/src/main.zig index 7af0885..48637c2 100644 --- a/src/main.zig +++ b/src/main.zig @@ -38,11 +38,13 @@ pub fn main() !void { const source = \\{ - \\ let x = 69 - \\ { - \\ # let x = read_int(0) - \\ print(18446744073709551615) - \\ print(x + x) + \\ let x = 10 + \\ if x { + \\ let x = read_int(0) + \\ # print(18446744073709551615) + \\ # print(x + x) + \\ } else { + \\ print(10) \\ } \\ print(x) \\} @@ -60,13 +62,16 @@ pub fn main() !void { if (lexer.peek().type != .eof) { std.debug.print("Unexpected token {}, expected end of file\n", .{lexer.next()}); } - const block = try compile.compile(allocator, source, ast); + const procedure = try compile.compile(allocator, source, ast); std.debug.print("Bytecode instructions:\n", .{}); - for (block.instrs) |instr| { - std.debug.print(" {}\n", .{instr}); + for (procedure.blocks, 0..) |block, i| { + std.debug.print(" ${}:\n", .{i}); + for (block.instrs.items) |instr| { + std.debug.print(" {}\n", .{instr}); + } } - std.debug.print("Last use of each virtual register: {}\n", .{fmtHashMap(block.vreg_last_use)}); - const elf = try codegen.create_elf(allocator, block); + // std.debug.print("Last use of each virtual register: {}\n", .{fmtHashMap(block.vreg_last_use)}); + const elf = try codegen.create_elf(allocator, procedure); try output.writeAll(elf); std.debug.print("Run output:\n", .{}); if (run) { diff --git a/src/parse.zig b/src/parse.zig index 422b4f5..92fb219 100644 --- a/src/parse.zig +++ b/src/parse.zig @@ -74,6 +74,7 @@ pub const Expr = struct { bin_op: BinOp, call: Call, identifier, + @"if": If, pub const BinOp = struct { lhs: *const Expr, @@ -97,6 +98,12 @@ pub const Expr = struct { proc: *const Expr, arg: *const Expr, }; + + pub const If = struct { + cond: *const Expr, + then: Block, + @"else": ?Block, + }; }; fn format(self: Expr, writer: anytype, source: []const u8, indent: usize) !void { @@ -109,6 +116,15 @@ pub const Expr = struct { try writer.print("{}({})", .{ fmt(call.proc, source, indent), fmt(call.arg, source, indent) }); }, .identifier => try writer.print("{s}", .{self.loc.getIdent(source)}), + .@"if" => |@"if"| { + try writer.print("if {} {}", .{ + fmt(@"if".cond, source, indent), + fmt(@"if".then, source, indent), + }); + if (@"if".@"else") |@"else"| { + try writer.print(" else {}", .{fmt(@"else", source, indent)}); + } + }, } } }; @@ -162,7 +178,7 @@ pub fn expression(allocator: Allocator, lexer: *Lexer) ParseError!*Expr { } pub fn parseTerms(allocator: Allocator, lexer: *Lexer) !*Expr { - var lhs = try parseInvocations(allocator, lexer); + var lhs = try parseIf(allocator, lexer); while (true) { const op: Expr.Type.BinOp.Op = switch (lexer.peek().type) { .plus => .plus, @@ -171,7 +187,7 @@ pub fn parseTerms(allocator: Allocator, lexer: *Lexer) !*Expr { }; _ = lexer.next(); - const rhs = try parseInvocations(allocator, lexer); + const rhs = try parseIf(allocator, lexer); lhs = try allocate(Expr, allocator, .{ .loc = lhs.loc.combine(rhs.loc), .type = .{ .bin_op = .{ .lhs = lhs, .op = op, .rhs = rhs } }, @@ -180,6 +196,29 @@ pub fn parseTerms(allocator: Allocator, lexer: *Lexer) !*Expr { return lhs; } +pub fn parseIf(allocator: Allocator, lexer: *Lexer) !*Expr { + switch (lexer.peek().type) { + .@"if" => { + const @"if" = lexer.next(); + const cond = try expression(allocator, lexer); + const then = try block(allocator, lexer); + const @"else" = if (lexer.peek().type == .@"else") blk: { + _ = lexer.next(); + break :blk try block(allocator, lexer); + } else null; + return try allocate(Expr, allocator, .{ + .loc = @"if".loc.combine((@"else" orelse then).loc), + .type = .{ .@"if" = .{ + .cond = cond, + .then = then, + .@"else" = @"else", + } }, + }); + }, + else => return try parseInvocations(allocator, lexer), + } +} + pub fn parseInvocations(allocator: Allocator, lexer: *Lexer) !*Expr { var proc = try parsePrimaryExpr(allocator, lexer); while (true) { |