From ee78756a504dc61f50422298cc1123c5ac6b3b69 Mon Sep 17 00:00:00 2001 From: Mathias Magnusson Date: Sat, 31 May 2025 02:12:02 +0200 Subject: actually codegen the provided code ... well, since all we can do is to add integer literals, we produce code for the calculations and then perform the exit syscall with the result --- src/codegen.zig | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- src/compile.zig | 79 ++++++++++++++++++++++++++++++++++++++++--------------- src/main.zig | 33 ++++++++++++++++++++--- src/parse.zig | 9 +++++++ 4 files changed, 170 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/codegen.zig b/src/codegen.zig index 89daaa4..59b7040 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -2,7 +2,7 @@ const std = @import("std"); const elf = std.elf; const Allocator = std.mem.Allocator; const root = @import("root"); -const Block = root.Block; +const compile = root.compile; const Register = enum(u5) { // zig fmt: off @@ -304,7 +304,7 @@ const Instruction = packed union { fn init(opcode: Opcode, funct3: u3, rs1: Register, rs2: Register, imm: i13) Self { std.debug.assert(imm % 2 == 0); const umm: u13 = @bitCast(imm); - return .{ .s = .{ + return .{ .b = .{ .opcode = opcode, .imm11 = @truncate(umm >> 11), .imm4_1 = @truncate(umm >> 1), @@ -371,7 +371,7 @@ const Instruction = packed union { imm12_31: u20, fn init(opcode: Opcode, rd: Register, imm: i20) Self { - return .{ .s = .{ + return .{ .u = .{ .opcode = opcode, .rd = rd, .imm12_31 = @bitCast(imm), @@ -419,17 +419,84 @@ const Instruction = packed union { const Self = @This(); }; -pub fn create_elf(allocator: Allocator, block: Block) ![]u8 { - _ = block; +const RegisterAllocator = struct { + allocated: std.AutoHashMap(compile.VReg, Register), + available: std.ArrayList(Register), + + fn init(allocator: Allocator) !RegisterAllocator { + var available: std.ArrayList(Register) = .init(allocator); + for ([_]Register{ .t6, .t5, .t4, .t3, .t2, .t1, .t0 }) |reg| { + try available.append(reg); + } + var allocated: std.AutoHashMap(compile.VReg, Register) = .init(allocator); + try allocated.ensureTotalCapacity(@intCast(available.items.len)); + return .{ + .allocated = allocated, + .available = available, + }; + } + + fn get(self: *const RegisterAllocator, vreg: compile.VReg) Register { + return self.allocated.get(vreg).?; + } + + fn allocate(self: *RegisterAllocator, vreg: compile.VReg) ?Register { + const reg = self.available.pop() orelse return null; + self.allocated.putAssumeCapacityNoClobber(vreg, reg); + return reg; + } + + fn free(self: *RegisterAllocator, vreg: compile.VReg) void { + const ent = self.allocated.fetchRemove(vreg).?; + const reg = ent.value; + std.debug.assert(std.mem.indexOfScalar(Register, self.available.items, reg) == null); + return self.available.appendAssumeCapacity(reg); + } +}; + +pub fn create_elf(allocator: Allocator, block: compile.Block) ![]u8 { var output_buffer: std.ArrayList(u8) = .init(allocator); errdefer output_buffer.deinit(); try output_buffer.appendNTimes(undefined, @sizeOf(elf.Elf64_Ehdr) + @sizeOf(elf.Elf64_Phdr)); + const output = output_buffer.writer(); + var register_allocator: RegisterAllocator = try .init(allocator); + + for (0.., block.instrs) |i, instr| { + switch (instr.type) { + .constant => |constant| { + const reg = register_allocator.allocate(constant.dest) orelse return error.OutOfRegisters; + + if (constant.value <= std.math.maxInt(i12)) { + try output.writeInt(u32, @bitCast(Instruction.addi(reg, .zero, @intCast(constant.value))), .little); + } else if (constant.value <= std.math.maxInt(i32)) { + // If the higest bit in the immediate in addi is set, it will be sign extended. We negate that by adding one more to the immediate for lui. + try output.writeInt(u32, @bitCast(Instruction.lui(reg, @intCast((constant.value >> 12) + if (constant.value & (1 << 11) != 0) @as(u64, 1) else 0))), .little); + try output.writeInt(u32, @bitCast(Instruction.addi(reg, reg, @bitCast(@as(u12, @truncate(constant.value))))), .little); + } else { + unreachable; // TODO + } + }, + .bin_op => |bin_op| { + const lhs = register_allocator.get(bin_op.lhs); + const rhs = register_allocator.get(bin_op.rhs); + for (instr.sources().slice()) |src| { + if (block.vreg_last_use.get(src) == i) { + register_allocator.free(src); + } + } + const reg = register_allocator.allocate(bin_op.dest) orelse return error.OutOfRegisters; + switch (bin_op.op) { + .add => try output.writeInt(u32, @bitCast(Instruction.add(reg, lhs, rhs)), .little), + } + }, + } + } + for ([_]Instruction{ + .addi(.a0, register_allocator.get(block.instrs[block.instrs.len - 1].dest()), 0), .addi(.a7, .zero, 93), - .addi(.a0, .zero, 71), - .xori(.a0, .a0, 2), .ecall(), }) |instr| { try output.writeInt(u32, @bitCast(instr), .little); diff --git a/src/compile.zig b/src/compile.zig index b9e1a86..822153f 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -5,47 +5,80 @@ const Token = root.Lexer.Token; const Expr = root.Expr; const Location = root.Lexer.Location; -pub const Reg = enum(u32) { _ }; +pub const VReg = enum(u32) { _ }; -pub const Instr = union(enum) { - constant: Constant, - bin_op: BinOp, +pub const Instr = struct { + loc: Location, + type: Type, + + const Type = union(enum) { + constant: Constant, + bin_op: BinOp, + }; const Constant = struct { - dest: Reg, - value: Location, + dest: VReg, + value: u64, }; const BinOp = struct { - dest: Reg, - lhs: Reg, - rhs: Reg, + dest: VReg, + lhs: VReg, + rhs: VReg, op: Op, const Op = enum { add, }; }; + + pub fn sources(self: Instr) std.BoundedArray(VReg, 2) { + return switch (self.type) { + .bin_op => |bin_op| std.BoundedArray(VReg, 2).fromSlice(&.{ bin_op.lhs, bin_op.rhs }) catch unreachable, + .constant => std.BoundedArray(VReg, 2).init(0) catch unreachable, + }; + } + + pub fn dest(self: *const Instr) VReg { + return switch (self.type) { + inline .constant, .bin_op => |s| s.dest, + }; + } }; pub const Block = struct { // arguments: []Reg, instrs: []Instr, + vreg_last_use: std.AutoHashMap(VReg, usize), + + fn init(allocator: Allocator, instrs: []Instr) !Block { + var vreg_last_use: std.AutoHashMap(VReg, usize) = .init(allocator); + for (0.., instrs) |i, instr| { + for (instr.sources().slice()) |src| + try vreg_last_use.put(src, i); + } + return .{ + .instrs = instrs, + .vreg_last_use = vreg_last_use, + }; + } }; -pub fn compile(allocator: Allocator, expr: *const Expr) !Block { +pub fn compile(allocator: Allocator, source: []const u8, expr: *const Expr) !Block { const instrs: std.ArrayListUnmanaged(Instr) = try .initCapacity(allocator, 0); var ctx: CompileContext = .{ .allocator = allocator, + .source = source, .register_counter = 0, .instrs = instrs, }; _ = try ctx.compileExpr(expr); - return .{ .instrs = ctx.instrs.items }; + return .init(allocator, ctx.instrs.items); } const CompileContext = struct { allocator: Allocator, + source: []const u8, register_counter: u32, instrs: std.ArrayListUnmanaged(Instr), @@ -55,22 +88,26 @@ const CompileContext = struct { try self.instrs.append(self.allocator, instr); } - fn compileExpr(self: *Self, expr: *const Expr) !Reg { + fn compileExpr(self: *Self, expr: *const Expr) !VReg { const dest = self.register(); switch (expr.type) { .integer_literal => try addInstr(self, .{ - .constant = .{ .dest = dest, .value = expr.loc }, + .loc = expr.loc, + .type = .{ .constant = .{ .dest = dest, .value = expr.getInt(self.source) } }, }), .bin_op => |binop| { const lhs = try self.compileExpr(binop.lhs); const rhs = try self.compileExpr(binop.rhs); try addInstr(self, .{ - .bin_op = .{ - .dest = dest, - .lhs = lhs, - .rhs = rhs, - .op = switch (binop.op) { - .plus => .add, + .loc = expr.loc, + .type = .{ + .bin_op = .{ + .dest = dest, + .lhs = lhs, + .rhs = rhs, + .op = switch (binop.op) { + .plus => .add, + }, }, }, }); @@ -80,8 +117,8 @@ const CompileContext = struct { return dest; } - fn register(self: *Self) Reg { - const reg: Reg = @enumFromInt(self.register_counter); + fn register(self: *Self) VReg { + const reg: VReg = @enumFromInt(self.register_counter); self.register_counter += 1; return reg; } diff --git a/src/main.zig b/src/main.zig index 34da76b..8f81f1d 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,14 +1,13 @@ const std = @import("std"); const parse = @import("./parse.zig"); const peek = @import("./peek.zig"); -const compile = @import("./compile.zig"); const codegen = @import("./codegen.zig"); pub const peekable = peek.peekable; pub const Peekable = peek.Peekable; pub const Expr = parse.Expr; pub const Lexer = @import("./lexer.zig"); -pub const Block = compile.Block; +pub const compile = @import("./compile.zig"); pub fn main() !void { var arena: std.heap.ArenaAllocator = .init(std.heap.smp_allocator); @@ -33,7 +32,7 @@ pub fn main() !void { // try stdout.print("{s}\n", .{line.items}); // } - const source = "1 + 2 + 3"; + const source = "420 + 1337 + 42"; // var lexer = Lexer{ .source = source }; // while (true) { // const token = lexer.next().?; @@ -47,7 +46,7 @@ pub fn main() !void { if (lexer.next()) |token| if (token.type != .eof) { std.debug.print("Unexpected token {}, expected end of file\n", .{token}); }; - const block = try compile.compile(allocator, expr); + const block = try compile.compile(allocator, source, expr); for (block.instrs) |instr| { std.debug.print("{}\n", .{instr}); } @@ -56,6 +55,32 @@ pub fn main() !void { // try stdout.print("{x}\n", .{elf}); } +fn HashMapFormatter(HashMap: type) type { + return std.fmt.Formatter(struct { + fn formatHashMap( + hash_map: HashMap, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + try writer.writeAll("{"); + var it = hash_map.iterator(); + var first = true; + while (it.next()) |kv| { + try writer.print("{s} {any}: {any}", .{ if (first) "" else ",", kv.key_ptr.*, kv.value_ptr.* }); + first = false; + } + try writer.writeAll(if (first) "}" else " }"); + } + }.formatHashMap); +} + +pub fn fmtHashMap(hash_map: anytype) HashMapFormatter(@TypeOf(hash_map)) { + return .{ .data = hash_map }; +} + test { _ = peek; } diff --git a/src/parse.zig b/src/parse.zig index 2149cd7..e0c1fd0 100644 --- a/src/parse.zig +++ b/src/parse.zig @@ -26,6 +26,15 @@ pub const Expr = struct { }; }; }; + + pub fn getInt(self: *const @This(), file_source: []const u8) u64 { + var value: u64 = 0; + for (file_source[self.loc.start..self.loc.end]) |c| { + std.debug.assert('0' <= c and c <= '9'); + value = value * 10 + c - '0'; + } + return value; + } }; pub fn expression(allocator: Allocator, lexer: *Peekable(Lexer)) error{OutOfMemory}!*Expr { -- cgit v1.2.3