diff options
author | Mathias Magnusson <mathias@magnusson.space> | 2025-07-24 22:15:03 +0200 |
---|---|---|
committer | Mathias Magnusson <mathias@magnusson.space> | 2025-07-24 22:17:19 +0200 |
commit | 0fa2f445eb7140214471074fc544adfd0f8a524f (patch) | |
tree | cb3bca4a9604df71cfe0cfcdb0c0f1e3590923ea | |
parent | b3909efb3a6bf76870b686b5062f9a4282fbdd66 (diff) | |
download | huginn-0fa2f445eb7140214471074fc544adfd0f8a524f.tar.gz |
continue implementing procedure calls
multiple procedures can now exist, but you cannot call them, the first
one is the "main" procedure since it happens to be placed first in the
binary, and all procedures end with an exit system call
-rw-r--r-- | fibonacci.hgn | 22 | ||||
-rw-r--r-- | src/Lexer.zig | 3 | ||||
-rw-r--r-- | src/codegen.zig | 26 | ||||
-rw-r--r-- | src/compile.zig | 222 | ||||
-rw-r--r-- | src/main.zig | 6 | ||||
-rw-r--r-- | src/parse.zig | 73 |
6 files changed, 232 insertions, 120 deletions
diff --git a/fibonacci.hgn b/fibonacci.hgn index 01d9333..c42241b 100644 --- a/fibonacci.hgn +++ b/fibonacci.hgn @@ -1,11 +1,13 @@ -a := 0 -b := 1 -n := 10 -# n := read_int(0) -while n > 0 { - c := a + b - a = b - b = c - n = n - 1 +main := proc() { + a := 0 + b := 1 + n := 10 + # n := read_int(0) + while n > 0 { + c := a + b + a = b + b = c + n = n - 1 + } + print(a) } -print(a) diff --git a/src/Lexer.zig b/src/Lexer.zig index 4b664fe..87ced92 100644 --- a/src/Lexer.zig +++ b/src/Lexer.zig @@ -26,6 +26,7 @@ pub const Token = struct { @"if", @"else", @"while", + proc, }; }; @@ -137,7 +138,7 @@ fn identifierOrKeyword(self: *Self) Token { } const value = self.source[self.start..self.pos]; return self.create(switch (std.meta.stringToEnum(Token.Type, value) orelse .invalid) { - .@"if", .@"else", .@"while" => |t| t, + .@"if", .@"else", .@"while", .proc => |t| t, else => .identifier, }); } diff --git a/src/codegen.zig b/src/codegen.zig index a1c2171..00b3857 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -563,7 +563,7 @@ const Relocation = struct { const Context = struct { instructions: std.ArrayList(Instruction), relocations: std.ArrayList(Relocation), - block_starts: std.ArrayList(usize), + block_addrs: std.AutoHashMap(compile.BlockRef, usize), print_block: compile.BlockRef, read_int_block: compile.BlockRef, @@ -767,7 +767,7 @@ const ProcedureContext = struct { fn codegenProc(self: *Self, proc: compile.Procedure) !void { for (proc.blocks) |block| { - try self.ctx.block_starts.append(self.ctx.instructions.items.len); + try self.ctx.block_addrs.putNoClobber(block.ref, self.ctx.instructions.items.len); try self.codegenBlock(block); } } @@ -845,17 +845,23 @@ fn codegenReadInt(self: *Context) !void { try self.emit(.jalr(.zero, .ra, 0)); } -pub fn create_elf(allocator: Allocator, proc: compile.Procedure) ![]u8 { +pub fn create_elf(allocator: Allocator, mod: compile.Module) ![]u8 { + var maxBlockRef: usize = 0; + for (mod.procedures) |proc| { + for (proc.blocks) |block| { + maxBlockRef = @max(maxBlockRef, @intFromEnum(block.ref)); + } + } var ctx: Context = .{ .instructions = .init(allocator), .relocations = .init(allocator), - .block_starts = .init(allocator), - .print_block = @enumFromInt(proc.blocks.len), - .read_int_block = @enumFromInt(proc.blocks.len + 1), + .block_addrs = .init(allocator), + .print_block = @enumFromInt(maxBlockRef + 1), + .read_int_block = @enumFromInt(maxBlockRef + 2), }; defer ctx.deinit(); - { + for (mod.procedures) |proc| { var proc_ctx: ProcedureContext = .{ .register_allocator = try .init(allocator, &.{ .t6, .t5, .t4, .t3, .t2, .t1, .t0 }), .lvar_allocator = try .init(allocator, &.{ .s11, .s10, .s9, .s8, .s7, .s6, .s5, .s4, .s3, .s2, .s1, .s0 }), @@ -867,15 +873,15 @@ pub fn create_elf(allocator: Allocator, proc: compile.Procedure) ![]u8 { std.debug.assert(proc_ctx.register_allocator.allocated.count() == 0); } - try ctx.block_starts.append(ctx.instructions.items.len); + try ctx.block_addrs.putNoClobber(ctx.print_block, ctx.instructions.items.len); try codegenPrint(&ctx); - try ctx.block_starts.append(ctx.instructions.items.len); + try ctx.block_addrs.putNoClobber(ctx.read_int_block, ctx.instructions.items.len); try codegenReadInt(&ctx); // TODO: make this less sheiße for (ctx.relocations.items) |relocation| { const instr = &ctx.instructions.items[relocation.instr]; - const target: isize = @intCast(ctx.block_starts.items[@intFromEnum(relocation.target)]); + const target: isize = @intCast(ctx.block_addrs.get(relocation.target).?); const from: isize = @intCast(relocation.instr); switch (instr.*) { .j => |j| { diff --git a/src/compile.zig b/src/compile.zig index 583be50..d2226f1 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -30,6 +30,77 @@ fn IdCounter(Id: type) type { }; } +pub const Module = struct { + procedures: []Procedure, + + pub fn format(self: Module, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = .{ fmt, options }; + for (self.procedures) |proc| { + try writer.print("{}", .{proc}); + } + } +}; + +pub const Procedure = struct { + name: []const u8, + blocks: []BasicBlock, + + fn init(allocator: Allocator, name: []const u8, blocks: []BasicBlock) !Procedure { + for (blocks) |*block| { + try block.finalize(allocator); + } + + return .{ .name = name, .blocks = blocks }; + } + + pub fn format(self: Procedure, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = .{ fmt, options }; + try writer.print("{s}:\n", .{self.name}); + for (self.blocks, 0..) |block, i| { + try writer.print(" ${}{}", .{ i, block }); + } + try writer.writeAll("\n"); + } +}; + +pub const BasicBlock = struct { + ref: BlockRef, + instrs: std.ArrayListUnmanaged(Instr) = .empty, + + vreg_last_use: std.AutoHashMapUnmanaged(VReg, usize) = .empty, + + fn finalize(self: *BasicBlock, allocator: Allocator) !void { + std.debug.assert(self.instrs.items.len > 0); + std.debug.assert(switch (self.instrs.getLast().type) { + inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"), + }); + + self.vreg_last_use = .empty; + for (0.., self.instrs.items) |i, instr| { + for (instr.sources().slice()) |src| + try self.vreg_last_use.put(allocator, src, i); + if (instr.dest()) |dest| + try self.vreg_last_use.put(allocator, dest, i); + } + } + + fn immediate_successors(self: *BasicBlock) std.BoundedArray(BlockRef, 2) { + return switch (self.instrs.getLast().type) { + .branch => |branch| std.BoundedArray(BlockRef, 2).fromSlice(&.{ branch.true.to, branch.false.to }) catch unreachable, + .jump => |jump| std.BoundedArray(BlockRef, 2).fromSlice(&.{jump.to}) catch unreachable, + else => .{}, + }; + } + + pub fn format(self: BasicBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = .{ fmt, options }; + try writer.print(":\n", .{}); + for (self.instrs.items) |instr| { + try writer.print(" {}\n", .{instr}); + } + } +}; + pub const Instr = struct { loc: Location, type: Type, @@ -199,95 +270,76 @@ pub const Instr = struct { } }; -pub const Procedure = struct { - blocks: []BasicBlock, - - fn init(allocator: Allocator, blocks: []BasicBlock) !Procedure { - for (blocks) |*block| { - try block.finalize(allocator); - } - - return .{ .blocks = blocks }; - } - - pub fn format(self: Procedure, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = .{ fmt, options }; - for (self.blocks, 0..) |block, i| { - try writer.print("${}{}", .{ i, block }); - } - } +const Scope = struct { + locals: std.StringHashMapUnmanaged(LVar), + parent: ?*Scope, }; -pub const BasicBlock = struct { - instrs: std.ArrayListUnmanaged(Instr) = .empty, - - vreg_last_use: std.AutoHashMapUnmanaged(VReg, usize) = .empty, - - fn finalize(self: *BasicBlock, allocator: Allocator) !void { - std.debug.assert(self.instrs.items.len > 0); - std.debug.assert(switch (self.instrs.getLast().type) { - inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"), - }); - - self.vreg_last_use = .empty; - for (0.., self.instrs.items) |i, instr| { - for (instr.sources().slice()) |src| - try self.vreg_last_use.put(allocator, src, i); - if (instr.dest()) |dest| - try self.vreg_last_use.put(allocator, dest, i); - } - } +const CompileError = error{ + OutOfMemory, + CanOnlyCallIdentifiers, + UnknownProcedure, + UnknownVariable, + CannotDefineProcedureHere, +}; - fn immediate_successors(self: *BasicBlock) std.BoundedArray(BlockRef, 2) { - return switch (self.instrs.getLast().type) { - .branch => |branch| std.BoundedArray(BlockRef, 2).fromSlice(&.{ branch.true.to, branch.false.to }) catch unreachable, - .jump => |jump| std.BoundedArray(BlockRef, 2).fromSlice(&.{jump.to}) catch unreachable, - else => .{}, - }; +pub fn compile(allocator: Allocator, source: []const u8, file: parse.File) !Module { + const procs = try allocator.alloc(Procedure, file.decls.len); + var ctx: Context = .{ + .allocator = allocator, + .block_ctx = .init, + }; + for (procs, file.decls) |*proc, decl| { + proc.* = try compileProcedure( + &ctx, + source, + decl.inner.ident, + decl.inner.value.type.proc, + ); } + return .{ + .procedures = procs, + }; +} - pub fn format(self: BasicBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = .{ fmt, options }; - try writer.print(":\n", .{}); - for (self.instrs.items) |instr| { - try writer.print(" {}\n", .{instr}); - } - } +const Context = struct { + allocator: Allocator, + block_ctx: IdCounter(BlockRef), }; -pub fn compile(allocator: Allocator, source: []const u8, block: parse.Block) !Procedure { - var ctx: CompileContext = .{ - .allocator = allocator, +fn compileProcedure( + ctx: *Context, + source: []const u8, + name: Location, + proc: parse.Expr.Type.Proc, +) !Procedure { + var pctx: ProcedureContext = .{ + .ctx = ctx, .source = source, .vreg_ctr = .init, .lvar_ctr = .init, .scope = .{ .locals = .empty, .parent = null }, .blocks = .empty, - .current_block = @enumFromInt(0), + .current_block = undefined, // immediately set by `switchToNewBlock` }; - _ = try ctx.switchToNewBlock(); - try ctx.compileBlock(block); - try ctx.addInstr(.{ + _ = try pctx.switchToNewBlock(); + try pctx.compileBlock(proc.body); + try pctx.addInstr(.{ .loc = .{ .start = 0, .end = 0 }, .type = .{ .exit = .{} }, }); - return try .init(allocator, try ctx.blocks.toOwnedSlice(allocator)); + var blocks = try ctx.allocator.alloc(BasicBlock, pctx.blocks.count()); + var it = pctx.blocks.iterator(); + var i: usize = 0; + while (it.next()) |kv| : (i += 1) { + std.debug.assert(kv.key_ptr.* == kv.value_ptr.ref); + blocks[i] = kv.value_ptr.*; + } + return try .init(ctx.allocator, name.getIdent(source), blocks); } -const Scope = struct { - locals: std.StringHashMapUnmanaged(LVar), - parent: ?*Scope, -}; - -const CompileError = error{ - OutOfMemory, - CanOnlyCallIdentifiers, - UnknownProcedure, - UnknownVariable, -}; - -const CompileContext = struct { - allocator: Allocator, +const ProcedureContext = struct { + ctx: *Context, source: []const u8, vreg_ctr: IdCounter(VReg), @@ -295,10 +347,10 @@ const CompileContext = struct { scope: Scope, - blocks: std.ArrayListUnmanaged(BasicBlock), + blocks: std.AutoArrayHashMapUnmanaged(BlockRef, BasicBlock), current_block: BlockRef, - fn compileBlock(self: *CompileContext, block: parse.Block) !void { + fn compileBlock(self: *ProcedureContext, block: parse.Block) !void { var parent = self.scope; self.scope = .{ .locals = .empty, @@ -309,11 +361,11 @@ const CompileContext = struct { try self.compileStmt(stmt); } - self.scope.locals.deinit(self.allocator); + self.scope.locals.deinit(self.ctx.allocator); self.scope = parent; } - fn compileStmt(self: *CompileContext, stmt: parse.Stmt) CompileError!void { + fn compileStmt(self: *ProcedureContext, stmt: parse.Stmt) CompileError!void { switch (stmt.type) { .expr => |expr| _ = try self.compileExpr(expr), .block => |block| { @@ -323,7 +375,7 @@ const CompileContext = struct { const local = if (assign_var.is_decl) blk: { const local = self.lvar_ctr.get(); const name = assign_var.ident.getIdent(self.source); - try self.scope.locals.put(self.allocator, name, local); + try self.scope.locals.put(self.ctx.allocator, name, local); break :blk local; } else blk: { var scope: ?*Scope = &self.scope; @@ -373,7 +425,7 @@ const CompileContext = struct { } } - fn compileExpr(self: *CompileContext, expr: *const parse.Expr) !VReg { + fn compileExpr(self: *ProcedureContext, expr: *const parse.Expr) !VReg { // This is not used by all expression types, but creating an unused virtual register is not a problem. const dest = self.vreg_ctr.get(); switch (expr.type) { @@ -470,19 +522,23 @@ const CompileContext = struct { self.current_block = after; }, + .proc => |proc| { + _ = proc; + return error.CannotDefineProcedureHere; + }, } return dest; } - fn switchToNewBlock(self: *CompileContext) !BlockRef { - const ref: BlockRef = @enumFromInt(self.blocks.items.len); - try self.blocks.append(self.allocator, .{}); + fn switchToNewBlock(self: *ProcedureContext) !BlockRef { + const ref = self.ctx.block_ctx.get(); + try self.blocks.putNoClobber(self.ctx.allocator, ref, .{ .ref = ref }); self.current_block = ref; return ref; } - fn addInstr(self: *CompileContext, instr: Instr) !void { - try self.blocks.items[@intFromEnum(self.current_block)] - .instrs.append(self.allocator, instr); + fn addInstr(self: *ProcedureContext, instr: Instr) !void { + try self.blocks.getPtr(self.current_block).? + .instrs.append(self.ctx.allocator, instr); } }; diff --git a/src/main.zig b/src/main.zig index 5873c66..55b7e0e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -47,9 +47,9 @@ pub fn main() !void { if (lexer.peek().type != .eof) { std.debug.print("Unexpected token {}, expected end of file\n", .{lexer.next()}); } - const procedure = try compile.compile(allocator, source, ast); - std.debug.print("Bytecode instructions:\n{}", .{procedure}); - const elf = try codegen.create_elf(allocator, procedure); + const module = try compile.compile(allocator, source, ast); + std.debug.print("Bytecode instructions:\n{}", .{module}); + const elf = try codegen.create_elf(allocator, module); try out_file.writer().writeAll(elf); std.debug.print("Run output:\n", .{}); if (run) { diff --git a/src/parse.zig b/src/parse.zig index bd6b000..bd0ca46 100644 --- a/src/parse.zig +++ b/src/parse.zig @@ -23,6 +23,25 @@ pub fn fmt(tree: anytype, source: []const u8, indent: usize) Fmt(@TypeOf(tree)) return .{ .data = .{ tree, source, indent } }; } +pub const File = struct { + decls: []Decl, + + const Decl = struct { + loc: Lexer.Location, + inner: Stmt.Type.AssignVar, + }; + + fn format(self: File, writer: anytype, source: []const u8, indent: usize) !void { + for (self.decls) |decl| { + try writer.print("{s} {s} {}", .{ + decl.inner.ident.getIdent(source), + ":=", + fmt(decl.inner.value, source, indent), + }); + } + } +}; + pub const Block = struct { loc: Lexer.Location, stmts: []Stmt, @@ -87,6 +106,7 @@ pub const Expr = struct { call: Call, identifier, @"if": If, + proc: Proc, pub const BinOp = struct { lhs: *const Expr, @@ -124,6 +144,10 @@ pub const Expr = struct { then: Block, @"else": ?Block, }; + + pub const Proc = struct { + body: Block, + }; }; fn format(self: Expr, writer: anytype, source: []const u8, indent: usize) !void { @@ -145,6 +169,9 @@ pub const Expr = struct { try writer.print(" else {}", .{fmt(@"else", source, indent)}); } }, + .proc => |proc| { + try writer.print("proc() {}", .{fmt(proc.body, source, indent)}); + }, } } }; @@ -157,22 +184,28 @@ const ParseError = error{ ExprStatementMustBeCall, }; -pub fn file(allocator: Allocator, lexer: *Lexer) !Block { - var stmts: std.ArrayList(Stmt) = .init(allocator); +pub fn file(allocator: Allocator, lexer: *Lexer) !File { + var decls: std.ArrayList(File.Decl) = .init(allocator); while (lexer.peek().type != .eof) { - try stmts.append(try statement(allocator, lexer)); + const stmt = try parseStatement(allocator, lexer); + if (stmt.type != .assign_var or !stmt.type.assign_var.is_decl) { + return error.ExpectedProcedureDeclaration; + } + try decls.append(.{ + .loc = stmt.loc, + .inner = stmt.type.assign_var, + }); } return .{ - .loc = stmts.items[0].loc.combine(stmts.getLast().loc), - .stmts = try stmts.toOwnedSlice(), + .decls = try decls.toOwnedSlice(), }; } -fn block(allocator: Allocator, lexer: *Lexer) !Block { +fn parseBlock(allocator: Allocator, lexer: *Lexer) !Block { const left_curly = try mustEat(lexer, .left_curly); var stmts: std.ArrayList(Stmt) = .init(allocator); while (lexer.peek().type != .right_curly) { - try stmts.append(try statement(allocator, lexer)); + try stmts.append(try parseStatement(allocator, lexer)); } const right_curly = try mustEat(lexer, .right_curly); return .{ @@ -181,10 +214,10 @@ fn block(allocator: Allocator, lexer: *Lexer) !Block { }; } -fn statement(allocator: Allocator, lexer: *Lexer) ParseError!Stmt { +fn parseStatement(allocator: Allocator, lexer: *Lexer) ParseError!Stmt { switch (lexer.peek().type) { .left_curly => { - const b = try block(allocator, lexer); + const b = try parseBlock(allocator, lexer); return .{ .loc = b.loc, .type = .{ .block = b }, @@ -193,7 +226,7 @@ fn statement(allocator: Allocator, lexer: *Lexer) ParseError!Stmt { .@"while" => { const @"while" = lexer.next(); const cond = try expression(allocator, lexer); - const do = try block(allocator, lexer); + const do = try parseBlock(allocator, lexer); return .{ .loc = @"while".loc.combine(do.loc), .type = .{ .@"while" = .{ @@ -232,7 +265,21 @@ fn statement(allocator: Allocator, lexer: *Lexer) ParseError!Stmt { } fn expression(allocator: Allocator, lexer: *Lexer) ParseError!*Expr { - return parseComparisons(allocator, lexer); + return parseProc(allocator, lexer); +} + +fn parseProc(allocator: Allocator, lexer: *Lexer) ParseError!*Expr { + if (lexer.peek().type != .proc) return parseComparisons(allocator, lexer); + const proc = try mustEat(lexer, .proc); + _ = try mustEat(lexer, .left_paren); + // TODO: parameters + _ = try mustEat(lexer, .right_paren); + const body = try parseBlock(allocator, lexer); + + return allocate(Expr, allocator, .{ + .loc = proc.loc.combine(body.loc), + .type = .{ .proc = .{ .body = body } }, + }); } fn parseComparisons(allocator: Allocator, lexer: *Lexer) ParseError!*Expr { @@ -278,10 +325,10 @@ fn parseIf(allocator: Allocator, lexer: *Lexer) !*Expr { .@"if" => { const @"if" = lexer.next(); const cond = try expression(allocator, lexer); - const then = try block(allocator, lexer); + const then = try parseBlock(allocator, lexer); const @"else" = if (lexer.peek().type == .@"else") blk: { _ = lexer.next(); - break :blk try block(allocator, lexer); + break :blk try parseBlock(allocator, lexer); } else null; return try allocate(Expr, allocator, .{ .loc = @"if".loc.combine((@"else" orelse then).loc), |