aboutsummaryrefslogtreecommitdiff
path: root/src/compile.zig
diff options
context:
space:
mode:
authorMathias Magnusson <mathias@magnusson.space>2025-06-07 00:05:22 +0200
committerMathias Magnusson <mathias@magnusson.space>2025-06-07 00:57:32 +0200
commit1ed87d6c58cab383f18590093f651ef35c4fa671 (patch)
treec76bc3b8797f6e9973439462565b015da1dcbd80 /src/compile.zig
parent8c9a56311ac2774bf83eda4ea0aa9bde0aec125f (diff)
downloadhuginn-1ed87d6c58cab383f18590093f651ef35c4fa671.tar.gz
add variable reassignments and basic block arguments
Diffstat (limited to 'src/compile.zig')
-rw-r--r--src/compile.zig290
1 files changed, 229 insertions, 61 deletions
diff --git a/src/compile.zig b/src/compile.zig
index 8cf61ca..c0ba77d 100644
--- a/src/compile.zig
+++ b/src/compile.zig
@@ -5,8 +5,30 @@ const Token = root.Lexer.Token;
const parse = root.parse;
const Location = root.Lexer.Location;
+/// Virtual register
pub const VReg = enum(u32) { _ };
pub const BlockRef = enum(u32) { _ };
+/// Id for local variables in a procedure
+pub const LVar = enum(u32) { _ };
+
+fn IdCounter(Id: type) type {
+ const Inner = @typeInfo(Id).@"enum".tag_type;
+ std.debug.assert(@typeInfo(Inner) == .int);
+ return struct {
+ counter: Inner,
+
+ const init: Self = .{ .counter = 0 };
+
+ fn get(self: *Self) Id {
+ const id: Id = @enumFromInt(self.counter);
+ self.counter = std.math.add(Inner, self.counter, 1) catch
+ std.debug.panic("IdCounter({}) ran out of ids!", .{Id});
+ return id;
+ }
+
+ const Self = @This();
+ };
+}
pub const Instr = struct {
loc: Location,
@@ -63,23 +85,25 @@ pub const Instr = struct {
pub const Branch = struct {
cond: VReg,
- true: BlockRef,
- false: BlockRef,
+ true: Jump,
+ false: Jump,
pub const may_end_block = {};
- pub fn sources(self: Branch) Sources {
- return Sources.fromSlice(&.{self.cond}) catch unreachable;
+ pub fn sources(_: Branch) Sources {
+ @panic("Sources not implemented for Branch!!!");
+ // return Sources.fromSlice(&.{self.cond}) catch unreachable;
}
};
pub const Jump = struct {
to: BlockRef,
+ args: std.ArrayListUnmanaged(VReg) = .empty,
pub const may_end_block = {};
- pub fn sources(_: Jump) Sources {
- return Sources.init(0) catch unreachable;
+ pub fn sources(self: Jump) Sources {
+ return Sources.fromSlice(self.args) catch unreachable;
}
};
@@ -131,15 +155,28 @@ pub const Instr = struct {
@intFromEnum(proc_call.arg),
},
),
- .branch => |branch| try writer.print(
- "branch %{} ? ${} : ${}",
- .{
+ .branch => |branch| {
+ try writer.print("branch %{} ? ${}(", .{
@intFromEnum(branch.cond),
- @intFromEnum(branch.true),
- @intFromEnum(branch.false),
- },
- ),
- .jump => |jump| try writer.print("jump ${}", .{@intFromEnum(jump.to)}),
+ @intFromEnum(branch.true.to),
+ });
+ for (branch.true.args.items, 0..) |arg, i| {
+ try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) });
+ }
+ try writer.print(") : ${}(", .{@intFromEnum(branch.false.to)});
+ for (branch.false.args.items, 0..) |arg, i| {
+ try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) });
+ }
+ try writer.print(")", .{});
+ },
+ .jump => |jump| {
+ try writer.print("jump ${}(", .{@intFromEnum(jump.to)});
+ for (jump.args.items, 0..) |arg, i| {
+ try writer.print("{s}%{}", .{ if (i > 0) ", " else "", @intFromEnum(arg) });
+ }
+ try writer.print(")", .{});
+ },
+
.exit => |_| try writer.print("exit", .{}),
}
}
@@ -148,21 +185,34 @@ pub const Instr = struct {
pub const Procedure = struct {
blocks: []BasicBlock,
- fn init(allocator: Allocator, blocks: []BasicBlock) !Procedure {
- for (blocks) |*block| {
- try block.finalize(allocator);
- }
+ fn init(blocks: []BasicBlock) !Procedure {
+ // for (blocks) |*block| {
+ // try block.finalize(allocator);
+ // }
+
return .{ .blocks = blocks };
}
+
+ pub fn format(self: Procedure, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+ _ = .{ fmt, options };
+ for (self.blocks, 0..) |block, i| {
+ try writer.print("${}{}", .{ i, block });
+ }
+ }
};
pub const BasicBlock = struct {
- // arguments: []Reg,
+ params: std.AutoArrayHashMapUnmanaged(LVar, VReg) = .empty,
instrs: std.ArrayListUnmanaged(Instr) = .empty,
vreg_last_use: std.AutoHashMapUnmanaged(VReg, usize) = .empty,
fn finalize(self: *BasicBlock, allocator: Allocator) !void {
+ std.debug.assert(self.instrs.items > 0);
+ std.debug.assert(switch (self.instrs.getLast().type) {
+ inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"),
+ });
+
self.vreg_last_use = .empty;
for (0.., self.instrs.items) |i, instr| {
for (instr.sources().slice()) |src|
@@ -170,29 +220,64 @@ pub const BasicBlock = struct {
if (instr.dest()) |dest|
try self.vreg_last_use.put(allocator, dest, i);
}
- std.debug.assert(switch (self.instrs.items[self.instrs.items.len - 1].type) {
- inline else => |ty| @hasDecl(@TypeOf(ty), "may_end_block"),
- });
}
+
+ fn immediate_successors(self: *BasicBlock) std.BoundedArray(BlockRef, 2) {
+ return switch (self.instrs.getLast().type) {
+ .branch => |branch| std.BoundedArray(BlockRef, 2).fromSlice(&.{ branch.true.to, branch.false.to }) catch unreachable,
+ .jump => |jump| std.BoundedArray(BlockRef, 2).fromSlice(&.{jump.to}) catch unreachable,
+ else => .{},
+ };
+ }
+
+ pub fn format(self: BasicBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+ _ = .{ fmt, options };
+ try writer.print("(", .{});
+ {
+ var it = self.params.iterator();
+ var first = true;
+ while (it.next()) |ent| {
+ try writer.print("{s}%{}", .{ if (first) "" else ", ", @intFromEnum(ent.value_ptr.*) });
+ first = false;
+ }
+ }
+ try writer.print("):\n", .{});
+ for (self.instrs.items) |instr| {
+ try writer.print(" {}\n", .{instr});
+ }
+ }
+
+ const Info = struct {
+ local_current_vreg: std.AutoArrayHashMapUnmanaged(LVar, VReg) = .empty,
+ };
};
pub fn compile(allocator: Allocator, source: []const u8, block: parse.Block) !Procedure {
var ctx: CompileContext = .{
.allocator = allocator,
.source = source,
- .register_counter = 0,
+ .vreg_ctr = .init,
+ .lvar_ctr = .init,
.scope = .{ .locals = .empty, .parent = null },
.blocks = .empty,
+ .block_infos = .empty,
.current_block = @enumFromInt(0),
};
+ _ = try ctx.switchToNewBlock();
try ctx.compileBlock(block);
try ctx.addInstr(.{
.loc = .{ .start = 0, .end = 0 },
.type = .{ .exit = .{} },
});
- return try .init(allocator, try ctx.blocks.toOwnedSlice(allocator));
+ try ctx.assignArguments();
+ return try .init(try ctx.blocks.toOwnedSlice(allocator));
}
+const Scope = struct {
+ locals: std.StringHashMapUnmanaged(LVar),
+ parent: ?*Scope,
+};
+
const CompileError = error{
OutOfMemory,
CanOnlyCallIdentifiers,
@@ -203,20 +288,17 @@ const CompileError = error{
const CompileContext = struct {
allocator: Allocator,
source: []const u8,
- register_counter: u32,
+
+ vreg_ctr: IdCounter(VReg),
+ lvar_ctr: IdCounter(LVar),
+
scope: Scope,
+
blocks: std.ArrayListUnmanaged(BasicBlock),
+ block_infos: std.ArrayListUnmanaged(BasicBlock.Info),
current_block: BlockRef,
- // instrs: std.ArrayListUnmanaged(Instr),
-
- const Scope = struct {
- locals: std.StringHashMapUnmanaged(VReg),
- parent: ?*Scope,
- };
fn compileBlock(self: *CompileContext, block: parse.Block) !void {
- _ = try self.switchToNewBlock();
-
var parent = self.scope;
self.scope = .{
.locals = .empty,
@@ -235,32 +317,31 @@ const CompileContext = struct {
switch (stmt.type) {
.expr => |expr| _ = try self.compileExpr(expr),
.block => |block| {
- const curr = self.current_block;
- const after = try self.switchToNewBlock();
try self.compileBlock(block);
- const b = self.current_block;
- try self.addInstr(.{
- .loc = stmt.loc,
- .type = .{ .jump = .{ .to = after } },
- });
- self.current_block = curr;
- try self.addInstr(.{
- .loc = stmt.loc,
- .type = .{ .jump = .{ .to = b } },
- });
- self.current_block = after;
},
- .declare_var => |declare_var| {
- const val = try self.compileExpr(declare_var.value);
- const name = declare_var.ident.getIdent(self.source);
- try self.scope.locals.put(self.allocator, name, val);
+ .assign_var => |assign_var| {
+ const local = if (assign_var.is_decl) blk: {
+ const local = self.lvar_ctr.get();
+ const name = assign_var.ident.getIdent(self.source);
+ try self.scope.locals.put(self.allocator, name, local);
+ break :blk local;
+ } else blk: {
+ var scope: ?*Scope = &self.scope;
+ while (scope) |s| : (scope = s.parent) {
+ if (s.locals.get(assign_var.ident.getIdent(self.source))) |local|
+ break :blk local;
+ } else return error.UnknownVariable;
+ };
+
+ const val = try self.compileExpr(assign_var.value);
+ try self.assignLocalVar(local, val);
},
}
}
fn compileExpr(self: *CompileContext, expr: *const parse.Expr) !VReg {
// This is not used by all expression types, but creating an unused virtual register is not a problem.
- const dest = self.register();
+ const dest = self.vreg_ctr.get();
switch (expr.type) {
.integer_literal => try self.addInstr(.{
.loc = expr.loc,
@@ -301,8 +382,8 @@ const CompileContext = struct {
.identifier => {
var scope: ?*Scope = &self.scope;
while (scope) |s| : (scope = s.parent) {
- if (s.locals.get(expr.loc.getIdent(self.source))) |reg| {
- return reg;
+ if (s.locals.get(expr.loc.getIdent(self.source))) |local| {
+ return self.getLocalVar(local);
}
}
return error.UnknownVariable;
@@ -313,20 +394,21 @@ const CompileContext = struct {
const after = try self.switchToNewBlock();
+ const t = try self.switchToNewBlock();
try self.compileBlock(@"if".then);
try self.addInstr(.{
.loc = expr.loc,
.type = .{ .jump = .{ .to = after } },
});
- const t = self.current_block;
const f = if (@"if".@"else") |@"else"| blk: {
+ const f = try self.switchToNewBlock();
try self.compileBlock(@"else");
try self.addInstr(.{
.loc = expr.loc,
.type = .{ .jump = .{ .to = after } },
});
- break :blk self.current_block;
+ break :blk f;
} else after;
self.current_block = curr;
@@ -334,8 +416,8 @@ const CompileContext = struct {
.loc = expr.loc,
.type = .{ .branch = .{
.cond = cond,
- .true = t,
- .false = f,
+ .true = .{ .to = t },
+ .false = .{ .to = f },
} },
});
@@ -345,9 +427,83 @@ const CompileContext = struct {
return dest;
}
+ fn assignArguments(self: *CompileContext) !void {
+ var immediate_predecessors: std.AutoHashMapUnmanaged(usize, std.ArrayListUnmanaged(usize)) = .empty;
+ defer immediate_predecessors.deinit(self.allocator);
+ defer {
+ var it = immediate_predecessors.valueIterator();
+ while (it.next()) |val| val.deinit(self.allocator);
+ }
+
+ for (self.blocks.items, 0..) |block, i| {
+ switch (block.instrs.getLast().type) {
+ .branch => |branch| {
+ const pt = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(branch.true.to));
+ if (!pt.found_existing) {
+ pt.value_ptr.* = .empty;
+ }
+ try pt.value_ptr.append(self.allocator, i);
+
+ const pf = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(branch.false.to));
+ if (!pf.found_existing) {
+ pf.value_ptr.* = .empty;
+ }
+ try pf.value_ptr.append(self.allocator, i);
+ },
+ .jump => |jump| {
+ const p = try immediate_predecessors.getOrPut(self.allocator, @intFromEnum(jump.to));
+ if (!p.found_existing) {
+ p.value_ptr.* = .empty;
+ }
+ try p.value_ptr.append(self.allocator, i);
+ },
+ else => {},
+ }
+ }
+
+ var block_ids: std.ArrayListUnmanaged(usize) = .empty;
+ for (0..self.blocks.items.len) |i| try block_ids.append(self.allocator, i);
+ while (block_ids.pop()) |ref| {
+ const block = &self.blocks.items[ref];
+ const info = &self.block_infos.items[ref];
+ const ty = &block.instrs.items[block.instrs.items.len - 1].type;
+ const got_new_params = switch (ty.*) {
+ .branch => blk: {
+ const t = try self.assignArgumentsInner(block, info, &ty.branch.true);
+ const f = try self.assignArgumentsInner(block, info, &ty.branch.false);
+ break :blk t or f;
+ },
+ .jump => try self.assignArgumentsInner(block, info, &ty.jump),
+ else => false,
+ };
+ if (got_new_params) {
+ try block_ids.appendSlice(self.allocator, immediate_predecessors.get(ref).?.items);
+ }
+ }
+ }
+
+ fn assignArgumentsInner(self: *CompileContext, block: *BasicBlock, info: *BasicBlock.Info, jump: *Instr.Jump) !bool {
+ var got_new_params = false;
+ var it = self.blocks.items[@intFromEnum(jump.to)].params.iterator();
+ // Skip over the arguments that we've already handled in an earlier call
+ for (0..jump.args.items.len) |_| _ = it.next();
+ while (it.next()) |ent| {
+ try jump.args.append(self.allocator, info.local_current_vreg.get(ent.key_ptr.*) orelse blk: {
+ const reg = self.vreg_ctr.get();
+ try block.params.put(self.allocator, ent.key_ptr.*, reg);
+ try info.local_current_vreg.put(self.allocator, ent.key_ptr.*, reg);
+ got_new_params = true;
+ break :blk reg;
+ });
+ }
+
+ return got_new_params;
+ }
+
fn switchToNewBlock(self: *CompileContext) !BlockRef {
const ref: BlockRef = @enumFromInt(self.blocks.items.len);
try self.blocks.append(self.allocator, .{});
+ try self.block_infos.append(self.allocator, .{});
self.current_block = ref;
return ref;
}
@@ -357,9 +513,21 @@ const CompileContext = struct {
.instrs.append(self.allocator, instr);
}
- fn register(self: *CompileContext) VReg {
- const reg: VReg = @enumFromInt(self.register_counter);
- self.register_counter += 1;
- return reg;
+ fn getLocalVar(self: *CompileContext, local: LVar) !VReg {
+ if (self.block_infos.items[@intFromEnum(self.current_block)]
+ .local_current_vreg.get(local)) |vreg| return vreg;
+
+ const vreg = self.vreg_ctr.get();
+ try self.blocks.items[@intFromEnum(self.current_block)]
+ .params.put(self.allocator, local, vreg);
+ try self.block_infos.items[@intFromEnum(self.current_block)]
+ .local_current_vreg.put(self.allocator, local, vreg);
+
+ return vreg;
+ }
+
+ fn assignLocalVar(self: *CompileContext, local: LVar, vreg: VReg) !void {
+ try self.block_infos.items[@intFromEnum(self.current_block)]
+ .local_current_vreg.put(self.allocator, local, vreg);
}
};