From 5a421bb91780e74404d83df2e99d7469b3cb8b90 Mon Sep 17 00:00:00 2001 From: Mathias Magnusson Date: Tue, 3 Jun 2025 00:34:15 +0200 Subject: add { blocks } with scoped local variables --- src/Lexer.zig | 4 +++ src/codegen.zig | 6 ++-- src/compile.zig | 54 +++++++++++++++++++++++------ src/main.zig | 21 ++++++----- src/parse.zig | 106 ++++++++++++++++++++++++++++++++++++++------------------ 5 files changed, 135 insertions(+), 56 deletions(-) diff --git a/src/Lexer.zig b/src/Lexer.zig index c8f19c6..f5d8e95 100644 --- a/src/Lexer.zig +++ b/src/Lexer.zig @@ -7,6 +7,8 @@ pub const Token = struct { pub const Type = enum { left_paren, right_paren, + left_curly, + right_curly, integer_literal, plus, minus, @@ -69,6 +71,8 @@ fn getNext(self: *Self) Token { return s: switch (self.eatChar() orelse return self.create(.eof)) { '(' => self.create(.left_paren), ')' => self.create(.right_paren), + '{' => self.create(.left_curly), + '}' => self.create(.right_curly), '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' => self.integerLiteral(), '+' => self.create(.plus), '-' => self.create(.minus), diff --git a/src/codegen.zig b/src/codegen.zig index f186304..dd484b0 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -548,7 +548,7 @@ const Context = struct { instructions: std.ArrayList(Instruction), // Current stuff that changes often, basically here to avoid prop drilling. - block: ?*const compile.Block = null, + block: ?*const compile.BasicBlock = null, current_instruction_index: ?usize = null, fn deinit(self: *Context) void { @@ -716,7 +716,7 @@ const Context = struct { } } - fn codegenBlock(self: *Context, block: compile.Block) !void { + fn codegenBlock(self: *Context, block: compile.BasicBlock) !void { self.block = &block; defer self.block = null; for (block.instrs, 0..) 
|instr, i| { @@ -726,7 +726,7 @@ const Context = struct { } }; -pub fn create_elf(allocator: Allocator, block: compile.Block) ![]u8 { +pub fn create_elf(allocator: Allocator, block: compile.BasicBlock) ![]u8 { var ctx: Context = .{ .register_allocator = try .init(allocator), .instructions = .init(allocator) }; defer ctx.deinit(); diff --git a/src/compile.zig b/src/compile.zig index ac7768e..5d57e71 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -72,12 +72,12 @@ pub const Instr = struct { pub const Sources = std.BoundedArray(VReg, 2); }; -pub const Block = struct { +pub const BasicBlock = struct { // arguments: []Reg, instrs: []Instr, vreg_last_use: std.AutoHashMap(VReg, usize), - fn init(allocator: Allocator, instrs: []Instr) !Block { + fn init(allocator: Allocator, instrs: []Instr) !BasicBlock { var vreg_last_use: std.AutoHashMap(VReg, usize) = .init(allocator); for (0.., instrs) |i, instr| { for (instr.sources().slice()) |src| @@ -92,41 +92,67 @@ pub const Block = struct { } }; -pub fn compile(allocator: Allocator, source: []const u8, stmts: []parse.Stmt) !Block { +pub fn compile(allocator: Allocator, source: []const u8, block: parse.Block) !BasicBlock { const instrs: std.ArrayListUnmanaged(Instr) = try .initCapacity(allocator, 0); var ctx: CompileContext = .{ .allocator = allocator, .source = source, .register_counter = 0, - .scope = .empty, + .scope = .{ .locals = .empty, .parent = null }, .instrs = instrs, }; - for (stmts) |stmt| { - try ctx.compileStmt(stmt); - } + try ctx.compileBlock(block); return .init(allocator, ctx.instrs.items); } +const CompileError = error{ + OutOfMemory, + CanOnlyCallIdentifiers, + UnknownProcedure, + UnknownVariable, +}; + const CompileContext = struct { allocator: Allocator, source: []const u8, register_counter: u32, - scope: std.StringHashMapUnmanaged(VReg), + scope: Scope, instrs: std.ArrayListUnmanaged(Instr), + const Scope = struct { + locals: std.StringHashMapUnmanaged(VReg), + parent: ?*Scope, + }; + const Self = 
@This(); fn addInstr(self: *Self, instr: Instr) !void { try self.instrs.append(self.allocator, instr); } - fn compileStmt(self: *Self, stmt: parse.Stmt) !void { + fn compileBlock(self: *Self, block: parse.Block) !void { + const parent = try self.allocator.create(Scope); + defer self.allocator.destroy(parent); + parent.* = self.scope; + self.scope = .{ + .locals = .empty, + .parent = parent, + }; + for (block.stmts) |stmt| { + try self.compileStmt(stmt); + } + self.scope.locals.deinit(self.allocator); + self.scope = parent.*; + } + + fn compileStmt(self: *Self, stmt: parse.Stmt) CompileError!void { switch (stmt.type) { .expr => |expr| _ = try self.compileExpr(expr), + .block => |block| try self.compileBlock(block), .declare_var => |declare_var| { const val = try self.compileExpr(declare_var.value); const name = declare_var.ident.getIdent(self.source); - try self.scope.put(self.allocator, name, val); + try self.scope.locals.put(self.allocator, name, val); }, } } @@ -168,7 +194,13 @@ const CompileContext = struct { }); }, .identifier => { - return self.scope.get(expr.loc.getIdent(self.source)) orelse return error.UnknownVariable; + var scope: ?*Scope = &self.scope; + while (scope) |s| : (scope = s.parent) { + if (s.locals.get(expr.loc.getIdent(self.source))) |reg| { + return reg; + } + } + return error.UnknownVariable; }, } return dest; diff --git a/src/main.zig b/src/main.zig index a9c77ac..9678d0b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -37,9 +37,15 @@ pub fn main() !void { // } const source = - \\let x = read_int(0); - \\print(18446744073709551615); - \\print(x + x); + \\{ + \\ let x = 69; + \\ { + \\ let x = read_int(0); + \\ print(18446744073709551615); + \\ print(x + x); + \\ } + \\ print(x); + \\} ; var lexer: Lexer = .{ .source = source }; std.debug.print("Tokens:\n", .{}); @@ -49,15 +55,12 @@ pub fn main() !void { if (token.type == .eof) break; } lexer = .{ .source = source }; - const stmts = try parse.statements(allocator, &lexer); - 
std.debug.print("Statements:\n", .{}); - for (stmts) |stmt| { - std.debug.print(" {}\n", .{stmt.fmt(source)}); - } + const ast = try parse.block(allocator, &lexer); + std.debug.print("Parse tree:\n{}\n", .{parse.fmt(ast, source, 0)}); if (lexer.peek().type != .eof) { std.debug.print("Unexpected token {}, expected end of file\n", .{lexer.next()}); } - const block = try compile.compile(allocator, source, stmts); + const block = try compile.compile(allocator, source, ast); std.debug.print("Bytecode instructions:\n", .{}); for (block.instrs) |instr| { std.debug.print(" {}\n", .{instr}); diff --git a/src/parse.zig b/src/parse.zig index 6ed1b79..b509fe7 100644 --- a/src/parse.zig +++ b/src/parse.zig @@ -5,6 +5,38 @@ const root = @import("root"); const Lexer = root.Lexer; const Token = root.Lexer.Token; +fn Fmt(T: type) type { + return std.fmt.Formatter(struct { + fn format( + data: struct { T, []const u8, usize }, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, + ) !void { + const self, const file_source, const indent = data; + return self.format(writer, file_source, indent); + } + }.format); +} + +pub fn fmt(tree: anytype, source: []const u8, indent: usize) Fmt(@TypeOf(tree)) { + return .{ .data = .{ tree, source, indent } }; +} + +pub const Block = struct { + loc: Lexer.Location, + stmts: []Stmt, + + fn format(self: Block, writer: anytype, source: []const u8, indent: usize) !void { + try writer.writeAll("{\n"); + for (self.stmts) |stmt| { + try writer.print("{}\n", .{fmt(stmt, source, indent + 4)}); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeAll("}"); + } +}; + pub const Stmt = struct { loc: Lexer.Location, type: Type, @@ -12,6 +44,7 @@ pub const Stmt = struct { pub const Type = union(enum) { expr: *const Expr, declare_var: DeclareVar, + block: Block, pub const DeclareVar = struct { ident: Lexer.Location, @@ -19,21 +52,17 @@ pub const Stmt = struct { }; }; - pub fn fmt(self: Stmt, file_source: []const u8) Format { - return 
.{ .data = .{ self, file_source } }; + fn format(self: Stmt, writer: anytype, source: []const u8, indent: usize) !void { + try writer.writeByteNTimes(' ', indent); + return switch (self.type) { + .expr => |expr| writer.print("{};", .{fmt(expr, source, indent)}), + .block => |b| writer.print("{}", .{fmt(b, source, indent)}), + .declare_var => |declare_var| writer.print("let {s} = {};", .{ + declare_var.ident.getIdent(source), + fmt(declare_var.value, source, indent), + }), + }; } - - const Format = std.fmt.Formatter(struct { - fn format(data: struct { Stmt, []const u8 }, comptime f: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - const self, const file_source = data; - _ = f; - _ = options; - return switch (self.type) { - .expr => |expr| writer.print("{};", .{expr.fmt(file_source)}), - .declare_var => |declare_var| writer.print("let {s} = {};", .{ declare_var.ident.getIdent(file_source), declare_var.value.fmt(file_source) }), - }; - } - }.format); }; pub const Expr = struct { @@ -70,32 +99,36 @@ pub const Expr = struct { }; }; - pub fn fmt(self: Expr, file_source: []const u8) Format { - return .{ .data = .{ self, file_source } }; - } - - const Format = std.fmt.Formatter(struct { - fn format(data: struct { Expr, []const u8 }, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { - const self, const file_source = data; - switch (self.type) { - .integer_literal => try writer.print("{}", .{self.loc.getInt(file_source)}), - .bin_op => |bin_op| try writer.print("({} {} {})", .{ bin_op.lhs.fmt(file_source), bin_op.op, bin_op.rhs.fmt(file_source) }), - .call => |call| try writer.print("{}({})", .{ call.proc.fmt(file_source), call.arg.fmt(file_source) }), - .identifier => try writer.print("{s}", .{self.loc.getIdent(file_source)}), - } + fn format(self: Expr, writer: anytype, source: []const u8, indent: usize) !void { + switch (self.type) { + .integer_literal => try writer.print("{}", .{self.loc.getInt(source)}), + .bin_op => 
|bin_op| { + try writer.print("{} {} {}", .{ fmt(bin_op.lhs, source, indent), bin_op.op, fmt(bin_op.rhs, source, indent) }); + }, + .call => |call| { + try writer.print("{}({})", .{ fmt(call.proc, source, indent), fmt(call.arg, source, indent) }); + }, + .identifier => try writer.print("{s}", .{self.loc.getIdent(source)}), } - }.format); + } }; -pub fn statements(allocator: Allocator, lexer: *Lexer) ![]Stmt { +const ParseError = error{ OutOfMemory, ExpectedRightParen, UnexpectedToken, ExpectedSemicolon }; + +pub fn block(allocator: Allocator, lexer: *Lexer) !Block { + const left_curly = try mustEat(lexer, .left_curly); var stmts: std.ArrayList(Stmt) = .init(allocator); - while (lexer.peek().type != .eof) { + while (lexer.peek().type != .right_curly) { try stmts.append(try statement(allocator, lexer)); } - return try stmts.toOwnedSlice(); + const right_curly = try mustEat(lexer, .right_curly); + return .{ + .loc = left_curly.loc.combine(right_curly.loc), + .stmts = try stmts.toOwnedSlice(), + }; } -pub fn statement(allocator: Allocator, lexer: *Lexer) !Stmt { +pub fn statement(allocator: Allocator, lexer: *Lexer) ParseError!Stmt { switch (lexer.peek().type) { .let => { const let = lexer.next(); @@ -108,6 +141,13 @@ pub fn statement(allocator: Allocator, lexer: *Lexer) !Stmt { .type = .{ .declare_var = .{ .ident = ident.loc, .value = value } }, }; }, + .left_curly => { + const b = try block(allocator, lexer); + return .{ + .loc = b.loc, + .type = .{ .block = b }, + }; + }, else => { var expr = try expression(allocator, lexer); const semicolon = lexer.next(); @@ -120,7 +160,7 @@ pub fn statement(allocator: Allocator, lexer: *Lexer) !Stmt { } } -pub fn expression(allocator: Allocator, lexer: *Lexer) error{ OutOfMemory, ExpectedRightParen, UnexpectedToken }!*Expr { +pub fn expression(allocator: Allocator, lexer: *Lexer) ParseError!*Expr { return parseTerms(allocator, lexer); } -- cgit v1.2.3