From 5749be69125dc87ac50742295272a7e21f4f472e Mon Sep 17 00:00:00 2001 From: Mathias Magnusson Date: Sat, 31 May 2025 22:54:26 +0200 Subject: codegen integer literals correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was not as easy as one would expect ☠️ --- src/codegen.zig | 36 ++++++++++++++++++++++++++---------- src/compile.zig | 2 +- src/lexer.zig | 28 ++++++++++++++++++++++++++-- src/main.zig | 2 +- src/parse.zig | 9 --------- 5 files changed, 54 insertions(+), 23 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index b0bf5c5..368309c 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -485,20 +485,36 @@ const Context = struct { } } - fn genConstant(self: *Context, constant: compile.Instr.Constant) !void { - const reg = self.register_allocator.allocate(constant.dest) orelse return error.OutOfRegisters; - - if (constant.value <= std.math.maxInt(i12)) { - try self.emit(.addi(reg, .zero, @intCast(constant.value))); - } else if (constant.value <= std.math.maxInt(i32)) { - // If the higest bit in the immediate in addi is set, it will be sign extended. We negate that by adding one more to the immediate for lui. - try self.emit(.lui(reg, @intCast((constant.value >> 12) + if (constant.value & (1 << 11) != 0) @as(u64, 1) else 0))); - try self.emit(.addi(reg, reg, @bitCast(@as(u12, @truncate(constant.value))))); + fn genConstantInner(self: *Context, reg: Register, value: u64) !void { + if (value <= std.math.maxInt(i12)) { + // `value` is at most 2047 here, so bit 11 of the immediate is clear and this single + // `addi` from `zero` produces it exactly; no sign-extension fix-up is needed. 
+ try self.emit(Instruction.addi(reg, .zero, @intCast(value))); + } else if (value <= std.math.maxInt(i32)) { + const lower: u12 = @truncate(value); + const upper: u20 = @truncate(if (lower >> 11 == 1) (value >> 12) + 1 else value >> 12); + // If the highest bit in upper is set here, we will get an unwanted sign extension, but + // that will be cleared in the `addiw` right afterwards. If the highest bit was set, + // then that must be because (lower >> 11 == 1) added the carry, so lower is not zero, thus the + // `addiw` is guaranteed to be run. The only other way the highest bit can be set would + // be if `value` had bit 31 set (`value & (1 << 31) != 0`), but then it would be greater + // than `std.math.maxInt(i32)`. + try self.emit(.lui(reg, @bitCast(upper))); + if (lower > 0) try self.emit(.addiw(reg, reg, @bitCast(lower))); } else { - unreachable; // TODO + const thisVal: u12 = @truncate(value); + const nextVal = if (thisVal >> 11 == 1) (value >> 12) + 1 else value >> 12; + try self.genConstantInner(reg, nextVal); + try self.emit(.slli(reg, reg, 12)); // TODO: sometimes these `slli`s can be collapsed + if (thisVal > 0) try self.emit(.addi(reg, reg, @bitCast(thisVal))); } } + fn genConstant(self: *Context, constant: compile.Instr.Constant) !void { + const reg = self.register_allocator.allocate(constant.dest) orelse return error.OutOfRegisters; + try self.genConstantInner(reg, constant.value); + } + fn genBinOp(self: *Context, bin_op: compile.Instr.BinOp) !void { const lhs = self.register_allocator.get(bin_op.lhs); const rhs = self.register_allocator.get(bin_op.rhs); diff --git a/src/compile.zig b/src/compile.zig index ad5fc59..c723842 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -102,7 +102,7 @@ const CompileContext = struct { switch (expr.type) { .integer_literal => try addInstr(self, .{ .loc = expr.loc, - .type = .{ .constant = .{ .dest = dest, .value = expr.getInt(self.source) } }, + .type = .{ .constant = .{ .dest = dest, .value = expr.loc.getInt(self.source) } }, }), .bin_op 
=> |binop| { const lhs = try self.compileExpr(binop.lhs); diff --git a/src/lexer.zig b/src/lexer.zig index 67beec9..8586765 100644 --- a/src/lexer.zig +++ b/src/lexer.zig @@ -1,3 +1,5 @@ +const std = @import("std"); + pub const Token = struct { loc: Location, type: Type, @@ -20,6 +22,16 @@ pub const Location = struct { if (a.end > b.start) unreachable; return .{ .start = @min(a.start, b.start), .end = @max(a.end, b.end) }; } + + /// Returns the decimal value of the digits this location spans in `file_source`. Assumes that the location comes directly from an `integer_literal` token, so every byte is an ASCII digit and the value fits in a `u64`. + pub fn getInt(self: Location, file_source: []const u8) u64 { + var value: u64 = 0; + for (file_source[self.start..self.end]) |c| { + std.debug.assert('0' <= c and c <= '9'); + value = value * 10 + (c - '0'); + } + return value; + } }; source: []const u8, @@ -41,10 +53,22 @@ pub fn next(self: *Self) ?Token { } fn integerLiteral(self: *Self) Token { - while (digitValue(self.peek()) != null) { + var value: ?u64 = 0; + while (digitValue(self.peek())) |v| { + var nxt: ?u64 = null; + if (value) |val| + if (std.math.mul(u64, val, 10) catch null) |p| + if (std.math.add(u64, p, v) catch null) |s| { + nxt = s; + }; + + value = nxt; _ = self.eat(); } - return self.create(.{ .integer_literal = {} }); + return if (value != null) + self.create(.{ .integer_literal = {} }) + else + self.create(.{ .invalid = {} }); } fn create(self: *Self, tajp: Token.Type) Token { diff --git a/src/main.zig b/src/main.zig index 8f81f1d..ef90c79 100644 --- a/src/main.zig +++ b/src/main.zig @@ -32,7 +32,7 @@ pub fn main() !void { // try stdout.print("{s}\n", .{line.items}); // } - const source = "420 + 1337 + 42"; + const source = "17216961135462248174 + 4095 + 4294967295 + 2147483647"; // var lexer = Lexer{ .source = source }; // while (true) { // const token = lexer.next().?; diff --git a/src/parse.zig b/src/parse.zig index e0c1fd0..2149cd7 100644 --- a/src/parse.zig +++ b/src/parse.zig @@ -26,15 +26,6 @@ pub const Expr = struct { }; }; }; - - pub fn getInt(self: *const @This(), file_source: 
[]const u8) u64 { - var value: u64 = 0; - for (file_source[self.loc.start..self.loc.end]) |c| { - std.debug.assert('0' <= c and c <= '9'); - value = value * 10 + c - '0'; - } - return value; - } }; pub fn expression(allocator: Allocator, lexer: *Peekable(Lexer)) error{OutOfMemory}!*Expr { -- cgit v1.2.3