const std = @import("std");

// Hand-written lexer. This file is itself the lexer struct (see `Self` below):
// initialise it with `.{ .source = text }` and pull tokens with `peek` / `next`.

/// A single lexical token: its kind plus the byte range it covers in the source.
pub const Token = struct {
    /// Byte range of the token inside the source buffer.
    loc: Location,
    /// Kind of the token.
    type: Type,

    /// Every token kind the lexer can produce.
    pub const Type = enum {
        left_paren,
        right_paren,
        left_curly,
        right_curly,
        integer_literal,
        plus,
        minus,
        equal,
        invalid,
        eof,
        identifier,
        left_angle,
        right_angle,
        left_angle_equal,
        right_angle_equal,
        // Keywords
        let,
        @"if",
        @"else",
        @"while",
    };
};

/// Half-open byte range `[start, end)` into the source buffer.
pub const Location = struct {
    start: usize,
    end: usize,

    /// Returns the smallest range covering both `a` and `b`.
    /// Asserts that `a` ends no later than `b` starts.
    pub fn combine(a: Location, b: Location) Location {
        std.debug.assert(a.end <= b.start);
        return .{
            .start = @min(a.start, b.start),
            .end = @max(a.end, b.end),
        };
    }

    /// Parses the decimal digits covered by this location into a `u64`.
    /// Assumes that the location comes directly from an `integer_literal` token;
    /// every byte is asserted to be an ASCII digit.
    pub fn getInt(self: Location, file_source: []const u8) u64 {
        var value: u64 = 0;
        for (file_source[self.start..self.end]) |c| {
            std.debug.assert('0' <= c and c <= '9');
            value = value * 10 + (c - '0');
        }
        return value;
    }

    /// Returns the source text covered by this location.
    /// Assumes that the location comes directly from an `identifier` token.
    pub fn getIdent(self: Location, file_source: []const u8) []const u8 {
        return file_source[self.start..self.end];
    }
};

/// Returns the next token without consuming it. The token is lexed on the
/// first call and cached in `peeked` until `next` clears it.
pub fn peek(self: *Self) Token {
    if (self.peeked) |token| return token;
    const token = self.getNext();
    self.peeked = token;
    return token;
}

/// Returns the next token and consumes it.
pub fn next(self: *Self) Token {
    const token = self.peek();
    self.peeked = null;
    return token;
}

/// Text being tokenized.
source: []const u8,
/// Byte offset at which the token currently being lexed begins.
start: usize = 0,
/// Byte offset of the next unread character.
pos: usize = 0,
/// Token produced by `peek` that `next` has not consumed yet.
peeked: ?Token = null,

/// Lexes one token starting at `pos`. Spaces, tabs, carriage returns, newlines
/// and `#` comments (which run to the end of the line) are skipped. Returns
/// `.eof` once the source is exhausted and `.invalid` for unrecognised bytes.
fn getNext(self: *Self) Token {
    self.start = self.pos;
    return s: switch (self.eatChar() orelse return self.create(.eof)) {
        '(' => self.create(.left_paren),
        ')' => self.create(.right_paren),
        '{' => self.create(.left_curly),
        '}' => self.create(.right_curly),
        '0'...'9' => self.integerLiteral(),
        '+' => self.create(.plus),
        '-' => self.create(.minus),
        '=' => self.create(.equal),
        '<' => if (self.eatIfEqual('=')) self.create(.left_angle_equal) else self.create(.left_angle),
        '>' => if (self.eatIfEqual('=')) self.create(.right_angle_equal) else self.create(.right_angle),
        '#' => {
            // Drop everything up to and including the newline (or end of input).
            while ((self.eatChar() orelse '\n') != '\n') {}
            // The token proper starts after the comment; re-dispatch on the next byte.
            self.start = self.pos;
            continue :s (self.eatChar() orelse return self.create(.eof));
        },
        // Tabs and carriage returns are skipped like spaces and newlines, so
        // tab-indented or CRLF-terminated sources do not yield `.invalid` tokens.
        ' ', '\t', '\r', '\n' => {
            self.start = self.pos;
            continue :s (self.eatChar() orelse return self.create(.eof));
        },
        else => |c| if (std.ascii.isAlphabetic(c)) self.identifierOrKeyword() else self.create(.invalid),
    };
}

/// Lexes the remaining digits of an integer literal whose first digit has
/// already been consumed. All digits are always consumed; the token is
/// `.invalid` when the value does not fit in a `u64`.
fn integerLiteral(self: *Self) Token {
    // `null` records that the literal has overflowed.
    var value: ?u64 = self.source[self.start] - '0';
    while (digitValue(self.peekChar())) |digit| {
        if (value) |current| {
            value = accumulate: {
                const scaled = std.math.mul(u64, current, 10) catch break :accumulate null;
                break :accumulate std.math.add(u64, scaled, digit) catch null;
            };
        }
        _ = self.eatChar();
    }
    return self.create(if (value != null) .integer_literal else .invalid);
}

/// Lexes the rest of a word whose first letter has already been consumed.
/// The word continues over ASCII letters and `_`. Produces the keyword token
/// when the text is `let`, `if`, `else` or `while`, and `.identifier` otherwise.
fn identifierOrKeyword(self: *Self) Token {
    while (self.peekChar()) |c| {
        if (!std.ascii.isAlphabetic(c) and c != '_') break;
        _ = self.eatChar();
    }
    const text = self.source[self.start..self.pos];
    const matched = std.meta.stringToEnum(Token.Type, text) orelse
        return self.create(.identifier);
    return self.create(switch (matched) {
        .let, .@"if", .@"else", .@"while" => matched,
        // Words that merely spell another tag name (e.g. "plus") are ordinary identifiers.
        else => .identifier,
    });
}

/// Builds a token of kind `ty` spanning `start..pos`.
fn create(self: *Self, ty: Token.Type) Token {
    return .{
        .loc = .{ .start = self.start, .end = self.pos },
        .type = ty,
    };
}

/// Consumes the next character only if it equals `char`; reports whether it did.
fn eatIfEqual(self: *Self, char: u8) bool {
    const c = self.peekChar() orelse return false;
    if (c != char) return false;
    _ = self.eatChar();
    return true;
}

/// Consumes and returns the next character, or `null` at end of input.
fn eatChar(self: *Self) ?u8 {
    const c = self.peekChar() orelse return null;
    self.pos += 1;
    return c;
}

/// Returns the next character without consuming it, or `null` at end of input.
fn peekChar(self: *Self) ?u8 {
    if (self.pos >= self.source.len) return null;
    return self.source[self.pos];
}

/// The lexer type, i.e. this file.
const Self = @This();

/// Returns the numeric value of an ASCII digit, or `null` for any other input
/// (including `null`).
fn digitValue(c: ?u8) ?u8 {
    const byte = c orelse return null;
    return if (std.ascii.isDigit(byte)) byte - '0' else null;
}