From 7f0f886fb7e1a4c409b78f62be9f8081aa5c6705 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 17 Nov 2022 16:52:37 +0100 Subject: [PATCH] Parse positive numeric literals --- src/ast.rs | 10 ++++++--- src/parser.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 7cc381b..8617d5e 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,18 +1,22 @@ #[derive(Debug, Clone)] pub struct Ident(pub String); +/// Positive number literal. +/// +/// Possible bases are binary, decimal, hexadecimal. Underscores can be inserted +/// before and after any digit. #[derive(Debug, Clone)] pub enum NumLit { /// - `0b_0001_1011` - /// - `-0b10` + /// - `0b10` Bin(i64, String), /// - `12_345` - /// - `-7` + /// - `7` Dec(i64, String), /// - `0x_c0_f3` - /// - `-0xB` + /// - `0xB` Hex(i64, String), } diff --git a/src/parser.rs b/src/parser.rs index 098d1c9..36b8bd0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,11 +1,63 @@ use chumsky::prelude::*; +use chumsky::text::Character; -use crate::ast::Ident; +use crate::ast::{Ident, NumLit}; fn ident() -> impl Parser> { text::ident().map(Ident) } -pub fn parser() -> impl Parser> { - ident().padded().then_ignore(end()) +fn num_lit_radix_digits( + radix: u32, + max_digits: usize, +) -> impl Parser> { + filter(move |c: &char| c.is_digit(radix) || *c == '_') + .repeated() + .at_least(1) + .try_map(move |chars, span| { + let text = chars.iter().copied().collect::(); + let digits = chars.into_iter().filter(|c| *c != '_').collect::(); + if digits.len() > max_digits { + return Err(Simple::custom(span, "number out of range")); + } + let number = u64::from_str_radix(&digits, radix).unwrap(); + if number > i64::MAX as u64 { + return Err(Simple::custom(span, "number out of range")); + } + Ok((number as i64, text)) + }) +} + +fn num_lit_bin_digits() -> impl Parser> { + // i64::MAX in binary is 
0b_1111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 + // 63 digits are necessary to represent the full range. + const MAX_BIN_DIGITS: usize = 63; + + just("0b").ignore_then(num_lit_radix_digits(2, MAX_BIN_DIGITS)) +} + +fn num_lit_dec_digits() -> impl Parser> { + // i64::MAX in decimal is 9_223_372_036_854_775_807 + // 19 digits are necessary to represent the full range. + const MAX_DEC_DIGITS: usize = 19; + + num_lit_radix_digits(10, MAX_DEC_DIGITS) +} + +fn num_lit_hex_digits() -> impl Parser> { + // i64::MAX in hexadecimal is 0x_7fff_ffff_ffff_ffff + // 16 digits are necessary to represent the full range. + const MAX_HEX_DIGITS: usize = 16; + + just("0x").ignore_then(num_lit_radix_digits(16, MAX_HEX_DIGITS)) +} + +fn num_lit() -> impl Parser> { + (num_lit_bin_digits().map(|(num, text)| NumLit::Bin(num, text))) + .or(num_lit_hex_digits().map(|(num, text)| NumLit::Hex(num, text))) + .or(num_lit_dec_digits().map(|(num, text)| NumLit::Dec(num, text))) +} + +pub fn parser() -> impl Parser> { + num_lit().padded().then_ignore(end()) }