Parse positive numeric literals

This commit is contained in:
Joscha 2022-11-17 16:52:37 +01:00
parent f1eca2be57
commit 7f0f886fb7
2 changed files with 62 additions and 6 deletions

View file

@ -1,18 +1,22 @@
/// An identifier, wrapping its name as an owned `String`.
#[derive(Debug, Clone)]
pub struct Ident(pub String);
/// Positive number literal.
///
/// Possible bases are binary, decimal, hexadecimal. Underscores can be inserted
/// before and after any digit.
///
/// Every variant carries two fields: the numeric value of the literal and a
/// `String` holding text associated with the literal.
#[derive(Debug, Clone)]
pub enum NumLit {
    /// Binary literal.
    ///
    /// - `0b_0001_1011`
    /// - `0b10`
    Bin(i64, String),
    /// Decimal literal.
    ///
    /// - `12_345`
    /// - `7`
    Dec(i64, String),
    /// Hexadecimal literal.
    ///
    /// - `0x_c0_f3`
    /// - `0xB`
    Hex(i64, String),
}

View file

@ -1,11 +1,63 @@
use chumsky::prelude::*; use chumsky::prelude::*;
use chumsky::text::Character;
use crate::ast::Ident; use crate::ast::{Ident, NumLit};
fn ident() -> impl Parser<char, Ident, Error = Simple<char>> { fn ident() -> impl Parser<char, Ident, Error = Simple<char>> {
text::ident().map(Ident) text::ident().map(Ident)
} }
pub fn parser() -> impl Parser<char, Ident, Error = Simple<char>> { fn num_lit_radix_digits(
ident().padded().then_ignore(end()) radix: u32,
max_digits: usize,
) -> impl Parser<char, (i64, String), Error = Simple<char>> {
filter(move |c: &char| c.is_digit(radix) || *c == '_')
.repeated()
.at_least(1)
.try_map(move |chars, span| {
let text = chars.iter().copied().collect::<String>();
let digits = chars.into_iter().filter(|c| *c != '_').collect::<String>();
if digits.len() > max_digits {
return Err(Simple::custom(span, "number out of range"));
}
let number = u64::from_str_radix(&digits, radix).unwrap();
if number > i64::MAX as u64 {
return Err(Simple::custom(span, "number out of range"));
}
Ok((number as i64, text))
})
}
/// Parses a binary literal: the prefix `0b` followed by binary digits and
/// underscores.
///
/// The prefix is discarded; the returned text contains only what follows it.
fn num_lit_bin_digits() -> impl Parser<char, (i64, String), Error = Simple<char>> {
    // i64::MAX in binary is 0b_1111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
    // 63 digits are necessary to represent the full range.
    const MAX_BIN_DIGITS: usize = 63;

    just("0b").ignore_then(num_lit_radix_digits(2, MAX_BIN_DIGITS))
}
/// Parses a decimal literal: decimal digits and underscores without any
/// prefix.
fn num_lit_dec_digits() -> impl Parser<char, (i64, String), Error = Simple<char>> {
    // i64::MAX in decimal is 9_223_372_036_854_775_807
    // 19 digits are necessary to represent the full range.
    const MAX_DEC_DIGITS: usize = 19;

    num_lit_radix_digits(10, MAX_DEC_DIGITS)
}
/// Parses a hexadecimal literal: the prefix `0x` followed by hexadecimal
/// digits and underscores.
///
/// The prefix is discarded; the returned text contains only what follows it.
fn num_lit_hex_digits() -> impl Parser<char, (i64, String), Error = Simple<char>> {
    // i64::MAX in hexadecimal is 0x_7fff_ffff_ffff_ffff
    // 16 digits are necessary to represent the full range.
    const MAX_HEX_DIGITS: usize = 16;

    just("0x").ignore_then(num_lit_radix_digits(16, MAX_HEX_DIGITS))
}
/// Parses a number literal in any supported base and wraps it in the matching
/// [`NumLit`] variant.
///
/// The alternatives are tried in the order binary, hexadecimal, decimal.
fn num_lit() -> impl Parser<char, NumLit, Error = Simple<char>> {
    let binary = num_lit_bin_digits().map(|(value, raw)| NumLit::Bin(value, raw));
    let hexadecimal = num_lit_hex_digits().map(|(value, raw)| NumLit::Hex(value, raw));
    let decimal = num_lit_dec_digits().map(|(value, raw)| NumLit::Dec(value, raw));

    binary.or(hexadecimal).or(decimal)
}
pub fn parser() -> impl Parser<char, NumLit, Error = Simple<char>> {
num_lit().padded().then_ignore(end())
} }