Partially implement phase 1 parsing

This commit introduces a few --pedantic warnings. Those will (hopefully) be
fixed in the next commit.
This commit is contained in:
Joscha 2020-03-30 18:57:24 +00:00
parent b15606a530
commit ada200bf50
2 changed files with 134 additions and 38 deletions

View file

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
module Mima.Asm.Phase1 module Mima.Asm.Phase1
( Onion(..) ( Onion(..)
-- * Types -- * Types
@ -17,11 +19,18 @@ module Mima.Asm.Phase1
, parsePhase1 , parsePhase1
) where ) where
import Control.Monad
import Control.Monad.Trans.Class
import Control.Monad.Trans.Writer
import qualified Data.Aeson as A import qualified Data.Aeson as A
import Data.Char import Data.Char
import Data.Foldable
import Data.Monoid
import qualified Data.Text as T import qualified Data.Text as T
import Data.Void import Data.Void
import Text.Megaparsec import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Char.Lexer
import qualified Mima.Vm.Instruction as Vm import qualified Mima.Vm.Instruction as Vm
import qualified Mima.Vm.Word as Vm import qualified Mima.Vm.Word as Vm
@ -58,7 +67,7 @@ instance Onion Name where
data Address a data Address a
= AddressAbsolute a Vm.MimaAddress = AddressAbsolute a Vm.MimaAddress
| AddressRelative a Int | AddressRelative a Integer
deriving (Show) deriving (Show)
instance Onion Address where instance Onion Address where
@ -110,20 +119,22 @@ instance Onion Instruction where
peel (SmallInstruction a _ _) = a peel (SmallInstruction a _ _) = a
peel (LargeInstruction a _ _) = a peel (LargeInstruction a _ _) = a
-- | The first @a@ parameter represents the span of the whole thing. The second
-- @a@ parameter represents the span of the directive literal (e. g. @.org@).
data RegisterDirective a data RegisterDirective a
= RegIar a (Location a) = RegIar a a (Location a)
| RegAcc a (MimaWord a) | RegAcc a a (MimaWord a)
| RegRa a (Location a) | RegRa a a (Location a)
| RegSp a (Location a) | RegSp a a (Location a)
| RegFp a (Location a) | RegFp a a (Location a)
deriving (Show) deriving (Show)
instance Onion RegisterDirective where instance Onion RegisterDirective where
peel (RegIar a _) = a peel (RegIar a _ _) = a
peel (RegAcc a _) = a peel (RegAcc a _ _) = a
peel (RegRa a _) = a peel (RegRa a _ _) = a
peel (RegSp a _) = a peel (RegSp a _ _) = a
peel (RegFp a _) = a peel (RegFp a _ _) = a
data JsonValue a = JsonValue a A.Value data JsonValue a = JsonValue a A.Value
deriving (Show) deriving (Show)
@ -167,27 +178,120 @@ instance Onion AsmToken where
{- Parsers -} {- Parsers -}
type Parser = Parsec Void T.Text type Phase1 = [AsmToken Span]
type Parser = WriterT (Endo Phase1) (Parsec Void T.Text)
data Span = Span SourcePos SourcePos data Span = Span SourcePos SourcePos
deriving (Show) deriving (Show)
withSpan :: Parser a -> Parser (a, Span) addTokens :: [AsmToken Span] -> Parser ()
addTokens = tell . Endo . (++)
addToken :: AsmToken Span -> Parser ()
addToken t = addTokens [t]
withSpan :: Parser a -> Parser (Span, a)
withSpan f = do withSpan f = do
start <- getSourcePos start <- getSourcePos
result <- f result <- f
stop <- getSourcePos stop <- getSourcePos
pure (result, Span start stop) pure (Span start stop, result)
name :: Parser (Name Span) name :: Parser (Name Span)
name = do name = fmap (uncurry Name) $ withSpan $ do
(a, s) <- withSpan $ do
firstChar <- satisfy isLower <?> "lowercase character" firstChar <- satisfy isLower <?> "lowercase character"
otherChars <- takeWhileP (Just "alphanumeric character") isAlphaNum otherChars <- takeWhileP (Just "alphanumeric character") isAlphaNum
pure $ T.pack [firstChar] <> otherChars pure $ T.pack [firstChar] <> otherChars
pure $ Name s a
type Phase1 = [AsmToken Span] number :: (Num a) => Parser a
number =
(chunk "0b" *> binary) <|>
(chunk "0o" *> octal) <|>
(chunk "0x" *> hexadecimal) <|>
decimal
signedNumber :: (Num a) => Parser a
signedNumber = signed empty number
boundedNumber :: (Bounded n, Num n) => Parser n
boundedNumber = do
n <- signedNumber :: Parser Integer
when (n < minVal || n > maxVal) $ fail $
"invalid range: " ++
show n ++ " is not between " ++
show minVal ++ " and " ++ show maxVal
pure $ fromInteger n
where
maxVal = toInteger (maxBound :: Vm.MimaWord)
minVal = -(maxVal + 1)
address :: Parser (Address Span)
address =
fmap (uncurry AddressAbsolute) (withSpan boundedNumber) <|>
fmap (uncurry AddressRelative) (withSpan signedNumber)
location :: Parser (Location Span)
location = (LocationAddress <$> address) <|> (LocationLabel <$> name)
smallOpcode :: Parser (SmallOpcode Span)
smallOpcode = asum $ map parseOpcode [minBound..maxBound]
where
parseOpcode o = do
(s, _) <- withSpan $ chunk $ T.pack $ show o
pure $ SmallOpcode s o
largeOpcode :: Parser (LargeOpcode Span)
largeOpcode = asum $ map parseOpcode [minBound..maxBound]
where
parseOpcode o = do
(s, _) <- withSpan $ chunk $ T.pack $ show o
pure $ LargeOpcode s o
mimaWord :: Parser (MimaWord Span)
mimaWord =
(uncurry WordRaw <$> withSpan boundedNumber) <|> (WordLocation <$> location)
smallValue :: Parser (SmallValue Span)
smallValue = uncurry SmallValue <$> withSpan boundedNumber
instruction :: Parser (Instruction Span)
instruction = small <|> large
where
small = do
start <- getSourcePos
so <- smallOpcode
space1
loc <- location
stop <- getSourcePos
pure $ SmallInstruction (Span start stop) so loc
large = do
start <- getSourcePos
lo <- largeOpcode
space1
sv <- smallValue
stop <- getSourcePos
pure $ LargeInstruction (Span start stop) lo sv
singleDirective
:: (Span -> Span -> a -> b Span)
-> T.Text
-> Parser a
-> Parser (b Span)
singleDirective f t p = do
(outerSpan, (nameSpan, a)) <- withSpan $ do
(nameSpan, _) <- withSpan $ chunk t
space1
a <- p
pure (nameSpan, a)
pure $ f outerSpan nameSpan a
registerDirective :: Parser (RegisterDirective Span)
registerDirective =
singleDirective RegIar "IAR" location <|>
singleDirective RegAcc "ACC" mimaWord <|>
singleDirective RegRa "RA" location <|>
singleDirective RegSp "SP" location <|>
singleDirective RegFp "FP" location
parsePhase1 :: Parser Phase1 parsePhase1 :: Parser Phase1
parsePhase1 = undefined name parsePhase1 = undefined

View file

@ -17,15 +17,11 @@ import Mima.Vm.Word
data SmallOpcode = LDC | LDV | STV | ADD | AND | OR | XOR | EQL data SmallOpcode = LDC | LDV | STV | ADD | AND | OR | XOR | EQL
| JMP | JMN | LDIV | STIV | CALL | ADC | JMP | JMN | LDIV | STIV | CALL | ADC
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord, Bounded, Enum)
instance ToText SmallOpcode where instance ToText SmallOpcode where
toText = T.pack . show toText = T.pack . show
allSmallOpcodes :: [SmallOpcode]
allSmallOpcodes = [LDC, LDV, STV, ADD, AND, OR, XOR, EQL,
JMP, JMN, LDIV, STIV, CALL, ADC]
smallOpcodeNr :: SmallOpcode -> Opcode smallOpcodeNr :: SmallOpcode -> Opcode
smallOpcodeNr LDC = 0 smallOpcodeNr LDC = 0
smallOpcodeNr LDV = 1 smallOpcodeNr LDV = 1
@ -43,19 +39,15 @@ smallOpcodeNr CALL = 12
smallOpcodeNr ADC = 13 smallOpcodeNr ADC = 13
smallOpcodeMap :: Map.Map Opcode SmallOpcode smallOpcodeMap :: Map.Map Opcode SmallOpcode
smallOpcodeMap = Map.fromList [(smallOpcodeNr so, so) | so <- allSmallOpcodes] smallOpcodeMap = Map.fromList [(smallOpcodeNr so, so) | so <- [minBound..maxBound]]
data LargeOpcode = HALT | NOT | RAR | RET | LDRA | STRA | LDSP | STSP data LargeOpcode = HALT | NOT | RAR | RET | LDRA | STRA | LDSP | STSP
| LDFP | STFP | LDRS | STRS | LDRF | STRF | LDFP | STFP | LDRS | STRS | LDRF | STRF
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord, Bounded, Enum)
instance ToText LargeOpcode where instance ToText LargeOpcode where
toText = T.pack . show toText = T.pack . show
allLargeOpcodes :: [LargeOpcode]
allLargeOpcodes = [HALT, NOT, RAR, RET, LDRA, STRA, LDSP, STSP,
LDFP, STFP, LDRS, STRS, LDRF, STRF]
largeOpcodeNr :: LargeOpcode -> Opcode largeOpcodeNr :: LargeOpcode -> Opcode
largeOpcodeNr HALT = 0 largeOpcodeNr HALT = 0
largeOpcodeNr NOT = 1 largeOpcodeNr NOT = 1
@ -73,7 +65,7 @@ largeOpcodeNr LDRF = 12
largeOpcodeNr STRF = 13 largeOpcodeNr STRF = 13
largeOpcodeMap :: Map.Map Opcode LargeOpcode largeOpcodeMap :: Map.Map Opcode LargeOpcode
largeOpcodeMap = Map.fromList [(largeOpcodeNr lo, lo) | lo <- allLargeOpcodes] largeOpcodeMap = Map.fromList [(largeOpcodeNr lo, lo) | lo <- [minBound..maxBound]]
argumentIsOptional :: LargeOpcode -> Bool argumentIsOptional :: LargeOpcode -> Bool
argumentIsOptional HALT = True argumentIsOptional HALT = True