Switch assembler to new parser

This commit is contained in:
Joscha 2019-11-21 18:27:27 +00:00
parent ee5c2762d6
commit fa4cd218ab
7 changed files with 2 additions and 547 deletions

View file

@ -3,9 +3,9 @@ module Main where
import Control.Monad.Trans.Class import Control.Monad.Trans.Class
import Options.Applicative import Options.Applicative
import Mima.Assembler.Parser
import Mima.IO import Mima.IO
import Mima.Load import Mima.Load
import Mima.Parse.Assembly
data Settings = Settings data Settings = Settings
{ infile :: String { infile :: String
@ -33,7 +33,7 @@ main = doRun_ $ do
settings <- lift $ execParser opts settings <- lift $ execParser opts
lift $ putStrLn $ "Loading assembly file at " ++ infile settings lift $ putStrLn $ "Loading assembly file at " ++ infile settings
(state, _) <- loadFile readState (infile settings) (state, _, _) <- loadFile readAssembly (infile settings)
lift $ putStrLn "Parsing successful" lift $ putStrLn "Parsing successful"
lift $ putStrLn $ "Writing result to " ++ outfile settings lift $ putStrLn $ "Writing result to " ++ outfile settings

View file

@ -1,119 +0,0 @@
{-# LANGUAGE TupleSections #-}
module Mima.Assembler.Parser
( parseState
, readState
) where
import Control.Monad
import Control.Monad.Trans.Class
import Control.Monad.Trans.State
import qualified Data.Map as Map
import Data.Maybe
import qualified Data.Set as Set
import qualified Data.Text as T
import Text.Megaparsec
import Mima.Assembler.Parser.Basic
import Mima.Assembler.Parser.Instruction
import Mima.Assembler.Parser.Label
import Mima.Assembler.Parser.RawInstruction
import Mima.Assembler.Parser.Register
import Mima.Parse.Weed
import Mima.State
import Mima.Word
data MyState = MyState
{ sCurrentPos :: MimaAddress
, sLabels :: Map.Map MimaLabel MimaAddress
, sInstructions :: Map.Map MimaAddress (RawInstruction Address)
} deriving (Show)
initialState :: MyState
initialState = MyState 0 Map.empty Map.empty
type SParser a = StatefulParser MyState a
incrementCurrentPos :: SParser ()
incrementCurrentPos = do
s <- get
when (sCurrentPos s == maxBound) empty
put s{sCurrentPos = succ $ sCurrentPos s}
parseInstructions' :: SParser ()
parseInstructions' = sepBy parseInstruction' incrementCurrentPos >> lift eof
where
parseInstruction' :: SParser ()
parseInstruction' = do
s <- get
let currentPos = sCurrentPos s
knownLabels = Map.keysSet $ sLabels s
(actualPos, instruction, labels) <- lift $ parseInstruction currentPos knownLabels
let newLabels = Map.fromList [(l, actualPos) | l <- Set.toList labels]
put s { sCurrentPos = actualPos
, sLabels = Map.union newLabels $ sLabels s
, sInstructions = Map.insert actualPos instruction $ sInstructions s
}
parseInstructions :: Parser (Map.Map MimaLabel MimaAddress, Map.Map MimaAddress (RawInstruction Address))
parseInstructions = do
(_, s) <- runStatefulParser parseInstructions' initialState
pure (sLabels s, sInstructions s)
resolveRegisters :: Map.Map MimaLabel MimaAddress
-> Registers Address
-> Parser (Registers MimaAddress)
resolveRegisters labels reg = do
iar <- resolveMaybeAddress $ regIAR reg
ra <- resolveMaybeAddress $ regRA reg
sp <- resolveMaybeAddress $ regSP reg
fp <- resolveMaybeAddress $ regFP reg
pure reg{regIAR = iar, regRA = ra, regSP = sp, regFP = fp}
where
resolveMaybeAddress :: Maybe Address -> Parser (Maybe MimaAddress)
resolveMaybeAddress (Just addr) = Just <$> resolveAddress labels addr
resolveMaybeAddress Nothing = pure Nothing
resolveRawInstruction :: Map.Map MimaLabel MimaAddress
-> RawInstruction Address
-> Parser (RawInstruction MimaAddress)
resolveRawInstruction _ (RawLIT word) = pure $ RawLIT word
resolveRawInstruction _ (RawLargeInstruction lo sv) = pure $ RawLargeInstruction lo sv
resolveRawInstruction labels (RawSmallInstruction so lv) = do
addr <- resolveAddress labels lv
pure $ RawSmallInstruction so addr
resolveLabels :: Map.Map MimaLabel MimaAddress
-> Map.Map MimaAddress (RawInstruction Address)
-> Parser (Map.Map MimaAddress (RawInstruction MimaAddress))
resolveLabels labels rawLabeledInstructions = do
let labeledInstrList = Map.toList rawLabeledInstructions
resolve = resolveRawInstruction labels
instrList <- forM labeledInstrList $ \(addr, instr) -> (addr,) <$> resolve instr
let rawInstructions = Map.fromList instrList
pure rawInstructions
stateFromRegisters :: Registers MimaAddress -> MimaMemory -> MimaState
stateFromRegisters reg mem =
MimaState { msIAR = fromMaybe 0 $ regIAR reg
, msACC = fromMaybe 0 $ regACC reg
, msRA = fromMaybe 0 $ regRA reg
, msSP = fromMaybe 0 $ regSP reg
, msFP = fromMaybe 0 $ regFP reg
, msMemory = mem
}
parseState :: Parser (MimaState, Map.Map T.Text MimaAddress)
parseState = do
lexeme $ pure ()
void $ many newlines
unresolvedRegisters <- parseRegisters
(labels, unresolvedRawInstructions) <- parseInstructions
registers <- resolveRegisters labels unresolvedRegisters
rawInstructions <- resolveLabels labels unresolvedRawInstructions
let mem = mapToMemory $ Map.map rawInstructionToWord rawInstructions
labelNames = Map.fromList $ map (\(k, v) -> (lName k, v)) $ Map.toList labels
pure (stateFromRegisters registers mem, labelNames)
readState :: FilePath -> T.Text -> Either WeedErrorBundle (MimaState, Map.Map T.Text MimaAddress)
readState filename input = parse parseState filename input

View file

@ -1,127 +0,0 @@
{-# LANGUAGE OverloadedStrings #-}
module Mima.Assembler.Parser.Basic
( Parser
, withOffset
, failAt
-- * Character specifications
, isAlphabet
, isConnecting
, isWhitespace
-- * Lexme
, whitespace
, space
, lexeme
, newline
, newlines
, colon
-- * Basic data types
, mimaWord
, largeValue
, largeValue'
, smallValue
, smallValue'
-- * Stateful parsing
, StatefulParser
, runStatefulParser
) where
import Control.Monad
import Control.Monad.Trans.State
import qualified Data.Set as Set
import qualified Data.Text as T
import Data.Void
import Text.Megaparsec
import qualified Text.Megaparsec.Char as C
import qualified Text.Megaparsec.Char.Lexer as L
import Mima.Word
type Parser = Parsec Void T.Text
withOffset :: Parser a -> Parser (a, Int)
withOffset parser = (,) <$> parser <*> getOffset
failAt :: Int -> String -> Parser a
failAt offset message = do
setOffset offset
fail message
{- Character specifications -}
isOneOf :: String -> Char -> Bool
isOneOf s t =
let charSet = Set.fromList s
in t `Set.member` charSet
isAlphabet :: Char -> Bool
isAlphabet = isOneOf (['a'..'z'] ++ ['A'..'Z'])
isConnecting :: Char -> Bool
isConnecting = isOneOf "_-"
isWhitespace :: Char -> Bool
isWhitespace = isOneOf " \t"
{- Lexeme stuff -}
whitespace :: Parser Char
whitespace = label "space" $ satisfy isWhitespace
space :: Parser ()
space = L.space (void whitespace) (L.skipLineComment ";") empty
lexeme :: Parser a -> Parser a
lexeme = L.lexeme space
newline :: Parser ()
newline = void $ lexeme C.newline
newlines :: Parser ()
newlines = void (some newline) <|> eof
colon :: Parser ()
colon = void $ lexeme $ C.string ":"
{- Basic data types -}
fromHex :: (Num a) => Int -> Parser a
fromHex bitWidth = do
void $ C.string' "0x"
n <- L.hexadecimal :: Parser Integer
let upperBound = 2^bitWidth - 1
if 0 <= n && n <= upperBound
then pure $ fromIntegral n
else fail $ "value " ++ show n ++ " out of bounds " ++ show (0 :: Integer, upperBound)
fromDec :: (Num a) => Int -> Parser a
fromDec bitWidth = do
n <- L.signed mempty L.decimal :: Parser Integer
let upperBound = 2^bitWidth - 1
if (-upperBound) <= n && n <= upperBound
then pure $ fromIntegral n
else fail $ "value " ++ show n ++ " out of bounds " ++ show (-upperBound, upperBound)
mimaWord :: Parser MimaWord
mimaWord = lexeme $ label "24-bit number" $ fromHex 24 <|> fromDec 24
largeValue :: Parser LargeValue
largeValue = lexeme largeValue'
-- | Non-lexeme version of 'largeValue'
largeValue' :: Parser LargeValue
largeValue' = label "20-bit number" $ fromHex 20 <|> fromDec 20
smallValue :: Parser SmallValue
smallValue = lexeme smallValue'
-- | Non-lexeme version of 'smallValue'
smallValue' :: Parser SmallValue
smallValue' = label "16-bit number" $ fromHex 16 <|> fromDec 16
{- Stateful parsing -}
type StatefulParser s a = StateT s Parser a
runStatefulParser :: StatefulParser s a -> s -> Parser (a, s)
runStatefulParser = runStateT

View file

@ -1,98 +0,0 @@
module Mima.Assembler.Parser.Instruction
( parseInstruction
) where
import Control.Monad
import Control.Monad.Trans.Class
import Control.Monad.Trans.State
import Data.Maybe
import qualified Data.Set as Set
import Text.Megaparsec
import Mima.Assembler.Parser.Basic
import Mima.Assembler.Parser.Label
import Mima.Assembler.Parser.RawInstruction
import Mima.Word
data MyState = MyState
{ sCurrentPos :: MimaAddress
, sKnownLabels :: Set.Set MimaLabel
, sActualPos :: Maybe MimaAddress
, sLabels :: Set.Set MimaLabel
} deriving (Show)
initialState :: MimaAddress -> Set.Set MimaLabel -> MyState
initialState currentPos knownLabels = MyState
{ sCurrentPos = currentPos
, sKnownLabels = knownLabels
, sActualPos = Nothing
, sLabels = Set.empty
}
getActualPos :: MyState -> MimaAddress
getActualPos s = fromMaybe (sCurrentPos s) (sActualPos s)
alreadySeen :: MimaLabel -> MyState -> Bool
alreadySeen l s = l `Set.member` sKnownLabels s || l `Set.member` sLabels s
addLabel :: MimaLabel -> MyState -> MyState
addLabel l s = s{sLabels = Set.insert l $ sLabels s}
{- And now, the parsing -}
type SParser a = StatefulParser MyState a
parseLabel :: SParser ()
parseLabel = do
s <- get
l <- lift $ try $ mimaLabel' <* colon
void $ lift $ many newline
if alreadySeen l s
then lift $ failAtLabel l "label already defined earlier"
else modify (addLabel l)
parseAddressLabel :: SParser ()
parseAddressLabel = do
s <- get
(addr, offset) <- lift $ try $ withOffset largeValue' <* colon
void $ lift $ many newline
when (addr < sCurrentPos s) $ do
let errorMsg = "address can't be earlier than " ++ show (sCurrentPos s)
lift $ failAt offset errorMsg
case sActualPos s of
Just _ -> lift $ failAt offset "can't set an instruction's address twice"
Nothing -> put s{sActualPos = Just addr}
parseInstruction' :: SParser (RawInstruction Address)
parseInstruction' = do
void $ many (parseLabel <|> parseAddressLabel)
lift $ rawInstruction <* newlines
-- | @'parseInstruction' currentPos knownLabels@ parses an instruction and
-- its preceding label markings.
--
-- * @currentPos@ is the position at which, if no other marking is
-- specified, this instruction is located.
--
-- * @knownLabels@ are the labels which have already been set
-- elsewhere and thus cannot be set again on this instruction.
--
-- Returns @(actualPos, instruction, labels)@.
--
-- * @actualPos@ is the position at which the instruction is actually
-- located in memory. This can differ from @currentPos@ if a
-- location label is attached to this instruction. The following
-- must always hold: @actualPos >= currentPos@.
--
-- * @instruction@ is the 'RawInstruction' that was parsed.
--
-- * @labels@ are the labels attached to the parsed instruction.
parseInstruction :: MimaAddress
-> Set.Set MimaLabel
-> Parser (MimaAddress, RawInstruction Address, Set.Set MimaLabel)
parseInstruction currentPos knownLabels = do
let s = initialState currentPos knownLabels
(instruction, s') <- runStatefulParser parseInstruction' s
let actualPos = getActualPos s'
labels = sLabels s'
pure (actualPos, instruction, labels)

View file

@ -1,62 +0,0 @@
module Mima.Assembler.Parser.Label
( MimaLabel
, lName
, mimaLabel
, mimaLabel'
, failAtLabel
, resolveLabel
, Address
, address
, resolveAddress
) where
import Data.Char
import qualified Data.Map as Map
import qualified Data.Text as T
import Text.Megaparsec
import Mima.Assembler.Parser.Basic
import Mima.Word
{- Labels -}
data MimaLabel = MimaLabel { lName :: T.Text, lOffset :: Int }
deriving (Show)
instance Eq MimaLabel where
a == b = lName a == lName b
instance Ord MimaLabel where
compare a b = compare (lName a) (lName b)
mimaLabel :: Parser MimaLabel
mimaLabel = lexeme mimaLabel'
mimaLabel' :: Parser MimaLabel
mimaLabel' = label "label" $ do
firstChar <- satisfy isAlphabet
otherChars <- takeWhileP Nothing (\c -> isAlphabet c || isConnecting c || isDigit c)
offset <- getOffset
let name = T.singleton firstChar <> otherChars
pure MimaLabel{lName = name, lOffset = offset}
failAtLabel :: MimaLabel -> String -> Parser a
failAtLabel l = failAt (lOffset l)
resolveLabel :: Map.Map MimaLabel MimaAddress -> MimaLabel -> Parser MimaAddress
resolveLabel lmap l =
case lmap Map.!? l of
Just addr -> pure addr
Nothing -> failAtLabel l "could not resolve label"
{- Addresses -}
data Address = Direct LargeValue | Indirect MimaLabel
deriving (Show)
address :: Parser Address
address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel)
resolveAddress :: Map.Map MimaLabel MimaAddress -> Address -> Parser MimaAddress
resolveAddress _ (Direct addr) = pure addr
resolveAddress labels (Indirect l) = resolveLabel labels l

View file

@ -1,81 +0,0 @@
{-# LANGUAGE OverloadedStrings #-}
module Mima.Assembler.Parser.RawInstruction
( RawInstruction(..)
, rawInstruction
, rawInstructionToWord
) where
import qualified Data.Text as T
import Text.Megaparsec
import qualified Text.Megaparsec.Char as C
import Mima.Assembler.Parser.Basic
import Mima.Assembler.Parser.Label
import Mima.Instruction
import Mima.Word
data RawInstruction addr
= RawLIT MimaWord
| RawSmallInstruction SmallOpcode addr
| RawLargeInstruction LargeOpcode SmallValue
deriving (Show)
parseByLiteral :: [(T.Text, b)] -> Parser b
parseByLiteral = foldl (<|>) empty . map (\(a, b) -> b <$ C.string' a)
smallOpcode' :: Parser SmallOpcode
smallOpcode' = parseByLiteral
[ ( "LDC", LDC)
, ( "LDV", LDV)
, ( "STV", STV)
, ( "ADD", ADD)
, ( "AND", AND)
, ( "OR", OR)
, ( "XOR", XOR)
, ( "EQL", EQL)
, ( "JMP", JMP)
, ( "JMN", JMN)
, ("LDIV", LDIV)
, ("STIV", STIV)
, ("CALL", CALL)
, ( "ADC", ADC)
]
largeOpcode' :: Parser LargeOpcode
largeOpcode' = parseByLiteral
[ ("STRS", STRS)
, ("LDRS", LDRS)
, ("STRF", STRF)
, ("LDRF", LDRF)
]
largeOptionalOpcode' :: Parser LargeOpcode
largeOptionalOpcode' = parseByLiteral
[ ("HALT", HALT)
, ( "NOT", NOT)
, ( "RAR", RAR)
, ( "RET", RET)
, ("LDRA", LDRA)
, ("STRA", STRA)
, ("LDSP", LDSP)
, ("STSP", STSP)
, ("LDFP", LDFP)
, ("STFP", STFP)
]
rawInstruction :: Parser (RawInstruction Address)
rawInstruction = label "instruction" $
(RawLIT <$> (C.string' "LIT" *> instr mimaWord))
<|> (RawSmallInstruction <$> smallOpcode' <*> instr address)
<|> (RawLargeInstruction <$> largeOpcode' <*> instr smallValue)
<|> (RawLargeInstruction <$> largeOptionalOpcode' <*> instr' smallValue)
where
-- These assume that the parser is a lexeme
instr parser = lexeme whitespace *> parser
instr' parser = try (instr parser) <|> lexeme (pure 0)
rawInstructionToWord :: RawInstruction MimaAddress -> MimaWord
rawInstructionToWord (RawLIT word) = word
rawInstructionToWord (RawSmallInstruction so lv) = instructionToWord (SmallInstruction so lv)
rawInstructionToWord (RawLargeInstruction lo sv) = instructionToWord (LargeInstruction lo sv)

View file

@ -1,58 +0,0 @@
{-# LANGUAGE OverloadedStrings #-}
module Mima.Assembler.Parser.Register
( Registers(..)
, parseRegisters
) where
import Control.Monad
import Control.Monad.Trans.Class
import Control.Monad.Trans.State
import qualified Data.Text as T
import Text.Megaparsec
import qualified Text.Megaparsec.Char as C
import Mima.Assembler.Parser.Basic
import Mima.Assembler.Parser.Label
import Mima.Word
data Registers addr = Registers
{ regIAR :: Maybe addr
, regACC :: Maybe MimaWord
, regRA :: Maybe addr
, regSP :: Maybe addr
, regFP :: Maybe addr
} deriving (Show)
emptyRegisters :: Registers a
emptyRegisters = Registers Nothing Nothing Nothing Nothing Nothing
parseRegisters :: Parser (Registers Address)
parseRegisters = snd <$> runStatefulParser parseRegisters' emptyRegisters
parseRegisters' :: StatefulParser (Registers Address) ()
parseRegisters' = (parseARegister >> lift newlines >> parseRegisters') <|> pure ()
parseARegister :: StatefulParser (Registers Address) ()
parseARegister
= parseRegister "IAR" address regIAR (\v reg -> reg{regIAR = Just v})
<|> parseRegister "ACC" mimaWord regACC (\v reg -> reg{regACC = Just v})
<|> parseRegister "RA" address regRA (\v reg -> reg{regRA = Just v})
<|> parseRegister "SP" address regSP (\v reg -> reg{regSP = Just v})
<|> parseRegister "FP" address regFP (\v reg -> reg{regFP = Just v})
<?> "register initialisation"
parseRegister :: T.Text
-> Parser x
-> (Registers addr -> Maybe x)
-> (x -> Registers addr -> Registers addr)
-> StatefulParser (Registers addr) ()
parseRegister name parser readReg writeReg = do
void $ lift $ lexeme $ C.string' name
void $ lift $ lexeme $ C.string' "="
reg <- get
case readReg reg of
Just _ -> fail $ "can't specify register " ++ T.unpack name ++ " twice"
Nothing -> do
x <- lift parser
modify (writeReg x)