From fa4cd218ab6bc1544692cde3387bda2fe71192db Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 21 Nov 2019 18:27:27 +0000 Subject: [PATCH] Switch assembler to new parser --- app/MimaAsm/Main.hs | 4 +- src/Mima/Assembler/Parser.hs | 119 ------------------ src/Mima/Assembler/Parser/Basic.hs | 127 -------------------- src/Mima/Assembler/Parser/Instruction.hs | 98 --------------- src/Mima/Assembler/Parser/Label.hs | 62 ---------- src/Mima/Assembler/Parser/RawInstruction.hs | 81 ------------- src/Mima/Assembler/Parser/Register.hs | 58 --------- 7 files changed, 2 insertions(+), 547 deletions(-) delete mode 100644 src/Mima/Assembler/Parser.hs delete mode 100644 src/Mima/Assembler/Parser/Basic.hs delete mode 100644 src/Mima/Assembler/Parser/Instruction.hs delete mode 100644 src/Mima/Assembler/Parser/Label.hs delete mode 100644 src/Mima/Assembler/Parser/RawInstruction.hs delete mode 100644 src/Mima/Assembler/Parser/Register.hs diff --git a/app/MimaAsm/Main.hs b/app/MimaAsm/Main.hs index 5fddc95..3f5e81d 100644 --- a/app/MimaAsm/Main.hs +++ b/app/MimaAsm/Main.hs @@ -3,9 +3,9 @@ module Main where import Control.Monad.Trans.Class import Options.Applicative -import Mima.Assembler.Parser import Mima.IO import Mima.Load +import Mima.Parse.Assembly data Settings = Settings { infile :: String @@ -33,7 +33,7 @@ main = doRun_ $ do settings <- lift $ execParser opts lift $ putStrLn $ "Loading assembly file at " ++ infile settings - (state, _) <- loadFile readState (infile settings) + (state, _, _) <- loadFile readAssembly (infile settings) lift $ putStrLn "Parsing successful" lift $ putStrLn $ "Writing result to " ++ outfile settings diff --git a/src/Mima/Assembler/Parser.hs b/src/Mima/Assembler/Parser.hs deleted file mode 100644 index bff4fae..0000000 --- a/src/Mima/Assembler/Parser.hs +++ /dev/null @@ -1,119 +0,0 @@ -{-# LANGUAGE TupleSections #-} - -module Mima.Assembler.Parser - ( parseState - , readState - ) where - -import Control.Monad -import Control.Monad.Trans.Class -import Control.Monad.Trans.State -import qualified Data.Map as Map -import Data.Maybe -import qualified Data.Set as Set -import qualified Data.Text as T -import Text.Megaparsec - -import Mima.Assembler.Parser.Basic -import Mima.Assembler.Parser.Instruction -import Mima.Assembler.Parser.Label -import Mima.Assembler.Parser.RawInstruction -import Mima.Assembler.Parser.Register -import Mima.Parse.Weed -import Mima.State -import Mima.Word - -data MyState = MyState - { sCurrentPos :: MimaAddress - , sLabels :: Map.Map MimaLabel MimaAddress - , sInstructions :: Map.Map MimaAddress (RawInstruction Address) - } deriving (Show) - -initialState :: MyState -initialState = MyState 0 Map.empty Map.empty - -type SParser a = StatefulParser MyState a - -incrementCurrentPos :: SParser () -incrementCurrentPos = do - s <- get - when (sCurrentPos s == maxBound) empty - put s{sCurrentPos = succ $ sCurrentPos s} - -parseInstructions' :: SParser () -parseInstructions' = sepBy parseInstruction' incrementCurrentPos >> lift eof - where - parseInstruction' :: SParser () - parseInstruction' = do - s <- get - let currentPos = sCurrentPos s - knownLabels = Map.keysSet $ sLabels s - (actualPos, instruction, labels) <- lift $ parseInstruction currentPos knownLabels - let newLabels = Map.fromList [(l, actualPos) | l <- Set.toList labels] - put s { sCurrentPos = actualPos - , sLabels = Map.union newLabels $ sLabels s - , sInstructions = Map.insert actualPos instruction $ sInstructions s - } - -parseInstructions :: Parser (Map.Map MimaLabel MimaAddress, Map.Map MimaAddress (RawInstruction Address)) -parseInstructions = do - (_, s) <- runStatefulParser parseInstructions' initialState - pure (sLabels s, sInstructions s) - -resolveRegisters :: Map.Map MimaLabel MimaAddress - -> Registers Address - -> Parser (Registers MimaAddress) -resolveRegisters labels reg = do - iar <- resolveMaybeAddress $ regIAR reg - ra <- resolveMaybeAddress $ regRA reg - sp <- resolveMaybeAddress $ regSP reg - fp <- resolveMaybeAddress $ regFP reg - pure reg{regIAR = iar, regRA = ra, regSP = sp, regFP = fp} - where - resolveMaybeAddress :: Maybe Address -> Parser (Maybe MimaAddress) - resolveMaybeAddress (Just addr) = Just <$> resolveAddress labels addr - resolveMaybeAddress Nothing = pure Nothing - -resolveRawInstruction :: Map.Map MimaLabel MimaAddress - -> RawInstruction Address - -> Parser (RawInstruction MimaAddress) -resolveRawInstruction _ (RawLIT word) = pure $ RawLIT word -resolveRawInstruction _ (RawLargeInstruction lo sv) = pure $ RawLargeInstruction lo sv -resolveRawInstruction labels (RawSmallInstruction so lv) = do - addr <- resolveAddress labels lv - pure $ RawSmallInstruction so addr - -resolveLabels :: Map.Map MimaLabel MimaAddress - -> Map.Map MimaAddress (RawInstruction Address) - -> Parser (Map.Map MimaAddress (RawInstruction MimaAddress)) -resolveLabels labels rawLabeledInstructions = do - let labeledInstrList = Map.toList rawLabeledInstructions - resolve = resolveRawInstruction labels - instrList <- forM labeledInstrList $ \(addr, instr) -> (addr,) <$> resolve instr - let rawInstructions = Map.fromList instrList - pure rawInstructions - -stateFromRegisters :: Registers MimaAddress -> MimaMemory -> MimaState -stateFromRegisters reg mem = - MimaState { msIAR = fromMaybe 0 $ regIAR reg - , msACC = fromMaybe 0 $ regACC reg - , msRA = fromMaybe 0 $ regRA reg - , msSP = fromMaybe 0 $ regSP reg - , msFP = fromMaybe 0 $ regFP reg - , msMemory = mem - } - -parseState :: Parser (MimaState, Map.Map T.Text MimaAddress) -parseState = do - lexeme $ pure () - void $ many newlines - unresolvedRegisters <- parseRegisters - (labels, unresolvedRawInstructions) <- parseInstructions - registers <- resolveRegisters labels unresolvedRegisters - rawInstructions <- resolveLabels labels unresolvedRawInstructions - let mem = mapToMemory $ Map.map rawInstructionToWord rawInstructions - labelNames = Map.fromList $ map (\(k, v) -> (lName k, v)) $ Map.toList labels - pure (stateFromRegisters registers mem, labelNames) - -readState :: FilePath -> T.Text -> Either WeedErrorBundle (MimaState, Map.Map T.Text MimaAddress) -readState filename input = parse parseState filename input diff --git a/src/Mima/Assembler/Parser/Basic.hs b/src/Mima/Assembler/Parser/Basic.hs deleted file mode 100644 index 6760e6d..0000000 --- a/src/Mima/Assembler/Parser/Basic.hs +++ /dev/null @@ -1,127 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} - -module Mima.Assembler.Parser.Basic - ( Parser - , withOffset - , failAt - -- * Character specifications - , isAlphabet - , isConnecting - , isWhitespace - -- * Lexme - , whitespace - , space - , lexeme - , newline - , newlines - , colon - -- * Basic data types - , mimaWord - , largeValue - , largeValue' - , smallValue - , smallValue' - -- * Stateful parsing - , StatefulParser - , runStatefulParser - ) where - -import Control.Monad -import Control.Monad.Trans.State -import qualified Data.Set as Set -import qualified Data.Text as T -import Data.Void -import Text.Megaparsec -import qualified Text.Megaparsec.Char as C -import qualified Text.Megaparsec.Char.Lexer as L - -import Mima.Word - -type Parser = Parsec Void T.Text - -withOffset :: Parser a -> Parser (a, Int) -withOffset parser = (,) <$> parser <*> getOffset - -failAt :: Int -> String -> Parser a -failAt offset message = do - setOffset offset - fail message - -{- Character specifications -} - -isOneOf :: String -> Char -> Bool -isOneOf s t = - let charSet = Set.fromList s - in t `Set.member` charSet - -isAlphabet :: Char -> Bool -isAlphabet = isOneOf (['a'..'z'] ++ ['A'..'Z']) - -isConnecting :: Char -> Bool -isConnecting = isOneOf "_-" - -isWhitespace :: Char -> Bool -isWhitespace = isOneOf " \t" - -{- Lexeme stuff -} - -whitespace :: Parser Char -whitespace = label "space" $ satisfy isWhitespace - -space :: Parser () -space = L.space (void whitespace) (L.skipLineComment ";") empty - -lexeme :: Parser a -> Parser a -lexeme = L.lexeme space - -newline :: Parser () -newline = void $ lexeme C.newline - -newlines :: Parser () -newlines = void (some newline) <|> eof - -colon :: Parser () -colon = void $ lexeme $ C.string ":" - -{- Basic data types -} - -fromHex :: (Num a) => Int -> Parser a -fromHex bitWidth = do - void $ C.string' "0x" - n <- L.hexadecimal :: Parser Integer - let upperBound = 2^bitWidth - 1 - if 0 <= n && n <= upperBound - then pure $ fromIntegral n - else fail $ "value " ++ show n ++ " out of bounds " ++ show (0 :: Integer, upperBound) - -fromDec :: (Num a) => Int -> Parser a -fromDec bitWidth = do - n <- L.signed mempty L.decimal :: Parser Integer - let upperBound = 2^bitWidth - 1 - if (-upperBound) <= n && n <= upperBound - then pure $ fromIntegral n - else fail $ "value " ++ show n ++ " out of bounds " ++ show (-upperBound, upperBound) - -mimaWord :: Parser MimaWord -mimaWord = lexeme $ label "24-bit number" $ fromHex 24 <|> fromDec 24 - -largeValue :: Parser LargeValue -largeValue = lexeme largeValue' - --- | Non-lexeme version of 'largeValue' -largeValue' :: Parser LargeValue -largeValue' = label "20-bit number" $ fromHex 20 <|> fromDec 20 - -smallValue :: Parser SmallValue -smallValue = lexeme smallValue' - --- | Non-lexeme version of 'smallValue' -smallValue' :: Parser SmallValue -smallValue' = label "16-bit number" $ fromHex 16 <|> fromDec 16 - -{- Stateful parsing -} - -type StatefulParser s a = StateT s Parser a - -runStatefulParser :: StatefulParser s a -> s -> Parser (a, s) -runStatefulParser = runStateT diff --git a/src/Mima/Assembler/Parser/Instruction.hs b/src/Mima/Assembler/Parser/Instruction.hs deleted file mode 100644 index b9d62ec..0000000 --- a/src/Mima/Assembler/Parser/Instruction.hs +++ /dev/null @@ -1,98 +0,0 @@ -module Mima.Assembler.Parser.Instruction - ( parseInstruction - ) where - -import Control.Monad -import Control.Monad.Trans.Class -import Control.Monad.Trans.State -import Data.Maybe -import qualified Data.Set as Set -import Text.Megaparsec - -import Mima.Assembler.Parser.Basic -import Mima.Assembler.Parser.Label -import Mima.Assembler.Parser.RawInstruction -import Mima.Word - -data MyState = MyState - { sCurrentPos :: MimaAddress - , sKnownLabels :: Set.Set MimaLabel - , sActualPos :: Maybe MimaAddress - , sLabels :: Set.Set MimaLabel - } deriving (Show) - -initialState :: MimaAddress -> Set.Set MimaLabel -> MyState -initialState currentPos knownLabels = MyState - { sCurrentPos = currentPos - , sKnownLabels = knownLabels - , sActualPos = Nothing - , sLabels = Set.empty - } - -getActualPos :: MyState -> MimaAddress -getActualPos s = fromMaybe (sCurrentPos s) (sActualPos s) - -alreadySeen :: MimaLabel -> MyState -> Bool -alreadySeen l s = l `Set.member` sKnownLabels s || l `Set.member` sLabels s - -addLabel :: MimaLabel -> MyState -> MyState -addLabel l s = s{sLabels = Set.insert l $ sLabels s} - -{- And now, the parsing -} - -type SParser a = StatefulParser MyState a - -parseLabel :: SParser () -parseLabel = do - s <- get - l <- lift $ try $ mimaLabel' <* colon - void $ lift $ many newline - if alreadySeen l s - then lift $ failAtLabel l "label already defined earlier" - else modify (addLabel l) - -parseAddressLabel :: SParser () -parseAddressLabel = do - s <- get - (addr, offset) <- lift $ try $ withOffset largeValue' <* colon - void $ lift $ many newline - when (addr < sCurrentPos s) $ do - let errorMsg = "address can't be earlier than " ++ show (sCurrentPos s) - lift $ failAt offset errorMsg - case sActualPos s of - Just _ -> lift $ failAt offset "can't set an instruction's address twice" - Nothing -> put s{sActualPos = Just addr} - -parseInstruction' :: SParser (RawInstruction Address) -parseInstruction' = do - void $ many (parseLabel <|> parseAddressLabel) - lift $ rawInstruction <* newlines - --- | @'parseInstruction' currentPos knownLabels@ parses an instruction and --- its preceding label markings. --- --- * @currentPos@ is the position at which, if no other marking is --- specified, this instruction is located. --- --- * @knownLabels@ are the labels which have already been set --- elsewhere and thus cannot be set again on this instruction. --- --- Returns @(actualPos, instruction, labels)@. --- --- * @actualPos@ is the position at which the instruction is actually --- located in memory. This can differ from @currentPos@ if a --- location label is attached to this instruction. The following --- must always hold: @actualPos >= currentPos@. --- --- * @instruction@ is the 'RawInstruction' that was parsed. --- --- * @labels@ are the labels attached to the parsed instruction. -parseInstruction :: MimaAddress - -> Set.Set MimaLabel - -> Parser (MimaAddress, RawInstruction Address, Set.Set MimaLabel) -parseInstruction currentPos knownLabels = do - let s = initialState currentPos knownLabels - (instruction, s') <- runStatefulParser parseInstruction' s - let actualPos = getActualPos s' - labels = sLabels s' - pure (actualPos, instruction, labels) diff --git a/src/Mima/Assembler/Parser/Label.hs b/src/Mima/Assembler/Parser/Label.hs deleted file mode 100644 index 2443141..0000000 --- a/src/Mima/Assembler/Parser/Label.hs +++ /dev/null @@ -1,62 +0,0 @@ -module Mima.Assembler.Parser.Label - ( MimaLabel - , lName - , mimaLabel - , mimaLabel' - , failAtLabel - , resolveLabel - , Address - , address - , resolveAddress - ) where - -import Data.Char -import qualified Data.Map as Map -import qualified Data.Text as T -import Text.Megaparsec - -import Mima.Assembler.Parser.Basic -import Mima.Word - -{- Labels -} - -data MimaLabel = MimaLabel { lName :: T.Text, lOffset :: Int } - deriving (Show) - -instance Eq MimaLabel where - a == b = lName a == lName b - -instance Ord MimaLabel where - compare a b = compare (lName a) (lName b) - -mimaLabel :: Parser MimaLabel -mimaLabel = lexeme mimaLabel' - -mimaLabel' :: Parser MimaLabel -mimaLabel' = label "label" $ do - firstChar <- satisfy isAlphabet - otherChars <- takeWhileP Nothing (\c -> isAlphabet c || isConnecting c || isDigit c) - offset <- getOffset - let name = T.singleton firstChar <> otherChars - pure MimaLabel{lName = name, lOffset = offset} - -failAtLabel :: MimaLabel -> String -> Parser a -failAtLabel l = failAt (lOffset l) - -resolveLabel :: Map.Map MimaLabel MimaAddress -> MimaLabel -> Parser MimaAddress -resolveLabel lmap l = - case lmap Map.!? l of - Just addr -> pure addr - Nothing -> failAtLabel l "could not resolve label" - -{- Addresses -} - -data Address = Direct LargeValue | Indirect MimaLabel - deriving (Show) - -address :: Parser Address -address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel) - -resolveAddress :: Map.Map MimaLabel MimaAddress -> Address -> Parser MimaAddress -resolveAddress _ (Direct addr) = pure addr -resolveAddress labels (Indirect l) = resolveLabel labels l diff --git a/src/Mima/Assembler/Parser/RawInstruction.hs b/src/Mima/Assembler/Parser/RawInstruction.hs deleted file mode 100644 index c4af572..0000000 --- a/src/Mima/Assembler/Parser/RawInstruction.hs +++ /dev/null @@ -1,81 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} - -module Mima.Assembler.Parser.RawInstruction - ( RawInstruction(..) - , rawInstruction - , rawInstructionToWord - ) where - -import qualified Data.Text as T -import Text.Megaparsec -import qualified Text.Megaparsec.Char as C - -import Mima.Assembler.Parser.Basic -import Mima.Assembler.Parser.Label -import Mima.Instruction -import Mima.Word - -data RawInstruction addr - = RawLIT MimaWord - | RawSmallInstruction SmallOpcode addr - | RawLargeInstruction LargeOpcode SmallValue - deriving (Show) - -parseByLiteral :: [(T.Text, b)] -> Parser b -parseByLiteral = foldl (<|>) empty . map (\(a, b) -> b <$ C.string' a) - -smallOpcode' :: Parser SmallOpcode -smallOpcode' = parseByLiteral - [ ( "LDC", LDC) - , ( "LDV", LDV) - , ( "STV", STV) - , ( "ADD", ADD) - , ( "AND", AND) - , ( "OR", OR) - , ( "XOR", XOR) - , ( "EQL", EQL) - , ( "JMP", JMP) - , ( "JMN", JMN) - , ("LDIV", LDIV) - , ("STIV", STIV) - , ("CALL", CALL) - , ( "ADC", ADC) - ] - -largeOpcode' :: Parser LargeOpcode -largeOpcode' = parseByLiteral - [ ("STRS", STRS) - , ("LDRS", LDRS) - , ("STRF", STRF) - , ("LDRF", LDRF) - ] - -largeOptionalOpcode' :: Parser LargeOpcode -largeOptionalOpcode' = parseByLiteral - [ ("HALT", HALT) - , ( "NOT", NOT) - , ( "RAR", RAR) - , ( "RET", RET) - , ("LDRA", LDRA) - , ("STRA", STRA) - , ("LDSP", LDSP) - , ("STSP", STSP) - , ("LDFP", LDFP) - , ("STFP", STFP) - ] - -rawInstruction :: Parser (RawInstruction Address) -rawInstruction = label "instruction" $ - (RawLIT <$> (C.string' "LIT" *> instr mimaWord)) - <|> (RawSmallInstruction <$> smallOpcode' <*> instr address) - <|> (RawLargeInstruction <$> largeOpcode' <*> instr smallValue) - <|> (RawLargeInstruction <$> largeOptionalOpcode' <*> instr' smallValue) - where - -- These assume that the parser is a lexeme - instr parser = lexeme whitespace *> parser - instr' parser = try (instr parser) <|> lexeme (pure 0) - -rawInstructionToWord :: RawInstruction MimaAddress -> MimaWord -rawInstructionToWord (RawLIT word) = word -rawInstructionToWord (RawSmallInstruction so lv) = instructionToWord (SmallInstruction so lv) -rawInstructionToWord (RawLargeInstruction lo sv) = instructionToWord (LargeInstruction lo sv) diff --git a/src/Mima/Assembler/Parser/Register.hs b/src/Mima/Assembler/Parser/Register.hs deleted file mode 100644 index 14f0a02..0000000 --- a/src/Mima/Assembler/Parser/Register.hs +++ /dev/null @@ -1,58 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} - -module Mima.Assembler.Parser.Register - ( Registers(..) - , parseRegisters - ) where - -import Control.Monad -import Control.Monad.Trans.Class -import Control.Monad.Trans.State -import qualified Data.Text as T -import Text.Megaparsec -import qualified Text.Megaparsec.Char as C - -import Mima.Assembler.Parser.Basic -import Mima.Assembler.Parser.Label -import Mima.Word - -data Registers addr = Registers - { regIAR :: Maybe addr - , regACC :: Maybe MimaWord - , regRA :: Maybe addr - , regSP :: Maybe addr - , regFP :: Maybe addr - } deriving (Show) - -emptyRegisters :: Registers a -emptyRegisters = Registers Nothing Nothing Nothing Nothing Nothing - -parseRegisters :: Parser (Registers Address) -parseRegisters = snd <$> runStatefulParser parseRegisters' emptyRegisters - -parseRegisters' :: StatefulParser (Registers Address) () -parseRegisters' = (parseARegister >> lift newlines >> parseRegisters') <|> pure () - -parseARegister :: StatefulParser (Registers Address) () -parseARegister - = parseRegister "IAR" address regIAR (\v reg -> reg{regIAR = Just v}) - <|> parseRegister "ACC" mimaWord regACC (\v reg -> reg{regACC = Just v}) - <|> parseRegister "RA" address regRA (\v reg -> reg{regRA = Just v}) - <|> parseRegister "SP" address regSP (\v reg -> reg{regSP = Just v}) - <|> parseRegister "FP" address regFP (\v reg -> reg{regFP = Just v}) - "register initialisation" - -parseRegister :: T.Text - -> Parser x - -> (Registers addr -> Maybe x) - -> (x -> Registers addr -> Registers addr) - -> StatefulParser (Registers addr) () -parseRegister name parser readReg writeReg = do - void $ lift $ lexeme $ C.string' name - void $ lift $ lexeme $ C.string' "=" - reg <- get - case readReg reg of - Just _ -> fail $ "can't specify register " ++ T.unpack name ++ " twice" - Nothing -> do - x <- lift parser - modify (writeReg x)