From dc990a2e7a67b52e40654dd822446bde17eb964c Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 9 Nov 2019 23:37:47 +0000 Subject: [PATCH] Parse instructions with labels --- src/Mima/Assembler/Parser.hs | 86 +++++++++++ src/Mima/Assembler/Parser/Basic.hs | 9 +- src/Mima/Assembler/Parser/Label.hs | 7 +- src/Mima/Assembler/Parser/RawInstruction.hs | 156 ++++++++------------ 4 files changed, 157 insertions(+), 101 deletions(-) create mode 100644 src/Mima/Assembler/Parser.hs diff --git a/src/Mima/Assembler/Parser.hs b/src/Mima/Assembler/Parser.hs new file mode 100644 index 0000000..421d5c3 --- /dev/null +++ b/src/Mima/Assembler/Parser.hs @@ -0,0 +1,86 @@ +{-# LANGUAGE TupleSections #-} + +module Mima.Assembler.Parser + ( parseState + ) where + +import Control.Monad +import Control.Monad.Trans.Class +import Control.Monad.Trans.State +import qualified Data.Map as Map +import qualified Data.Set as Set +import Text.Megaparsec + +import Mima.Assembler.Parser.Basic +import Mima.Assembler.Parser.Instruction +import Mima.Assembler.Parser.Label +import Mima.Assembler.Parser.RawInstruction +import Mima.State +import Mima.Word + +data MyState = MyState + { sCurrentPos :: MimaAddress + , sLabels :: Map.Map MimaLabel MimaAddress + , sInstructions :: Map.Map MimaAddress (RawInstruction Address) + } deriving (Show) + +initialState :: MyState +initialState = MyState 0 Map.empty Map.empty + +type SParser a = StatefulParser MyState a + +incrementCurrentPos :: SParser () +incrementCurrentPos = do + s <- get + when (sCurrentPos s == maxBound) empty + put s{sCurrentPos = succ $ sCurrentPos s} + +parseInstructions' :: SParser () +parseInstructions' = sepBy parseInstruction' incrementCurrentPos >> lift (eof <|> fail atMaxAddress) + where + atMaxAddress = "already at maximum address (" ++ show (maxBound :: MimaAddress) + ++ ") - can't go any further" + + parseInstruction' :: SParser () + parseInstruction' = do + s <- get + let currentPos = sCurrentPos s + knownLabels = Map.keysSet $ sLabels s + (actualPos, instruction, labels) <- lift $ parseInstruction currentPos knownLabels + let newLabels = Map.fromList [(l, actualPos) | l <- Set.toList labels] + put s { sCurrentPos = actualPos + , sLabels = Map.union newLabels $ sLabels s + , sInstructions = Map.insert actualPos instruction $ sInstructions s + } + +parseInstructions :: Parser (Map.Map MimaLabel MimaAddress, Map.Map MimaAddress (RawInstruction Address)) +parseInstructions = do + (_, s) <- runStatefulParser parseInstructions' initialState + pure (sLabels s, sInstructions s) + +resolveRawInstruction :: Map.Map MimaLabel MimaAddress + -> RawInstruction Address + -> Parser (RawInstruction MimaAddress) +resolveRawInstruction _ (RawLIT word) = pure $ RawLIT word +resolveRawInstruction _ (RawLargeInstruction lo sv) = pure $ RawLargeInstruction lo sv +resolveRawInstruction labels (RawSmallInstruction so lv) = do + addr <- resolveAddress labels lv + pure $ RawSmallInstruction so addr + +resolveLabels :: Map.Map MimaLabel MimaAddress + -> Map.Map MimaAddress (RawInstruction Address) + -> Parser (Map.Map MimaAddress (RawInstruction MimaAddress)) +resolveLabels labels rawLabeledInstructions = do + let labeledInstrList = Map.toList rawLabeledInstructions + resolve = resolveRawInstruction labels + instrList <- forM labeledInstrList $ \(addr, instr) -> (addr,) <$> resolve instr + let rawInstructions = Map.fromList instrList + pure rawInstructions + +parseState :: Parser MimaState +parseState = do + space + (labels, rawLabeledInstructions) <- parseInstructions + rawInstructions <- resolveLabels labels rawLabeledInstructions + let mem = mapToMemory $ Map.map rawInstructionToWord rawInstructions + pure $ basicState mem diff --git a/src/Mima/Assembler/Parser/Basic.hs b/src/Mima/Assembler/Parser/Basic.hs index 83e666b..6760e6d 100644 --- a/src/Mima/Assembler/Parser/Basic.hs +++ b/src/Mima/Assembler/Parser/Basic.hs @@ -20,6 +20,7 @@ module Mima.Assembler.Parser.Basic , largeValue , largeValue' , smallValue + , smallValue' -- * Stateful parsing , StatefulParser , runStatefulParser @@ -105,14 +106,18 @@ mimaWord :: Parser MimaWord mimaWord = lexeme $ label "24-bit number" $ fromHex 24 <|> fromDec 24 largeValue :: Parser LargeValue -largeValue = lexeme $ largeValue' +largeValue = lexeme largeValue' -- | Non-lexeme version of 'largeValue' largeValue' :: Parser LargeValue largeValue' = label "20-bit number" $ fromHex 20 <|> fromDec 20 smallValue :: Parser SmallValue -smallValue = lexeme $ label "16-bit number" $ fromHex 16 <|> fromDec 16 +smallValue = lexeme smallValue' + +-- | Non-lexeme version of 'smallValue' +smallValue' :: Parser SmallValue +smallValue' = label "16-bit number" $ fromHex 16 <|> fromDec 16 {- Stateful parsing -} diff --git a/src/Mima/Assembler/Parser/Label.hs b/src/Mima/Assembler/Parser/Label.hs index 0594648..11b85d0 100644 --- a/src/Mima/Assembler/Parser/Label.hs +++ b/src/Mima/Assembler/Parser/Label.hs @@ -6,6 +6,7 @@ module Mima.Assembler.Parser.Label , resolveLabel , Address , address + , resolveAddress ) where import qualified Data.Map as Map @@ -50,4 +51,8 @@ data Address = Direct LargeValue | Indirect MimaLabel deriving (Show) address :: Parser Address -address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel') +address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel) + +resolveAddress :: Map.Map MimaLabel MimaAddress -> Address -> Parser MimaAddress +resolveAddress _ (Direct addr) = pure addr +resolveAddress labels (Indirect l) = resolveLabel labels l diff --git a/src/Mima/Assembler/Parser/RawInstruction.hs b/src/Mima/Assembler/Parser/RawInstruction.hs index d302444..27cb161 100644 --- a/src/Mima/Assembler/Parser/RawInstruction.hs +++ b/src/Mima/Assembler/Parser/RawInstruction.hs @@ -1,117 +1,77 @@ {-# LANGUAGE OverloadedStrings #-} module Mima.Assembler.Parser.RawInstruction - ( RawInstruction + ( RawInstruction(..) , rawInstruction + , rawInstructionToWord ) where -import Data.Bits +import qualified Data.Text as T import Text.Megaparsec import qualified Text.Megaparsec.Char as C import Mima.Assembler.Parser.Basic import Mima.Assembler.Parser.Label +import Mima.Instruction import Mima.Word data RawInstruction addr - = RawLIT MimaWord - | RawLDC addr - | RawLDV addr - | RawSTV addr - | RawADD addr - | RawAND addr - | RawOR addr - | RawXOR addr - | RawEQL addr - | RawJMP addr - | RawJMN addr - | RawLDIV addr - | RawSTIV addr - | RawCALL addr - | RawLDVR addr - | RawSTVR addr - | RawHALT SmallValue - | RawNOT SmallValue - | RawRAR SmallValue - | RawRET SmallValue - | RawLDRA SmallValue - | RawSTRA SmallValue - | RawLDSP SmallValue - | RawSTSP SmallValue - | RawLDFP SmallValue - | RawSTFP SmallValue - | RawADC SmallValue + = RawLIT MimaWord + | RawSmallInstruction SmallOpcode addr + | RawLargeInstruction LargeOpcode SmallValue deriving (Show) +parseByLiteral :: [(T.Text, b)] -> Parser b +parseByLiteral = foldl (<|>) empty . map (\(a, b) -> b <$ C.string' a) + +smallOpcode' :: Parser SmallOpcode +smallOpcode' = parseByLiteral + [ ( "LDC", LDC) + , ( "LDV", LDV) + , ( "STV", STV) + , ( "ADD", ADD) + , ( "AND", AND) + , ( "OR", OR) + , ( "XOR", XOR) + , ( "EQL", EQL) + , ( "JMP", JMP) + , ( "JMN", JMN) + , ("LDIV", LDIV) + , ("STIV", STIV) + , ("CALL", CALL) + , ("LDVR", LDVR) + , ("STVR", STVR) + ] + +largeOpcode' :: Parser LargeOpcode +largeOpcode' = parseByLiteral [( "ADC", ADC)] + +largeOptionalOpcode' :: Parser LargeOpcode +largeOptionalOpcode' = parseByLiteral + [ ("HALT", HALT) + , ( "NOT", NOT) + , ( "RAR", RAR) + , ( "RET", RET) + , ("LDRA", LDRA) + , ("STRA", STRA) + , ("LDSP", LDSP) + , ("STSP", STSP) + , ("LDFP", LDFP) + , ("STFP", STFP) + ] + rawInstruction :: Parser (RawInstruction Address) -rawInstruction - = label "instruction" - $ RawLIT <$> instr "LIT" mimaWord - <|> RawLDC <$> instr "LDC" address - <|> RawLDV <$> instr "LDV" address - <|> RawSTV <$> instr "STV" address - <|> RawADD <$> instr "ADD" address - <|> RawAND <$> instr "AND" address - <|> RawOR <$> instr "OR" address - <|> RawXOR <$> instr "XOR" address - <|> RawEQL <$> instr "EQL" address - <|> RawJMP <$> instr "JMP" address - <|> RawJMN <$> instr "JMN" address - <|> RawLDIV <$> instr "LDIV" address - <|> RawSTIV <$> instr "STIV" address - <|> RawCALL <$> instr "CALL" address - <|> RawLDVR <$> instr "LDVR" address - <|> RawSTVR <$> instr "STVR" address - <|> RawHALT <$> instr' "HALT" smallValue - <|> RawNOT <$> instr' "NOT" smallValue - <|> RawRAR <$> instr' "RAR" smallValue - <|> RawRET <$> instr' "RET" smallValue - <|> RawLDRA <$> instr' "LDRA" smallValue - <|> RawSTRA <$> instr' "STRA" smallValue - <|> RawLDSP <$> instr' "LDSP" smallValue - <|> RawSTSP <$> instr' "STSP" smallValue - <|> RawLDFP <$> instr' "LDFP" smallValue - <|> RawSTFP <$> instr' "STFP" smallValue - <|> RawADC <$> instr "ADC" smallValue +rawInstruction = label "instruction" $ + (RawLIT <$> (C.string' "LIT" *> instr mimaWord)) + <|> (RawSmallInstruction <$> smallOpcode' <*> instr address) + <|> (RawLargeInstruction <$> largeOpcode' <*> instr smallValue) + <|> (RawLargeInstruction <$> largeOptionalOpcode' <*> instr' smallValue) where - instr name value = lexeme (C.string' name >> whitespace) >> space *> value - instr' name value = lexeme (C.string' name >> whitespace) *> (value <|> pure zeroBits) + -- These assume that the parser is a lexeme + instr parser = lexeme whitespace *> parser + instr' parser = try (instr parser) <|> pure 0 -{- -data InstrState = InstrState - { isCurPos :: MimaAddress - , isPrevLabels :: Set.Set Label - , isNewPos :: Maybe MimaAddress - , isLabels :: Set.Set Label - } deriving (Show) - -instrState :: MimaAddress -> Set.Set Label -> InstrState -instrState curPos prevLabels = InstrState curPos prevLabels Nothing Set.empty - -parseLabel :: StatefulParser InstrState () -parseLabel = do - name <- lift labelName - is <- get - let prevLabels = isPrevLabels is - labels = isLabels is - if name `Set.member` prevLabels || name `Set.member` labels - then fail "label can't be specified more than once" - else do - void $ lift $ lexeme $ C.string ":" - put is{isLabels = Set.insert name labels} - -parseLocationLabel :: StatefulParser InstrState () -parseLocationLabel = do - newPos <- lift largeValue' - is <- get - case isNewPos is of - Just _ -> fail "cannot specify two positions for one instruction" - Nothing -> if newPos < isCurPos is - then fail "cannot set a position to an earlier position" - else do - void $ lift $ lexeme $ C.string ":" - put is{isNewPos = Just newPos} - -parseInstruction :: StatefulParser InstrState RawInstruction -parseInstruction = try parseLabel <|> parseLocationLabel --} +rawInstructionToWord :: RawInstruction MimaAddress -> MimaWord +rawInstructionToWord (RawLIT word) = word +rawInstructionToWord (RawSmallInstruction so lv) = instructionToWord (SmallInstruction so lv) +rawInstructionToWord (RawLargeInstruction lo sv) = instructionToWord (LargeInstruction lo sv)