From 803c826395c63147f457bb54d7271ddbf99c9eae Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 9 Nov 2019 19:24:50 +0000 Subject: [PATCH] Parse instructions with labels --- package.yaml | 2 + src/Mima/Assembler/Parser/Basic.hs | 122 ++++++++++++++++++++ src/Mima/Assembler/Parser/Instruction.hs | 98 ++++++++++++++++ src/Mima/Assembler/Parser/Label.hs | 53 +++++++++ src/Mima/Assembler/Parser/RawInstruction.hs | 117 +++++++++++++++++++ 5 files changed, 392 insertions(+) create mode 100644 src/Mima/Assembler/Parser/Basic.hs create mode 100644 src/Mima/Assembler/Parser/Instruction.hs create mode 100644 src/Mima/Assembler/Parser/Label.hs create mode 100644 src/Mima/Assembler/Parser/RawInstruction.hs diff --git a/package.yaml b/package.yaml index c5a5395..b8cecfe 100644 --- a/package.yaml +++ b/package.yaml @@ -24,8 +24,10 @@ dependencies: - binary - bytestring - containers +- megaparsec - optparse-applicative - text +- transformers - OddWord >= 1.0 && < 1.1 library: diff --git a/src/Mima/Assembler/Parser/Basic.hs b/src/Mima/Assembler/Parser/Basic.hs new file mode 100644 index 0000000..83e666b --- /dev/null +++ b/src/Mima/Assembler/Parser/Basic.hs @@ -0,0 +1,122 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Mima.Assembler.Parser.Basic + ( Parser + , withOffset + , failAt + -- * Character specifications + , isAlphabet + , isConnecting + , isWhitespace + -- * Lexme + , whitespace + , space + , lexeme + , newline + , newlines + , colon + -- * Basic data types + , mimaWord + , largeValue + , largeValue' + , smallValue + -- * Stateful parsing + , StatefulParser + , runStatefulParser + ) where + +import Control.Monad +import Control.Monad.Trans.State +import qualified Data.Set as Set +import qualified Data.Text as T +import Data.Void +import Text.Megaparsec +import qualified Text.Megaparsec.Char as C +import qualified Text.Megaparsec.Char.Lexer as L + +import Mima.Word + +type Parser = Parsec Void T.Text + +withOffset :: Parser a -> Parser (a, Int) +withOffset parser = (,) <$> parser <*> getOffset + +failAt :: Int -> String -> Parser a +failAt offset message = do + setOffset offset + fail message + +{- Character specifications -} + +isOneOf :: String -> Char -> Bool +isOneOf s t = + let charSet = Set.fromList s + in t `Set.member` charSet + +isAlphabet :: Char -> Bool +isAlphabet = isOneOf (['a'..'z'] ++ ['A'..'Z']) + +isConnecting :: Char -> Bool +isConnecting = isOneOf "_-" + +isWhitespace :: Char -> Bool +isWhitespace = isOneOf " \t" + +{- Lexeme stuff -} + +whitespace :: Parser Char +whitespace = label "space" $ satisfy isWhitespace + +space :: Parser () +space = L.space (void whitespace) (L.skipLineComment ";") empty + +lexeme :: Parser a -> Parser a +lexeme = L.lexeme space + +newline :: Parser () +newline = void $ lexeme C.newline + +newlines :: Parser () +newlines = void (some newline) <|> eof + +colon :: Parser () +colon = void $ lexeme $ C.string ":" + +{- Basic data types -} + +fromHex :: (Num a) => Int -> Parser a +fromHex bitWidth = do + void $ C.string' "0x" + n <- L.hexadecimal :: Parser Integer + let upperBound = 2^bitWidth - 1 + if 0 <= n && n <= upperBound + then pure $ fromIntegral n + else fail $ "value " ++ show n ++ " out of bounds " ++ show (0 :: Integer, upperBound) + +fromDec :: (Num a) => Int -> Parser a +fromDec bitWidth = do + n <- L.signed mempty L.decimal :: Parser Integer + let upperBound = 2^bitWidth - 1 + if (-upperBound) <= n && n <= upperBound + then pure $ fromIntegral n + else fail $ "value " ++ show n ++ " out of bounds " ++ show (-upperBound, upperBound) + +mimaWord :: Parser MimaWord +mimaWord = lexeme $ label "24-bit number" $ fromHex 24 <|> fromDec 24 + +largeValue :: Parser LargeValue +largeValue = lexeme $ largeValue' + +-- | Non-lexeme version of 'largeValue' +largeValue' :: Parser LargeValue +largeValue' = label "20-bit number" $ fromHex 20 <|> fromDec 20 + +smallValue :: Parser SmallValue +smallValue = lexeme $ label "16-bit number" $ fromHex 16 <|> fromDec 16 + +{- Stateful parsing -} + +type StatefulParser s a = StateT s Parser a + +runStatefulParser :: StatefulParser s a -> s -> Parser (a, s) +runStatefulParser = runStateT diff --git a/src/Mima/Assembler/Parser/Instruction.hs b/src/Mima/Assembler/Parser/Instruction.hs new file mode 100644 index 0000000..b9d62ec --- /dev/null +++ b/src/Mima/Assembler/Parser/Instruction.hs @@ -0,0 +1,98 @@ +module Mima.Assembler.Parser.Instruction + ( parseInstruction + ) where + +import Control.Monad +import Control.Monad.Trans.Class +import Control.Monad.Trans.State +import Data.Maybe +import qualified Data.Set as Set +import Text.Megaparsec + +import Mima.Assembler.Parser.Basic +import Mima.Assembler.Parser.Label +import Mima.Assembler.Parser.RawInstruction +import Mima.Word + +data MyState = MyState + { sCurrentPos :: MimaAddress + , sKnownLabels :: Set.Set MimaLabel + , sActualPos :: Maybe MimaAddress + , sLabels :: Set.Set MimaLabel + } deriving (Show) + +initialState :: MimaAddress -> Set.Set MimaLabel -> MyState +initialState currentPos knownLabels = MyState + { sCurrentPos = currentPos + , sKnownLabels = knownLabels + , sActualPos = Nothing + , sLabels = Set.empty + } + +getActualPos :: MyState -> MimaAddress +getActualPos s = fromMaybe (sCurrentPos s) (sActualPos s) + +alreadySeen :: MimaLabel -> MyState -> Bool +alreadySeen l s = l `Set.member` sKnownLabels s || l `Set.member` sLabels s + +addLabel :: MimaLabel -> MyState -> MyState +addLabel l s = s{sLabels = Set.insert l $ sLabels s} + +{- And now, the parsing -} + +type SParser a = StatefulParser MyState a + +parseLabel :: SParser () +parseLabel = do + s <- get + l <- lift $ try $ mimaLabel' <* colon + void $ lift $ many newline + if alreadySeen l s + then lift $ failAtLabel l "label already defined earlier" + else modify (addLabel l) + +parseAddressLabel :: SParser () +parseAddressLabel = do + s <- get + (addr, offset) <- lift $ try $ withOffset largeValue' <* colon + void $ lift $ many newline + when (addr < sCurrentPos s) $ do + let errorMsg = "address can't be earlier than " ++ show (sCurrentPos s) + lift $ failAt offset errorMsg + case sActualPos s of + Just _ -> lift $ failAt offset "can't set an instruction's address twice" + Nothing -> put s{sActualPos = Just addr} + +parseInstruction' :: SParser (RawInstruction Address) +parseInstruction' = do + void $ many (parseLabel <|> parseAddressLabel) + lift $ rawInstruction <* newlines + +-- | @'parseInstruction' currentPos knownLabels@ parses an instruction and +-- its preceding label markings. +-- +-- * @currentPos@ is the position at which, if no other marking is +-- specified, this instruction is located. +-- +-- * @knownLabels@ are the labels which have already been set +-- elsewhere and thus cannot be set again on this instruction. +-- +-- Returns @(actualPos, instruction, labels)@. +-- +-- * @actualPos@ is the position at which the instruction is actually +-- located in memory. This can differ from @currentPos@ if a +-- location label is attached to this instruction. The following +-- must always hold: @actualPos >= currentPos@. +-- +-- * @instruction@ is the 'RawInstruction' that was parsed. +-- +-- * @labels@ are the labels attached to the parsed instruction. +parseInstruction :: MimaAddress + -> Set.Set MimaLabel + -> Parser (MimaAddress, RawInstruction Address, Set.Set MimaLabel) +parseInstruction currentPos knownLabels = do + let s = initialState currentPos knownLabels + (instruction, s') <- runStatefulParser parseInstruction' s + let actualPos = getActualPos s' + labels = sLabels s' + pure (actualPos, instruction, labels) diff --git a/src/Mima/Assembler/Parser/Label.hs b/src/Mima/Assembler/Parser/Label.hs new file mode 100644 index 0000000..0594648 --- /dev/null +++ b/src/Mima/Assembler/Parser/Label.hs @@ -0,0 +1,53 @@ +module Mima.Assembler.Parser.Label + ( MimaLabel + , mimaLabel + , mimaLabel' + , failAtLabel + , resolveLabel + , Address + , address + ) where + +import qualified Data.Map as Map +import qualified Data.Text as T +import Text.Megaparsec + +import Mima.Assembler.Parser.Basic +import Mima.Word + +{- Labels -} + +data MimaLabel = MimaLabel { lName :: T.Text, lOffset :: Int } + deriving (Show) + +instance Eq MimaLabel where + a == b = lName a == lName b + +instance Ord MimaLabel where + compare a b = compare (lName a) (lName b) + +mimaLabel :: Parser MimaLabel +mimaLabel = lexeme mimaLabel' + +mimaLabel' :: Parser MimaLabel +mimaLabel' = label "label" $ do + name <- takeWhile1P Nothing (\c -> isAlphabet c || isConnecting c) + offset <- getOffset + pure MimaLabel{lName = name, lOffset = offset} + +failAtLabel :: MimaLabel -> String -> Parser a +failAtLabel l = failAt (lOffset l) + +resolveLabel :: Map.Map MimaLabel MimaAddress -> MimaLabel -> Parser MimaAddress +resolveLabel lmap l = + case lmap Map.!? l of + Just addr -> pure addr + Nothing -> failAtLabel l "could not resolve label" + +{- Addresses -} + +data Address = Direct LargeValue | Indirect MimaLabel + deriving (Show) + +address :: Parser Address +address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel') diff --git a/src/Mima/Assembler/Parser/RawInstruction.hs b/src/Mima/Assembler/Parser/RawInstruction.hs new file mode 100644 index 0000000..d302444 --- /dev/null +++ b/src/Mima/Assembler/Parser/RawInstruction.hs @@ -0,0 +1,117 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Mima.Assembler.Parser.RawInstruction + ( RawInstruction + , rawInstruction + ) where + +import Data.Bits +import Text.Megaparsec +import qualified Text.Megaparsec.Char as C + +import Mima.Assembler.Parser.Basic +import Mima.Assembler.Parser.Label +import Mima.Word + +data RawInstruction addr + = RawLIT MimaWord + | RawLDC addr + | RawLDV addr + | RawSTV addr + | RawADD addr + | RawAND addr + | RawOR addr + | RawXOR addr + | RawEQL addr + | RawJMP addr + | RawJMN addr + | RawLDIV addr + | RawSTIV addr + | RawCALL addr + | RawLDVR addr + | RawSTVR addr + | RawHALT SmallValue + | RawNOT SmallValue + | RawRAR SmallValue + | RawRET SmallValue + | RawLDRA SmallValue + | RawSTRA SmallValue + | RawLDSP SmallValue + | RawSTSP SmallValue + | RawLDFP SmallValue + | RawSTFP SmallValue + | RawADC SmallValue + deriving (Show) + +rawInstruction :: Parser (RawInstruction Address) +rawInstruction + = label "instruction" + $ RawLIT <$> instr "LIT" mimaWord + <|> RawLDC <$> instr "LDC" address + <|> RawLDV <$> instr "LDV" address + <|> RawSTV <$> instr "STV" address + <|> RawADD <$> instr "ADD" address + <|> RawAND <$> instr "AND" address + <|> RawOR <$> instr "OR" address + <|> RawXOR <$> instr "XOR" address + <|> RawEQL <$> instr "EQL" address + <|> RawJMP <$> instr "JMP" address + <|> RawJMN <$> instr "JMN" address + <|> RawLDIV <$> instr "LDIV" address + <|> RawSTIV <$> instr "STIV" address + <|> RawCALL <$> instr "CALL" address + <|> RawLDVR <$> instr "LDVR" address + <|> RawSTVR <$> instr "STVR" address + <|> RawHALT <$> instr' "HALT" smallValue + <|> RawNOT <$> instr' "NOT" smallValue + <|> RawRAR <$> instr' "RAR" smallValue + <|> RawRET <$> instr' "RET" smallValue + <|> RawLDRA <$> instr' "LDRA" smallValue + <|> RawSTRA <$> instr' "STRA" smallValue + <|> RawLDSP <$> instr' "LDSP" smallValue + <|> RawSTSP <$> instr' "STSP" smallValue + <|> RawLDFP <$> instr' "LDFP" smallValue + <|> RawSTFP <$> instr' "STFP" smallValue + <|> RawADC <$> instr "ADC" smallValue + where + instr name value = lexeme (C.string' name >> whitespace) >> space *> value + instr' name value = lexeme (C.string' name >> whitespace) *> (value <|> pure zeroBits) + +{- +data InstrState = InstrState + { isCurPos :: MimaAddress + , isPrevLabels :: Set.Set Label + , isNewPos :: Maybe MimaAddress + , isLabels :: Set.Set Label + } deriving (Show) + +instrState :: MimaAddress -> Set.Set Label -> InstrState +instrState curPos prevLabels = InstrState curPos prevLabels Nothing Set.empty + +parseLabel :: StatefulParser InstrState () +parseLabel = do + name <- lift labelName + is <- get + let prevLabels = isPrevLabels is + labels = isLabels is + if name `Set.member` prevLabels || name `Set.member` labels + then fail "label can't be specified more than once" + else do + void $ lift $ lexeme $ C.string ":" + put is{isLabels = Set.insert name labels} + +parseLocationLabel :: StatefulParser InstrState () +parseLocationLabel = do + newPos <- lift largeValue' + is <- get + case isNewPos is of + Just _ -> fail "cannot specify two positions for one instruction" + Nothing -> if newPos < isCurPos is + then fail "cannot set a position to an earlier position" + else do + void $ lift $ lexeme $ C.string ":" + put is{isNewPos = Just newPos} + +parseInstruction :: StatefulParser InstrState RawInstruction +parseInstruction = try parseLabel <|> parseLocationLabel +-}