Parse instructions with labels
This commit is contained in:
parent
9258aa4f4d
commit
803c826395
5 changed files with 392 additions and 0 deletions
122
src/Mima/Assembler/Parser/Basic.hs
Normal file
122
src/Mima/Assembler/Parser/Basic.hs
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Mima.Assembler.Parser.Basic
|
||||
( Parser
|
||||
, withOffset
|
||||
, failAt
|
||||
-- * Character specifications
|
||||
, isAlphabet
|
||||
, isConnecting
|
||||
, isWhitespace
|
||||
-- * Lexme
|
||||
, whitespace
|
||||
, space
|
||||
, lexeme
|
||||
, newline
|
||||
, newlines
|
||||
, colon
|
||||
-- * Basic data types
|
||||
, mimaWord
|
||||
, largeValue
|
||||
, largeValue'
|
||||
, smallValue
|
||||
-- * Stateful parsing
|
||||
, StatefulParser
|
||||
, runStatefulParser
|
||||
) where
|
||||
|
||||
import Control.Monad
|
||||
import Control.Monad.Trans.State
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.Text as T
|
||||
import Data.Void
|
||||
import Text.Megaparsec
|
||||
import qualified Text.Megaparsec.Char as C
|
||||
import qualified Text.Megaparsec.Char.Lexer as L
|
||||
|
||||
import Mima.Word
|
||||
|
||||
type Parser = Parsec Void T.Text
|
||||
|
||||
withOffset :: Parser a -> Parser (a, Int)
|
||||
withOffset parser = (,) <$> parser <*> getOffset
|
||||
|
||||
failAt :: Int -> String -> Parser a
|
||||
failAt offset message = do
|
||||
setOffset offset
|
||||
fail message
|
||||
|
||||
{- Character specifications -}
|
||||
|
||||
isOneOf :: String -> Char -> Bool
|
||||
isOneOf s t =
|
||||
let charSet = Set.fromList s
|
||||
in t `Set.member` charSet
|
||||
|
||||
isAlphabet :: Char -> Bool
|
||||
isAlphabet = isOneOf (['a'..'z'] ++ ['A'..'Z'])
|
||||
|
||||
isConnecting :: Char -> Bool
|
||||
isConnecting = isOneOf "_-"
|
||||
|
||||
isWhitespace :: Char -> Bool
|
||||
isWhitespace = isOneOf " \t"
|
||||
|
||||
{- Lexeme stuff -}
|
||||
|
||||
whitespace :: Parser Char
|
||||
whitespace = label "space" $ satisfy isWhitespace
|
||||
|
||||
space :: Parser ()
|
||||
space = L.space (void whitespace) (L.skipLineComment ";") empty
|
||||
|
||||
lexeme :: Parser a -> Parser a
|
||||
lexeme = L.lexeme space
|
||||
|
||||
newline :: Parser ()
|
||||
newline = void $ lexeme C.newline
|
||||
|
||||
newlines :: Parser ()
|
||||
newlines = void (some newline) <|> eof
|
||||
|
||||
colon :: Parser ()
|
||||
colon = void $ lexeme $ C.string ":"
|
||||
|
||||
{- Basic data types -}
|
||||
|
||||
fromHex :: (Num a) => Int -> Parser a
|
||||
fromHex bitWidth = do
|
||||
void $ C.string' "0x"
|
||||
n <- L.hexadecimal :: Parser Integer
|
||||
let upperBound = 2^bitWidth - 1
|
||||
if 0 <= n && n <= upperBound
|
||||
then pure $ fromIntegral n
|
||||
else fail $ "value " ++ show n ++ " out of bounds " ++ show (0 :: Integer, upperBound)
|
||||
|
||||
fromDec :: (Num a) => Int -> Parser a
|
||||
fromDec bitWidth = do
|
||||
n <- L.signed mempty L.decimal :: Parser Integer
|
||||
let upperBound = 2^bitWidth - 1
|
||||
if (-upperBound) <= n && n <= upperBound
|
||||
then pure $ fromIntegral n
|
||||
else fail $ "value " ++ show n ++ " out of bounds " ++ show (-upperBound, upperBound)
|
||||
|
||||
mimaWord :: Parser MimaWord
|
||||
mimaWord = lexeme $ label "24-bit number" $ fromHex 24 <|> fromDec 24
|
||||
|
||||
largeValue :: Parser LargeValue
|
||||
largeValue = lexeme $ largeValue'
|
||||
|
||||
-- | Non-lexeme version of 'largeValue'
|
||||
largeValue' :: Parser LargeValue
|
||||
largeValue' = label "20-bit number" $ fromHex 20 <|> fromDec 20
|
||||
|
||||
smallValue :: Parser SmallValue
|
||||
smallValue = lexeme $ label "16-bit number" $ fromHex 16 <|> fromDec 16
|
||||
|
||||
{- Stateful parsing -}
|
||||
|
||||
type StatefulParser s a = StateT s Parser a
|
||||
|
||||
runStatefulParser :: StatefulParser s a -> s -> Parser (a, s)
|
||||
runStatefulParser = runStateT
|
||||
98
src/Mima/Assembler/Parser/Instruction.hs
Normal file
98
src/Mima/Assembler/Parser/Instruction.hs
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
module Mima.Assembler.Parser.Instruction
|
||||
( parseInstruction
|
||||
) where
|
||||
|
||||
import Control.Monad
|
||||
import Control.Monad.Trans.Class
|
||||
import Control.Monad.Trans.State
|
||||
import Data.Maybe
|
||||
import qualified Data.Set as Set
|
||||
import Text.Megaparsec
|
||||
|
||||
import Mima.Assembler.Parser.Basic
|
||||
import Mima.Assembler.Parser.Label
|
||||
import Mima.Assembler.Parser.RawInstruction
|
||||
import Mima.Word
|
||||
|
||||
data MyState = MyState
|
||||
{ sCurrentPos :: MimaAddress
|
||||
, sKnownLabels :: Set.Set MimaLabel
|
||||
, sActualPos :: Maybe MimaAddress
|
||||
, sLabels :: Set.Set MimaLabel
|
||||
} deriving (Show)
|
||||
|
||||
initialState :: MimaAddress -> Set.Set MimaLabel -> MyState
|
||||
initialState currentPos knownLabels = MyState
|
||||
{ sCurrentPos = currentPos
|
||||
, sKnownLabels = knownLabels
|
||||
, sActualPos = Nothing
|
||||
, sLabels = Set.empty
|
||||
}
|
||||
|
||||
getActualPos :: MyState -> MimaAddress
|
||||
getActualPos s = fromMaybe (sCurrentPos s) (sActualPos s)
|
||||
|
||||
alreadySeen :: MimaLabel -> MyState -> Bool
|
||||
alreadySeen l s = l `Set.member` sKnownLabels s || l `Set.member` sLabels s
|
||||
|
||||
addLabel :: MimaLabel -> MyState -> MyState
|
||||
addLabel l s = s{sLabels = Set.insert l $ sLabels s}
|
||||
|
||||
{- And now, the parsing -}
|
||||
|
||||
type SParser a = StatefulParser MyState a
|
||||
|
||||
parseLabel :: SParser ()
|
||||
parseLabel = do
|
||||
s <- get
|
||||
l <- lift $ try $ mimaLabel' <* colon
|
||||
void $ lift $ many newline
|
||||
if alreadySeen l s
|
||||
then lift $ failAtLabel l "label already defined earlier"
|
||||
else modify (addLabel l)
|
||||
|
||||
parseAddressLabel :: SParser ()
|
||||
parseAddressLabel = do
|
||||
s <- get
|
||||
(addr, offset) <- lift $ try $ withOffset largeValue' <* colon
|
||||
void $ lift $ many newline
|
||||
when (addr < sCurrentPos s) $ do
|
||||
let errorMsg = "address can't be earlier than " ++ show (sCurrentPos s)
|
||||
lift $ failAt offset errorMsg
|
||||
case sActualPos s of
|
||||
Just _ -> lift $ failAt offset "can't set an instruction's address twice"
|
||||
Nothing -> put s{sActualPos = Just addr}
|
||||
|
||||
parseInstruction' :: SParser (RawInstruction Address)
|
||||
parseInstruction' = do
|
||||
void $ many (parseLabel <|> parseAddressLabel)
|
||||
lift $ rawInstruction <* newlines
|
||||
|
||||
-- | @'parseInstruction' currentPos knownLabels@ parses an instruction and
|
||||
-- its preceding label markings.
|
||||
--
|
||||
-- * @currentPos@ is the position at which, if no other marking is
|
||||
-- specified, this instruction is located.
|
||||
--
|
||||
-- * @knownLabels@ are the labels which have already been set
|
||||
-- elsewhere and thus cannot be set again on this instruction.
|
||||
--
|
||||
-- Returns @(actualPos, instruction, labels)@.
|
||||
--
|
||||
-- * @actualPos@ is the position at which the instruction is actually
|
||||
-- located in memory. This can differ from @currentPos@ if a
|
||||
-- location label is attached to this instruction. The following
|
||||
-- must always hold: @actualPos >= currentPos@.
|
||||
--
|
||||
-- * @instruction@ is the 'RawInstruction' that was parsed.
|
||||
--
|
||||
-- * @labels@ are the labels attached to the parsed instruction.
|
||||
parseInstruction :: MimaAddress
|
||||
-> Set.Set MimaLabel
|
||||
-> Parser (MimaAddress, RawInstruction Address, Set.Set MimaLabel)
|
||||
parseInstruction currentPos knownLabels = do
|
||||
let s = initialState currentPos knownLabels
|
||||
(instruction, s') <- runStatefulParser parseInstruction' s
|
||||
let actualPos = getActualPos s'
|
||||
labels = sLabels s'
|
||||
pure (actualPos, instruction, labels)
|
||||
53
src/Mima/Assembler/Parser/Label.hs
Normal file
53
src/Mima/Assembler/Parser/Label.hs
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
module Mima.Assembler.Parser.Label
|
||||
( MimaLabel
|
||||
, mimaLabel
|
||||
, mimaLabel'
|
||||
, failAtLabel
|
||||
, resolveLabel
|
||||
, Address
|
||||
, address
|
||||
) where
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Text as T
|
||||
import Text.Megaparsec
|
||||
|
||||
import Mima.Assembler.Parser.Basic
|
||||
import Mima.Word
|
||||
|
||||
{- Labels -}
|
||||
|
||||
data MimaLabel = MimaLabel { lName :: T.Text, lOffset :: Int }
|
||||
deriving (Show)
|
||||
|
||||
instance Eq MimaLabel where
|
||||
a == b = lName a == lName b
|
||||
|
||||
instance Ord MimaLabel where
|
||||
compare a b = compare (lName a) (lName b)
|
||||
|
||||
mimaLabel :: Parser MimaLabel
|
||||
mimaLabel = lexeme mimaLabel'
|
||||
|
||||
mimaLabel' :: Parser MimaLabel
|
||||
mimaLabel' = label "label" $ do
|
||||
name <- takeWhile1P Nothing (\c -> isAlphabet c || isConnecting c)
|
||||
offset <- getOffset
|
||||
pure MimaLabel{lName = name, lOffset = offset}
|
||||
|
||||
failAtLabel :: MimaLabel -> String -> Parser a
|
||||
failAtLabel l = failAt (lOffset l)
|
||||
|
||||
resolveLabel :: Map.Map MimaLabel MimaAddress -> MimaLabel -> Parser MimaAddress
|
||||
resolveLabel lmap l =
|
||||
case lmap Map.!? l of
|
||||
Just addr -> pure addr
|
||||
Nothing -> failAtLabel l "could not resolve label"
|
||||
|
||||
{- Addresses -}
|
||||
|
||||
data Address = Direct LargeValue | Indirect MimaLabel
|
||||
deriving (Show)
|
||||
|
||||
address :: Parser Address
|
||||
address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel')
|
||||
117
src/Mima/Assembler/Parser/RawInstruction.hs
Normal file
117
src/Mima/Assembler/Parser/RawInstruction.hs
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Mima.Assembler.Parser.RawInstruction
|
||||
( RawInstruction
|
||||
, rawInstruction
|
||||
) where
|
||||
|
||||
import Data.Bits
|
||||
import Text.Megaparsec
|
||||
import qualified Text.Megaparsec.Char as C
|
||||
|
||||
import Mima.Assembler.Parser.Basic
|
||||
import Mima.Assembler.Parser.Label
|
||||
import Mima.Word
|
||||
|
||||
data RawInstruction addr
|
||||
= RawLIT MimaWord
|
||||
| RawLDC addr
|
||||
| RawLDV addr
|
||||
| RawSTV addr
|
||||
| RawADD addr
|
||||
| RawAND addr
|
||||
| RawOR addr
|
||||
| RawXOR addr
|
||||
| RawEQL addr
|
||||
| RawJMP addr
|
||||
| RawJMN addr
|
||||
| RawLDIV addr
|
||||
| RawSTIV addr
|
||||
| RawCALL addr
|
||||
| RawLDVR addr
|
||||
| RawSTVR addr
|
||||
| RawHALT SmallValue
|
||||
| RawNOT SmallValue
|
||||
| RawRAR SmallValue
|
||||
| RawRET SmallValue
|
||||
| RawLDRA SmallValue
|
||||
| RawSTRA SmallValue
|
||||
| RawLDSP SmallValue
|
||||
| RawSTSP SmallValue
|
||||
| RawLDFP SmallValue
|
||||
| RawSTFP SmallValue
|
||||
| RawADC SmallValue
|
||||
deriving (Show)
|
||||
|
||||
rawInstruction :: Parser (RawInstruction Address)
|
||||
rawInstruction
|
||||
= label "instruction"
|
||||
$ RawLIT <$> instr "LIT" mimaWord
|
||||
<|> RawLDC <$> instr "LDC" address
|
||||
<|> RawLDV <$> instr "LDV" address
|
||||
<|> RawSTV <$> instr "STV" address
|
||||
<|> RawADD <$> instr "ADD" address
|
||||
<|> RawAND <$> instr "AND" address
|
||||
<|> RawOR <$> instr "OR" address
|
||||
<|> RawXOR <$> instr "XOR" address
|
||||
<|> RawEQL <$> instr "EQL" address
|
||||
<|> RawJMP <$> instr "JMP" address
|
||||
<|> RawJMN <$> instr "JMN" address
|
||||
<|> RawLDIV <$> instr "LDIV" address
|
||||
<|> RawSTIV <$> instr "STIV" address
|
||||
<|> RawCALL <$> instr "CALL" address
|
||||
<|> RawLDVR <$> instr "LDVR" address
|
||||
<|> RawSTVR <$> instr "STVR" address
|
||||
<|> RawHALT <$> instr' "HALT" smallValue
|
||||
<|> RawNOT <$> instr' "NOT" smallValue
|
||||
<|> RawRAR <$> instr' "RAR" smallValue
|
||||
<|> RawRET <$> instr' "RET" smallValue
|
||||
<|> RawLDRA <$> instr' "LDRA" smallValue
|
||||
<|> RawSTRA <$> instr' "STRA" smallValue
|
||||
<|> RawLDSP <$> instr' "LDSP" smallValue
|
||||
<|> RawSTSP <$> instr' "STSP" smallValue
|
||||
<|> RawLDFP <$> instr' "LDFP" smallValue
|
||||
<|> RawSTFP <$> instr' "STFP" smallValue
|
||||
<|> RawADC <$> instr "ADC" smallValue
|
||||
where
|
||||
instr name value = lexeme (C.string' name >> whitespace) >> space *> value
|
||||
instr' name value = lexeme (C.string' name >> whitespace) *> (value <|> pure zeroBits)
|
||||
|
||||
{-
|
||||
data InstrState = InstrState
|
||||
{ isCurPos :: MimaAddress
|
||||
, isPrevLabels :: Set.Set Label
|
||||
, isNewPos :: Maybe MimaAddress
|
||||
, isLabels :: Set.Set Label
|
||||
} deriving (Show)
|
||||
|
||||
instrState :: MimaAddress -> Set.Set Label -> InstrState
|
||||
instrState curPos prevLabels = InstrState curPos prevLabels Nothing Set.empty
|
||||
|
||||
parseLabel :: StatefulParser InstrState ()
|
||||
parseLabel = do
|
||||
name <- lift labelName
|
||||
is <- get
|
||||
let prevLabels = isPrevLabels is
|
||||
labels = isLabels is
|
||||
if name `Set.member` prevLabels || name `Set.member` labels
|
||||
then fail "label can't be specified more than once"
|
||||
else do
|
||||
void $ lift $ lexeme $ C.string ":"
|
||||
put is{isLabels = Set.insert name labels}
|
||||
|
||||
parseLocationLabel :: StatefulParser InstrState ()
|
||||
parseLocationLabel = do
|
||||
newPos <- lift largeValue'
|
||||
is <- get
|
||||
case isNewPos is of
|
||||
Just _ -> fail "cannot specify two positions for one instruction"
|
||||
Nothing -> if newPos < isCurPos is
|
||||
then fail "cannot set a position to an earlier position"
|
||||
else do
|
||||
void $ lift $ lexeme $ C.string ":"
|
||||
put is{isNewPos = Just newPos}
|
||||
|
||||
parseInstruction :: StatefulParser InstrState RawInstruction
|
||||
parseInstruction = try parseLabel <|> parseLocationLabel
|
||||
-}
|
||||
Loading…
Add table
Add a link
Reference in a new issue