Parse instructions with labels

This commit is contained in:
Joscha 2019-11-09 19:24:50 +00:00
parent 9258aa4f4d
commit 803c826395
5 changed files with 392 additions and 0 deletions

View file

@ -0,0 +1,122 @@
{-# LANGUAGE OverloadedStrings #-}
module Mima.Assembler.Parser.Basic
( Parser
, withOffset
, failAt
-- * Character specifications
, isAlphabet
, isConnecting
, isWhitespace
-- * Lexme
, whitespace
, space
, lexeme
, newline
, newlines
, colon
-- * Basic data types
, mimaWord
, largeValue
, largeValue'
, smallValue
-- * Stateful parsing
, StatefulParser
, runStatefulParser
) where
import Control.Monad
import Control.Monad.Trans.State
import qualified Data.Set as Set
import qualified Data.Text as T
import Data.Void
import Text.Megaparsec
import qualified Text.Megaparsec.Char as C
import qualified Text.Megaparsec.Char.Lexer as L
import Mima.Word
type Parser = Parsec Void T.Text
withOffset :: Parser a -> Parser (a, Int)
withOffset parser = (,) <$> parser <*> getOffset
failAt :: Int -> String -> Parser a
failAt offset message = do
setOffset offset
fail message
{- Character specifications -}
isOneOf :: String -> Char -> Bool
isOneOf s t =
let charSet = Set.fromList s
in t `Set.member` charSet
isAlphabet :: Char -> Bool
isAlphabet = isOneOf (['a'..'z'] ++ ['A'..'Z'])
isConnecting :: Char -> Bool
isConnecting = isOneOf "_-"
isWhitespace :: Char -> Bool
isWhitespace = isOneOf " \t"
{- Lexeme stuff -}
whitespace :: Parser Char
whitespace = label "space" $ satisfy isWhitespace
space :: Parser ()
space = L.space (void whitespace) (L.skipLineComment ";") empty
lexeme :: Parser a -> Parser a
lexeme = L.lexeme space
newline :: Parser ()
newline = void $ lexeme C.newline
newlines :: Parser ()
newlines = void (some newline) <|> eof
colon :: Parser ()
colon = void $ lexeme $ C.string ":"
{- Basic data types -}
fromHex :: (Num a) => Int -> Parser a
fromHex bitWidth = do
void $ C.string' "0x"
n <- L.hexadecimal :: Parser Integer
let upperBound = 2^bitWidth - 1
if 0 <= n && n <= upperBound
then pure $ fromIntegral n
else fail $ "value " ++ show n ++ " out of bounds " ++ show (0 :: Integer, upperBound)
fromDec :: (Num a) => Int -> Parser a
fromDec bitWidth = do
n <- L.signed mempty L.decimal :: Parser Integer
let upperBound = 2^bitWidth - 1
if (-upperBound) <= n && n <= upperBound
then pure $ fromIntegral n
else fail $ "value " ++ show n ++ " out of bounds " ++ show (-upperBound, upperBound)
mimaWord :: Parser MimaWord
mimaWord = lexeme $ label "24-bit number" $ fromHex 24 <|> fromDec 24
largeValue :: Parser LargeValue
largeValue = lexeme $ largeValue'
-- | Non-lexeme version of 'largeValue'
largeValue' :: Parser LargeValue
largeValue' = label "20-bit number" $ fromHex 20 <|> fromDec 20
smallValue :: Parser SmallValue
smallValue = lexeme $ label "16-bit number" $ fromHex 16 <|> fromDec 16
{- Stateful parsing -}
type StatefulParser s a = StateT s Parser a
runStatefulParser :: StatefulParser s a -> s -> Parser (a, s)
runStatefulParser = runStateT

View file

@ -0,0 +1,98 @@
module Mima.Assembler.Parser.Instruction
( parseInstruction
) where
import Control.Monad
import Control.Monad.Trans.Class
import Control.Monad.Trans.State
import Data.Maybe
import qualified Data.Set as Set
import Text.Megaparsec
import Mima.Assembler.Parser.Basic
import Mima.Assembler.Parser.Label
import Mima.Assembler.Parser.RawInstruction
import Mima.Word
data MyState = MyState
{ sCurrentPos :: MimaAddress
, sKnownLabels :: Set.Set MimaLabel
, sActualPos :: Maybe MimaAddress
, sLabels :: Set.Set MimaLabel
} deriving (Show)
initialState :: MimaAddress -> Set.Set MimaLabel -> MyState
initialState currentPos knownLabels = MyState
{ sCurrentPos = currentPos
, sKnownLabels = knownLabels
, sActualPos = Nothing
, sLabels = Set.empty
}
getActualPos :: MyState -> MimaAddress
getActualPos s = fromMaybe (sCurrentPos s) (sActualPos s)
alreadySeen :: MimaLabel -> MyState -> Bool
alreadySeen l s = l `Set.member` sKnownLabels s || l `Set.member` sLabels s
addLabel :: MimaLabel -> MyState -> MyState
addLabel l s = s{sLabels = Set.insert l $ sLabels s}
{- And now, the parsing -}
type SParser a = StatefulParser MyState a
parseLabel :: SParser ()
parseLabel = do
s <- get
l <- lift $ try $ mimaLabel' <* colon
void $ lift $ many newline
if alreadySeen l s
then lift $ failAtLabel l "label already defined earlier"
else modify (addLabel l)
parseAddressLabel :: SParser ()
parseAddressLabel = do
s <- get
(addr, offset) <- lift $ try $ withOffset largeValue' <* colon
void $ lift $ many newline
when (addr < sCurrentPos s) $ do
let errorMsg = "address can't be earlier than " ++ show (sCurrentPos s)
lift $ failAt offset errorMsg
case sActualPos s of
Just _ -> lift $ failAt offset "can't set an instruction's address twice"
Nothing -> put s{sActualPos = Just addr}
parseInstruction' :: SParser (RawInstruction Address)
parseInstruction' = do
void $ many (parseLabel <|> parseAddressLabel)
lift $ rawInstruction <* newlines
-- | @'parseInstruction' currentPos knownLabels@ parses an instruction and
-- its preceding label markings.
--
-- * @currentPos@ is the position at which, if no other marking is
-- specified, this instruction is located.
--
-- * @knownLabels@ are the labels which have already been set
-- elsewhere and thus cannot be set again on this instruction.
--
-- Returns @(actualPos, instruction, labels)@.
--
-- * @actualPos@ is the position at which the instruction is actually
-- located in memory. This can differ from @currentPos@ if a
-- location label is attached to this instruction. The following
-- must always hold: @actualPos >= currentPos@.
--
-- * @instruction@ is the 'RawInstruction' that was parsed.
--
-- * @labels@ are the labels attached to the parsed instruction.
parseInstruction :: MimaAddress
-> Set.Set MimaLabel
-> Parser (MimaAddress, RawInstruction Address, Set.Set MimaLabel)
parseInstruction currentPos knownLabels = do
let s = initialState currentPos knownLabels
(instruction, s') <- runStatefulParser parseInstruction' s
let actualPos = getActualPos s'
labels = sLabels s'
pure (actualPos, instruction, labels)

View file

@ -0,0 +1,53 @@
module Mima.Assembler.Parser.Label
( MimaLabel
, mimaLabel
, mimaLabel'
, failAtLabel
, resolveLabel
, Address
, address
) where
import qualified Data.Map as Map
import qualified Data.Text as T
import Text.Megaparsec
import Mima.Assembler.Parser.Basic
import Mima.Word
{- Labels -}
data MimaLabel = MimaLabel { lName :: T.Text, lOffset :: Int }
deriving (Show)
instance Eq MimaLabel where
a == b = lName a == lName b
instance Ord MimaLabel where
compare a b = compare (lName a) (lName b)
mimaLabel :: Parser MimaLabel
mimaLabel = lexeme mimaLabel'
mimaLabel' :: Parser MimaLabel
mimaLabel' = label "label" $ do
name <- takeWhile1P Nothing (\c -> isAlphabet c || isConnecting c)
offset <- getOffset
pure MimaLabel{lName = name, lOffset = offset}
failAtLabel :: MimaLabel -> String -> Parser a
failAtLabel l = failAt (lOffset l)
resolveLabel :: Map.Map MimaLabel MimaAddress -> MimaLabel -> Parser MimaAddress
resolveLabel lmap l =
case lmap Map.!? l of
Just addr -> pure addr
Nothing -> failAtLabel l "could not resolve label"
{- Addresses -}
data Address = Direct LargeValue | Indirect MimaLabel
deriving (Show)
address :: Parser Address
address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel')

View file

@ -0,0 +1,117 @@
{-# LANGUAGE OverloadedStrings #-}
module Mima.Assembler.Parser.RawInstruction
( RawInstruction
, rawInstruction
) where
import Data.Bits
import Text.Megaparsec
import qualified Text.Megaparsec.Char as C
import Mima.Assembler.Parser.Basic
import Mima.Assembler.Parser.Label
import Mima.Word
data RawInstruction addr
= RawLIT MimaWord
| RawLDC addr
| RawLDV addr
| RawSTV addr
| RawADD addr
| RawAND addr
| RawOR addr
| RawXOR addr
| RawEQL addr
| RawJMP addr
| RawJMN addr
| RawLDIV addr
| RawSTIV addr
| RawCALL addr
| RawLDVR addr
| RawSTVR addr
| RawHALT SmallValue
| RawNOT SmallValue
| RawRAR SmallValue
| RawRET SmallValue
| RawLDRA SmallValue
| RawSTRA SmallValue
| RawLDSP SmallValue
| RawSTSP SmallValue
| RawLDFP SmallValue
| RawSTFP SmallValue
| RawADC SmallValue
deriving (Show)
rawInstruction :: Parser (RawInstruction Address)
rawInstruction
= label "instruction"
$ RawLIT <$> instr "LIT" mimaWord
<|> RawLDC <$> instr "LDC" address
<|> RawLDV <$> instr "LDV" address
<|> RawSTV <$> instr "STV" address
<|> RawADD <$> instr "ADD" address
<|> RawAND <$> instr "AND" address
<|> RawOR <$> instr "OR" address
<|> RawXOR <$> instr "XOR" address
<|> RawEQL <$> instr "EQL" address
<|> RawJMP <$> instr "JMP" address
<|> RawJMN <$> instr "JMN" address
<|> RawLDIV <$> instr "LDIV" address
<|> RawSTIV <$> instr "STIV" address
<|> RawCALL <$> instr "CALL" address
<|> RawLDVR <$> instr "LDVR" address
<|> RawSTVR <$> instr "STVR" address
<|> RawHALT <$> instr' "HALT" smallValue
<|> RawNOT <$> instr' "NOT" smallValue
<|> RawRAR <$> instr' "RAR" smallValue
<|> RawRET <$> instr' "RET" smallValue
<|> RawLDRA <$> instr' "LDRA" smallValue
<|> RawSTRA <$> instr' "STRA" smallValue
<|> RawLDSP <$> instr' "LDSP" smallValue
<|> RawSTSP <$> instr' "STSP" smallValue
<|> RawLDFP <$> instr' "LDFP" smallValue
<|> RawSTFP <$> instr' "STFP" smallValue
<|> RawADC <$> instr "ADC" smallValue
where
instr name value = lexeme (C.string' name >> whitespace) >> space *> value
instr' name value = lexeme (C.string' name >> whitespace) *> (value <|> pure zeroBits)
{-
data InstrState = InstrState
{ isCurPos :: MimaAddress
, isPrevLabels :: Set.Set Label
, isNewPos :: Maybe MimaAddress
, isLabels :: Set.Set Label
} deriving (Show)
instrState :: MimaAddress -> Set.Set Label -> InstrState
instrState curPos prevLabels = InstrState curPos prevLabels Nothing Set.empty
parseLabel :: StatefulParser InstrState ()
parseLabel = do
name <- lift labelName
is <- get
let prevLabels = isPrevLabels is
labels = isLabels is
if name `Set.member` prevLabels || name `Set.member` labels
then fail "label can't be specified more than once"
else do
void $ lift $ lexeme $ C.string ":"
put is{isLabels = Set.insert name labels}
parseLocationLabel :: StatefulParser InstrState ()
parseLocationLabel = do
newPos <- lift largeValue'
is <- get
case isNewPos is of
Just _ -> fail "cannot specify two positions for one instruction"
Nothing -> if newPos < isCurPos is
then fail "cannot set a position to an earlier position"
else do
void $ lift $ lexeme $ C.string ":"
put is{isNewPos = Just newPos}
parseInstruction :: StatefulParser InstrState RawInstruction
parseInstruction = try parseLabel <|> parseLocationLabel
-}