Parse instructions with labels
This commit is contained in:
parent
9258aa4f4d
commit
803c826395
5 changed files with 392 additions and 0 deletions
|
|
@ -24,8 +24,10 @@ dependencies:
|
||||||
- binary
|
- binary
|
||||||
- bytestring
|
- bytestring
|
||||||
- containers
|
- containers
|
||||||
|
- megaparsec
|
||||||
- optparse-applicative
|
- optparse-applicative
|
||||||
- text
|
- text
|
||||||
|
- transformers
|
||||||
- OddWord >= 1.0 && < 1.1
|
- OddWord >= 1.0 && < 1.1
|
||||||
|
|
||||||
library:
|
library:
|
||||||
|
|
|
||||||
122
src/Mima/Assembler/Parser/Basic.hs
Normal file
122
src/Mima/Assembler/Parser/Basic.hs
Normal file
|
|
@ -0,0 +1,122 @@
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Mima.Assembler.Parser.Basic
|
||||||
|
( Parser
|
||||||
|
, withOffset
|
||||||
|
, failAt
|
||||||
|
-- * Character specifications
|
||||||
|
, isAlphabet
|
||||||
|
, isConnecting
|
||||||
|
, isWhitespace
|
||||||
|
-- * Lexme
|
||||||
|
, whitespace
|
||||||
|
, space
|
||||||
|
, lexeme
|
||||||
|
, newline
|
||||||
|
, newlines
|
||||||
|
, colon
|
||||||
|
-- * Basic data types
|
||||||
|
, mimaWord
|
||||||
|
, largeValue
|
||||||
|
, largeValue'
|
||||||
|
, smallValue
|
||||||
|
-- * Stateful parsing
|
||||||
|
, StatefulParser
|
||||||
|
, runStatefulParser
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Control.Monad
|
||||||
|
import Control.Monad.Trans.State
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Data.Void
|
||||||
|
import Text.Megaparsec
|
||||||
|
import qualified Text.Megaparsec.Char as C
|
||||||
|
import qualified Text.Megaparsec.Char.Lexer as L
|
||||||
|
|
||||||
|
import Mima.Word
|
||||||
|
|
||||||
|
type Parser = Parsec Void T.Text
|
||||||
|
|
||||||
|
withOffset :: Parser a -> Parser (a, Int)
|
||||||
|
withOffset parser = (,) <$> parser <*> getOffset
|
||||||
|
|
||||||
|
failAt :: Int -> String -> Parser a
|
||||||
|
failAt offset message = do
|
||||||
|
setOffset offset
|
||||||
|
fail message
|
||||||
|
|
||||||
|
{- Character specifications -}
|
||||||
|
|
||||||
|
isOneOf :: String -> Char -> Bool
|
||||||
|
isOneOf s t =
|
||||||
|
let charSet = Set.fromList s
|
||||||
|
in t `Set.member` charSet
|
||||||
|
|
||||||
|
isAlphabet :: Char -> Bool
|
||||||
|
isAlphabet = isOneOf (['a'..'z'] ++ ['A'..'Z'])
|
||||||
|
|
||||||
|
isConnecting :: Char -> Bool
|
||||||
|
isConnecting = isOneOf "_-"
|
||||||
|
|
||||||
|
isWhitespace :: Char -> Bool
|
||||||
|
isWhitespace = isOneOf " \t"
|
||||||
|
|
||||||
|
{- Lexeme stuff -}
|
||||||
|
|
||||||
|
whitespace :: Parser Char
|
||||||
|
whitespace = label "space" $ satisfy isWhitespace
|
||||||
|
|
||||||
|
space :: Parser ()
|
||||||
|
space = L.space (void whitespace) (L.skipLineComment ";") empty
|
||||||
|
|
||||||
|
lexeme :: Parser a -> Parser a
|
||||||
|
lexeme = L.lexeme space
|
||||||
|
|
||||||
|
newline :: Parser ()
|
||||||
|
newline = void $ lexeme C.newline
|
||||||
|
|
||||||
|
newlines :: Parser ()
|
||||||
|
newlines = void (some newline) <|> eof
|
||||||
|
|
||||||
|
colon :: Parser ()
|
||||||
|
colon = void $ lexeme $ C.string ":"
|
||||||
|
|
||||||
|
{- Basic data types -}
|
||||||
|
|
||||||
|
fromHex :: (Num a) => Int -> Parser a
|
||||||
|
fromHex bitWidth = do
|
||||||
|
void $ C.string' "0x"
|
||||||
|
n <- L.hexadecimal :: Parser Integer
|
||||||
|
let upperBound = 2^bitWidth - 1
|
||||||
|
if 0 <= n && n <= upperBound
|
||||||
|
then pure $ fromIntegral n
|
||||||
|
else fail $ "value " ++ show n ++ " out of bounds " ++ show (0 :: Integer, upperBound)
|
||||||
|
|
||||||
|
fromDec :: (Num a) => Int -> Parser a
|
||||||
|
fromDec bitWidth = do
|
||||||
|
n <- L.signed mempty L.decimal :: Parser Integer
|
||||||
|
let upperBound = 2^bitWidth - 1
|
||||||
|
if (-upperBound) <= n && n <= upperBound
|
||||||
|
then pure $ fromIntegral n
|
||||||
|
else fail $ "value " ++ show n ++ " out of bounds " ++ show (-upperBound, upperBound)
|
||||||
|
|
||||||
|
mimaWord :: Parser MimaWord
|
||||||
|
mimaWord = lexeme $ label "24-bit number" $ fromHex 24 <|> fromDec 24
|
||||||
|
|
||||||
|
largeValue :: Parser LargeValue
|
||||||
|
largeValue = lexeme $ largeValue'
|
||||||
|
|
||||||
|
-- | Non-lexeme version of 'largeValue'
|
||||||
|
largeValue' :: Parser LargeValue
|
||||||
|
largeValue' = label "20-bit number" $ fromHex 20 <|> fromDec 20
|
||||||
|
|
||||||
|
smallValue :: Parser SmallValue
|
||||||
|
smallValue = lexeme $ label "16-bit number" $ fromHex 16 <|> fromDec 16
|
||||||
|
|
||||||
|
{- Stateful parsing -}
|
||||||
|
|
||||||
|
type StatefulParser s a = StateT s Parser a
|
||||||
|
|
||||||
|
runStatefulParser :: StatefulParser s a -> s -> Parser (a, s)
|
||||||
|
runStatefulParser = runStateT
|
||||||
98
src/Mima/Assembler/Parser/Instruction.hs
Normal file
98
src/Mima/Assembler/Parser/Instruction.hs
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
module Mima.Assembler.Parser.Instruction
|
||||||
|
( parseInstruction
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Control.Monad
|
||||||
|
import Control.Monad.Trans.Class
|
||||||
|
import Control.Monad.Trans.State
|
||||||
|
import Data.Maybe
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import Text.Megaparsec
|
||||||
|
|
||||||
|
import Mima.Assembler.Parser.Basic
|
||||||
|
import Mima.Assembler.Parser.Label
|
||||||
|
import Mima.Assembler.Parser.RawInstruction
|
||||||
|
import Mima.Word
|
||||||
|
|
||||||
|
data MyState = MyState
|
||||||
|
{ sCurrentPos :: MimaAddress
|
||||||
|
, sKnownLabels :: Set.Set MimaLabel
|
||||||
|
, sActualPos :: Maybe MimaAddress
|
||||||
|
, sLabels :: Set.Set MimaLabel
|
||||||
|
} deriving (Show)
|
||||||
|
|
||||||
|
initialState :: MimaAddress -> Set.Set MimaLabel -> MyState
|
||||||
|
initialState currentPos knownLabels = MyState
|
||||||
|
{ sCurrentPos = currentPos
|
||||||
|
, sKnownLabels = knownLabels
|
||||||
|
, sActualPos = Nothing
|
||||||
|
, sLabels = Set.empty
|
||||||
|
}
|
||||||
|
|
||||||
|
getActualPos :: MyState -> MimaAddress
|
||||||
|
getActualPos s = fromMaybe (sCurrentPos s) (sActualPos s)
|
||||||
|
|
||||||
|
alreadySeen :: MimaLabel -> MyState -> Bool
|
||||||
|
alreadySeen l s = l `Set.member` sKnownLabels s || l `Set.member` sLabels s
|
||||||
|
|
||||||
|
addLabel :: MimaLabel -> MyState -> MyState
|
||||||
|
addLabel l s = s{sLabels = Set.insert l $ sLabels s}
|
||||||
|
|
||||||
|
{- And now, the parsing -}
|
||||||
|
|
||||||
|
type SParser a = StatefulParser MyState a
|
||||||
|
|
||||||
|
parseLabel :: SParser ()
|
||||||
|
parseLabel = do
|
||||||
|
s <- get
|
||||||
|
l <- lift $ try $ mimaLabel' <* colon
|
||||||
|
void $ lift $ many newline
|
||||||
|
if alreadySeen l s
|
||||||
|
then lift $ failAtLabel l "label already defined earlier"
|
||||||
|
else modify (addLabel l)
|
||||||
|
|
||||||
|
parseAddressLabel :: SParser ()
|
||||||
|
parseAddressLabel = do
|
||||||
|
s <- get
|
||||||
|
(addr, offset) <- lift $ try $ withOffset largeValue' <* colon
|
||||||
|
void $ lift $ many newline
|
||||||
|
when (addr < sCurrentPos s) $ do
|
||||||
|
let errorMsg = "address can't be earlier than " ++ show (sCurrentPos s)
|
||||||
|
lift $ failAt offset errorMsg
|
||||||
|
case sActualPos s of
|
||||||
|
Just _ -> lift $ failAt offset "can't set an instruction's address twice"
|
||||||
|
Nothing -> put s{sActualPos = Just addr}
|
||||||
|
|
||||||
|
parseInstruction' :: SParser (RawInstruction Address)
|
||||||
|
parseInstruction' = do
|
||||||
|
void $ many (parseLabel <|> parseAddressLabel)
|
||||||
|
lift $ rawInstruction <* newlines
|
||||||
|
|
||||||
|
-- | @'parseInstruction' currentPos knownLabels@ parses an instruction and
|
||||||
|
-- its preceding label markings.
|
||||||
|
--
|
||||||
|
-- * @currentPos@ is the position at which, if no other marking is
|
||||||
|
-- specified, this instruction is located.
|
||||||
|
--
|
||||||
|
-- * @knownLabels@ are the labels which have already been set
|
||||||
|
-- elsewhere and thus cannot be set again on this instruction.
|
||||||
|
--
|
||||||
|
-- Returns @(actualPos, instruction, labels)@.
|
||||||
|
--
|
||||||
|
-- * @actualPos@ is the position at which the instruction is actually
|
||||||
|
-- located in memory. This can differ from @currentPos@ if a
|
||||||
|
-- location label is attached to this instruction. The following
|
||||||
|
-- must always hold: @actualPos >= currentPos@.
|
||||||
|
--
|
||||||
|
-- * @instruction@ is the 'RawInstruction' that was parsed.
|
||||||
|
--
|
||||||
|
-- * @labels@ are the labels attached to the parsed instruction.
|
||||||
|
parseInstruction :: MimaAddress
|
||||||
|
-> Set.Set MimaLabel
|
||||||
|
-> Parser (MimaAddress, RawInstruction Address, Set.Set MimaLabel)
|
||||||
|
parseInstruction currentPos knownLabels = do
|
||||||
|
let s = initialState currentPos knownLabels
|
||||||
|
(instruction, s') <- runStatefulParser parseInstruction' s
|
||||||
|
let actualPos = getActualPos s'
|
||||||
|
labels = sLabels s'
|
||||||
|
pure (actualPos, instruction, labels)
|
||||||
53
src/Mima/Assembler/Parser/Label.hs
Normal file
53
src/Mima/Assembler/Parser/Label.hs
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
module Mima.Assembler.Parser.Label
|
||||||
|
( MimaLabel
|
||||||
|
, mimaLabel
|
||||||
|
, mimaLabel'
|
||||||
|
, failAtLabel
|
||||||
|
, resolveLabel
|
||||||
|
, Address
|
||||||
|
, address
|
||||||
|
) where
|
||||||
|
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Text.Megaparsec
|
||||||
|
|
||||||
|
import Mima.Assembler.Parser.Basic
|
||||||
|
import Mima.Word
|
||||||
|
|
||||||
|
{- Labels -}
|
||||||
|
|
||||||
|
data MimaLabel = MimaLabel { lName :: T.Text, lOffset :: Int }
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
|
instance Eq MimaLabel where
|
||||||
|
a == b = lName a == lName b
|
||||||
|
|
||||||
|
instance Ord MimaLabel where
|
||||||
|
compare a b = compare (lName a) (lName b)
|
||||||
|
|
||||||
|
mimaLabel :: Parser MimaLabel
|
||||||
|
mimaLabel = lexeme mimaLabel'
|
||||||
|
|
||||||
|
mimaLabel' :: Parser MimaLabel
|
||||||
|
mimaLabel' = label "label" $ do
|
||||||
|
name <- takeWhile1P Nothing (\c -> isAlphabet c || isConnecting c)
|
||||||
|
offset <- getOffset
|
||||||
|
pure MimaLabel{lName = name, lOffset = offset}
|
||||||
|
|
||||||
|
failAtLabel :: MimaLabel -> String -> Parser a
|
||||||
|
failAtLabel l = failAt (lOffset l)
|
||||||
|
|
||||||
|
resolveLabel :: Map.Map MimaLabel MimaAddress -> MimaLabel -> Parser MimaAddress
|
||||||
|
resolveLabel lmap l =
|
||||||
|
case lmap Map.!? l of
|
||||||
|
Just addr -> pure addr
|
||||||
|
Nothing -> failAtLabel l "could not resolve label"
|
||||||
|
|
||||||
|
{- Addresses -}
|
||||||
|
|
||||||
|
data Address = Direct LargeValue | Indirect MimaLabel
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
|
address :: Parser Address
|
||||||
|
address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel')
|
||||||
117
src/Mima/Assembler/Parser/RawInstruction.hs
Normal file
117
src/Mima/Assembler/Parser/RawInstruction.hs
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Mima.Assembler.Parser.RawInstruction
|
||||||
|
( RawInstruction
|
||||||
|
, rawInstruction
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Bits
|
||||||
|
import Text.Megaparsec
|
||||||
|
import qualified Text.Megaparsec.Char as C
|
||||||
|
|
||||||
|
import Mima.Assembler.Parser.Basic
|
||||||
|
import Mima.Assembler.Parser.Label
|
||||||
|
import Mima.Word
|
||||||
|
|
||||||
|
data RawInstruction addr
|
||||||
|
= RawLIT MimaWord
|
||||||
|
| RawLDC addr
|
||||||
|
| RawLDV addr
|
||||||
|
| RawSTV addr
|
||||||
|
| RawADD addr
|
||||||
|
| RawAND addr
|
||||||
|
| RawOR addr
|
||||||
|
| RawXOR addr
|
||||||
|
| RawEQL addr
|
||||||
|
| RawJMP addr
|
||||||
|
| RawJMN addr
|
||||||
|
| RawLDIV addr
|
||||||
|
| RawSTIV addr
|
||||||
|
| RawCALL addr
|
||||||
|
| RawLDVR addr
|
||||||
|
| RawSTVR addr
|
||||||
|
| RawHALT SmallValue
|
||||||
|
| RawNOT SmallValue
|
||||||
|
| RawRAR SmallValue
|
||||||
|
| RawRET SmallValue
|
||||||
|
| RawLDRA SmallValue
|
||||||
|
| RawSTRA SmallValue
|
||||||
|
| RawLDSP SmallValue
|
||||||
|
| RawSTSP SmallValue
|
||||||
|
| RawLDFP SmallValue
|
||||||
|
| RawSTFP SmallValue
|
||||||
|
| RawADC SmallValue
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
|
rawInstruction :: Parser (RawInstruction Address)
|
||||||
|
rawInstruction
|
||||||
|
= label "instruction"
|
||||||
|
$ RawLIT <$> instr "LIT" mimaWord
|
||||||
|
<|> RawLDC <$> instr "LDC" address
|
||||||
|
<|> RawLDV <$> instr "LDV" address
|
||||||
|
<|> RawSTV <$> instr "STV" address
|
||||||
|
<|> RawADD <$> instr "ADD" address
|
||||||
|
<|> RawAND <$> instr "AND" address
|
||||||
|
<|> RawOR <$> instr "OR" address
|
||||||
|
<|> RawXOR <$> instr "XOR" address
|
||||||
|
<|> RawEQL <$> instr "EQL" address
|
||||||
|
<|> RawJMP <$> instr "JMP" address
|
||||||
|
<|> RawJMN <$> instr "JMN" address
|
||||||
|
<|> RawLDIV <$> instr "LDIV" address
|
||||||
|
<|> RawSTIV <$> instr "STIV" address
|
||||||
|
<|> RawCALL <$> instr "CALL" address
|
||||||
|
<|> RawLDVR <$> instr "LDVR" address
|
||||||
|
<|> RawSTVR <$> instr "STVR" address
|
||||||
|
<|> RawHALT <$> instr' "HALT" smallValue
|
||||||
|
<|> RawNOT <$> instr' "NOT" smallValue
|
||||||
|
<|> RawRAR <$> instr' "RAR" smallValue
|
||||||
|
<|> RawRET <$> instr' "RET" smallValue
|
||||||
|
<|> RawLDRA <$> instr' "LDRA" smallValue
|
||||||
|
<|> RawSTRA <$> instr' "STRA" smallValue
|
||||||
|
<|> RawLDSP <$> instr' "LDSP" smallValue
|
||||||
|
<|> RawSTSP <$> instr' "STSP" smallValue
|
||||||
|
<|> RawLDFP <$> instr' "LDFP" smallValue
|
||||||
|
<|> RawSTFP <$> instr' "STFP" smallValue
|
||||||
|
<|> RawADC <$> instr "ADC" smallValue
|
||||||
|
where
|
||||||
|
instr name value = lexeme (C.string' name >> whitespace) >> space *> value
|
||||||
|
instr' name value = lexeme (C.string' name >> whitespace) *> (value <|> pure zeroBits)
|
||||||
|
|
||||||
|
{-
|
||||||
|
data InstrState = InstrState
|
||||||
|
{ isCurPos :: MimaAddress
|
||||||
|
, isPrevLabels :: Set.Set Label
|
||||||
|
, isNewPos :: Maybe MimaAddress
|
||||||
|
, isLabels :: Set.Set Label
|
||||||
|
} deriving (Show)
|
||||||
|
|
||||||
|
instrState :: MimaAddress -> Set.Set Label -> InstrState
|
||||||
|
instrState curPos prevLabels = InstrState curPos prevLabels Nothing Set.empty
|
||||||
|
|
||||||
|
parseLabel :: StatefulParser InstrState ()
|
||||||
|
parseLabel = do
|
||||||
|
name <- lift labelName
|
||||||
|
is <- get
|
||||||
|
let prevLabels = isPrevLabels is
|
||||||
|
labels = isLabels is
|
||||||
|
if name `Set.member` prevLabels || name `Set.member` labels
|
||||||
|
then fail "label can't be specified more than once"
|
||||||
|
else do
|
||||||
|
void $ lift $ lexeme $ C.string ":"
|
||||||
|
put is{isLabels = Set.insert name labels}
|
||||||
|
|
||||||
|
parseLocationLabel :: StatefulParser InstrState ()
|
||||||
|
parseLocationLabel = do
|
||||||
|
newPos <- lift largeValue'
|
||||||
|
is <- get
|
||||||
|
case isNewPos is of
|
||||||
|
Just _ -> fail "cannot specify two positions for one instruction"
|
||||||
|
Nothing -> if newPos < isCurPos is
|
||||||
|
then fail "cannot set a position to an earlier position"
|
||||||
|
else do
|
||||||
|
void $ lift $ lexeme $ C.string ":"
|
||||||
|
put is{isNewPos = Just newPos}
|
||||||
|
|
||||||
|
parseInstruction :: StatefulParser InstrState RawInstruction
|
||||||
|
parseInstruction = try parseLabel <|> parseLocationLabel
|
||||||
|
-}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue