Parse instructions with labels

This commit is contained in:
Joscha 2019-11-09 23:37:47 +00:00
parent 21e72420dd
commit dc990a2e7a
4 changed files with 157 additions and 101 deletions

View file

@ -0,0 +1,86 @@
{-# LANGUAGE TupleSections #-}
module Mima.Assembler.Parser
( parseState
) where
import Control.Monad
import Control.Monad.Trans.Class
import Control.Monad.Trans.State
import qualified Data.Map as Map
import qualified Data.Set as Set
import Text.Megaparsec
import Mima.Assembler.Parser.Basic
import Mima.Assembler.Parser.Instruction
import Mima.Assembler.Parser.Label
import Mima.Assembler.Parser.RawInstruction
import Mima.State
import Mima.Word
data MyState = MyState
{ sCurrentPos :: MimaAddress
, sLabels :: Map.Map MimaLabel MimaAddress
, sInstructions :: Map.Map MimaAddress (RawInstruction Address)
} deriving (Show)
initialState :: MyState
initialState = MyState 0 Map.empty Map.empty
type SParser a = StatefulParser MyState a
incrementCurrentPos :: SParser ()
incrementCurrentPos = do
s <- get
when (sCurrentPos s == maxBound) empty
put s{sCurrentPos = succ $ sCurrentPos s}
parseInstructions' :: SParser ()
parseInstructions' = sepBy parseInstruction' incrementCurrentPos >> lift (eof <|> fail atMaxAddress)
where
atMaxAddress = "already at maximum address (" ++ show (maxBound :: MimaAddress)
++ ") - can't go any further"
parseInstruction' :: SParser ()
parseInstruction' = do
s <- get
let currentPos = sCurrentPos s
knownLabels = Map.keysSet $ sLabels s
(actualPos, instruction, labels) <- lift $ parseInstruction currentPos knownLabels
let newLabels = Map.fromList [(l, actualPos) | l <- Set.toList labels]
put s { sCurrentPos = actualPos
, sLabels = Map.union newLabels $ sLabels s
, sInstructions = Map.insert actualPos instruction $ sInstructions s
}
parseInstructions :: Parser (Map.Map MimaLabel MimaAddress, Map.Map MimaAddress (RawInstruction Address))
parseInstructions = do
(_, s) <- runStatefulParser parseInstructions' initialState
pure (sLabels s, sInstructions s)
resolveRawInstruction :: Map.Map MimaLabel MimaAddress
-> RawInstruction Address
-> Parser (RawInstruction MimaAddress)
resolveRawInstruction _ (RawLIT word) = pure $ RawLIT word
resolveRawInstruction _ (RawLargeInstruction lo sv) = pure $ RawLargeInstruction lo sv
resolveRawInstruction labels (RawSmallInstruction so lv) = do
addr <- resolveAddress labels lv
pure $ RawSmallInstruction so addr
resolveLabels :: Map.Map MimaLabel MimaAddress
-> Map.Map MimaAddress (RawInstruction Address)
-> Parser (Map.Map MimaAddress (RawInstruction MimaAddress))
resolveLabels labels rawLabeledInstructions = do
let labeledInstrList = Map.toList rawLabeledInstructions
resolve = resolveRawInstruction labels
instrList <- forM labeledInstrList $ \(addr, instr) -> (addr,) <$> resolve instr
let rawInstructions = Map.fromList instrList
pure rawInstructions
parseState :: Parser MimaState
parseState = do
space
(labels, rawLabeledInstructions) <- parseInstructions
rawInstructions <- resolveLabels labels rawLabeledInstructions
let mem = mapToMemory $ Map.map rawInstructionToWord rawInstructions
pure $ basicState mem

View file

@ -20,6 +20,7 @@ module Mima.Assembler.Parser.Basic
, largeValue , largeValue
, largeValue' , largeValue'
, smallValue , smallValue
, smallValue'
-- * Stateful parsing -- * Stateful parsing
, StatefulParser , StatefulParser
, runStatefulParser , runStatefulParser
@ -105,14 +106,18 @@ mimaWord :: Parser MimaWord
mimaWord = lexeme $ label "24-bit number" $ fromHex 24 <|> fromDec 24 mimaWord = lexeme $ label "24-bit number" $ fromHex 24 <|> fromDec 24
largeValue :: Parser LargeValue largeValue :: Parser LargeValue
largeValue = lexeme $ largeValue' largeValue = lexeme largeValue'
-- | Non-lexeme version of 'largeValue' -- | Non-lexeme version of 'largeValue'
largeValue' :: Parser LargeValue largeValue' :: Parser LargeValue
largeValue' = label "20-bit number" $ fromHex 20 <|> fromDec 20 largeValue' = label "20-bit number" $ fromHex 20 <|> fromDec 20
smallValue :: Parser SmallValue smallValue :: Parser SmallValue
smallValue = lexeme $ label "16-bit number" $ fromHex 16 <|> fromDec 16 smallValue = lexeme smallValue'
-- | Non-lexeme version of 'smallValue'
smallValue' :: Parser SmallValue
smallValue' = label "16-bit number" $ fromHex 16 <|> fromDec 16
{- Stateful parsing -} {- Stateful parsing -}

View file

@ -6,6 +6,7 @@ module Mima.Assembler.Parser.Label
, resolveLabel , resolveLabel
, Address , Address
, address , address
, resolveAddress
) where ) where
import qualified Data.Map as Map import qualified Data.Map as Map
@ -50,4 +51,8 @@ data Address = Direct LargeValue | Indirect MimaLabel
deriving (Show) deriving (Show)
address :: Parser Address address :: Parser Address
address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel') address = try (Direct <$> largeValue) <|> (Indirect <$> mimaLabel)
resolveAddress :: Map.Map MimaLabel MimaAddress -> Address -> Parser MimaAddress
resolveAddress _ (Direct addr) = pure addr
resolveAddress labels (Indirect l) = resolveLabel labels l

View file

@ -1,117 +1,77 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Mima.Assembler.Parser.RawInstruction module Mima.Assembler.Parser.RawInstruction
( RawInstruction ( RawInstruction(..)
, rawInstruction , rawInstruction
, rawInstructionToWord
) where ) where
import Data.Bits import qualified Data.Text as T
import Text.Megaparsec import Text.Megaparsec
import qualified Text.Megaparsec.Char as C import qualified Text.Megaparsec.Char as C
import Mima.Assembler.Parser.Basic import Mima.Assembler.Parser.Basic
import Mima.Assembler.Parser.Label import Mima.Assembler.Parser.Label
import Mima.Instruction
import Mima.Word import Mima.Word
data RawInstruction addr data RawInstruction addr
= RawLIT MimaWord = RawLIT MimaWord
| RawLDC addr | RawSmallInstruction SmallOpcode addr
| RawLDV addr | RawLargeInstruction LargeOpcode SmallValue
| RawSTV addr
| RawADD addr
| RawAND addr
| RawOR addr
| RawXOR addr
| RawEQL addr
| RawJMP addr
| RawJMN addr
| RawLDIV addr
| RawSTIV addr
| RawCALL addr
| RawLDVR addr
| RawSTVR addr
| RawHALT SmallValue
| RawNOT SmallValue
| RawRAR SmallValue
| RawRET SmallValue
| RawLDRA SmallValue
| RawSTRA SmallValue
| RawLDSP SmallValue
| RawSTSP SmallValue
| RawLDFP SmallValue
| RawSTFP SmallValue
| RawADC SmallValue
deriving (Show) deriving (Show)
parseByLiteral :: [(T.Text, b)] -> Parser b
parseByLiteral = foldl (<|>) empty . map (\(a, b) -> b <$ C.string' a)
smallOpcode' :: Parser SmallOpcode
smallOpcode' = parseByLiteral
[ ( "LDC", LDC)
, ( "LDV", LDV)
, ( "STV", STV)
, ( "ADD", ADD)
, ( "AND", AND)
, ( "OR", OR)
, ( "XOR", XOR)
, ( "EQL", EQL)
, ( "JMP", JMP)
, ( "JMN", JMN)
, ("LDIV", LDIV)
, ("STIV", STIV)
, ("CALL", CALL)
, ("LDVR", LDVR)
, ("STVR", STVR)
]
largeOpcode' :: Parser LargeOpcode
largeOpcode' = parseByLiteral [( "ADC", ADC)]
largeOptionalOpcode' :: Parser LargeOpcode
largeOptionalOpcode' = parseByLiteral
[ ("HALT", HALT)
, ( "NOT", NOT)
, ( "RAR", RAR)
, ( "RET", RET)
, ("LDRA", LDRA)
, ("STRA", STRA)
, ("LDSP", LDSP)
, ("STSP", STSP)
, ("LDFP", LDFP)
, ("STFP", STFP)
]
rawInstruction :: Parser (RawInstruction Address) rawInstruction :: Parser (RawInstruction Address)
rawInstruction rawInstruction = label "instruction" $
= label "instruction" (RawLIT <$> (C.string' "LIT" *> instr mimaWord))
$ RawLIT <$> instr "LIT" mimaWord <|> (RawSmallInstruction <$> smallOpcode' <*> instr address)
<|> RawLDC <$> instr "LDC" address <|> (RawLargeInstruction <$> largeOpcode' <*> instr smallValue)
<|> RawLDV <$> instr "LDV" address <|> (RawLargeInstruction <$> largeOptionalOpcode' <*> instr' smallValue)
<|> RawSTV <$> instr "STV" address
<|> RawADD <$> instr "ADD" address
<|> RawAND <$> instr "AND" address
<|> RawOR <$> instr "OR" address
<|> RawXOR <$> instr "XOR" address
<|> RawEQL <$> instr "EQL" address
<|> RawJMP <$> instr "JMP" address
<|> RawJMN <$> instr "JMN" address
<|> RawLDIV <$> instr "LDIV" address
<|> RawSTIV <$> instr "STIV" address
<|> RawCALL <$> instr "CALL" address
<|> RawLDVR <$> instr "LDVR" address
<|> RawSTVR <$> instr "STVR" address
<|> RawHALT <$> instr' "HALT" smallValue
<|> RawNOT <$> instr' "NOT" smallValue
<|> RawRAR <$> instr' "RAR" smallValue
<|> RawRET <$> instr' "RET" smallValue
<|> RawLDRA <$> instr' "LDRA" smallValue
<|> RawSTRA <$> instr' "STRA" smallValue
<|> RawLDSP <$> instr' "LDSP" smallValue
<|> RawSTSP <$> instr' "STSP" smallValue
<|> RawLDFP <$> instr' "LDFP" smallValue
<|> RawSTFP <$> instr' "STFP" smallValue
<|> RawADC <$> instr "ADC" smallValue
where where
instr name value = lexeme (C.string' name >> whitespace) >> space *> value -- These assume that the parser is a lexeme
instr' name value = lexeme (C.string' name >> whitespace) *> (value <|> pure zeroBits) instr parser = lexeme whitespace *> parser
instr' parser = try (instr parser) <|> pure 0
{- rawInstructionToWord :: RawInstruction MimaAddress -> MimaWord
data InstrState = InstrState rawInstructionToWord (RawLIT word) = word
{ isCurPos :: MimaAddress rawInstructionToWord (RawSmallInstruction so lv) = instructionToWord (SmallInstruction so lv)
, isPrevLabels :: Set.Set Label rawInstructionToWord (RawLargeInstruction lo sv) = instructionToWord (LargeInstruction lo sv)
, isNewPos :: Maybe MimaAddress
, isLabels :: Set.Set Label
} deriving (Show)
instrState :: MimaAddress -> Set.Set Label -> InstrState
instrState curPos prevLabels = InstrState curPos prevLabels Nothing Set.empty
parseLabel :: StatefulParser InstrState ()
parseLabel = do
name <- lift labelName
is <- get
let prevLabels = isPrevLabels is
labels = isLabels is
if name `Set.member` prevLabels || name `Set.member` labels
then fail "label can't be specified more than once"
else do
void $ lift $ lexeme $ C.string ":"
put is{isLabels = Set.insert name labels}
parseLocationLabel :: StatefulParser InstrState ()
parseLocationLabel = do
newPos <- lift largeValue'
is <- get
case isNewPos is of
Just _ -> fail "cannot specify two positions for one instruction"
Nothing -> if newPos < isCurPos is
then fail "cannot set a position to an earlier position"
else do
void $ lift $ lexeme $ C.string ":"
put is{isNewPos = Just newPos}
parseInstruction :: StatefulParser InstrState RawInstruction
parseInstruction = try parseLabel <|> parseLocationLabel
-}