Split up phase 1 into multiple modules
This commit is contained in:
parent
8d0e70cf5d
commit
ff9b7a35c7
5 changed files with 470 additions and 449 deletions
|
|
@ -1,453 +1,10 @@
|
|||
{-# LANGUAGE DeriveFunctor #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Mima.Asm.Phase1
|
||||
(
|
||||
-- * Types
|
||||
Name(..)
|
||||
, Address(..)
|
||||
, Location(..)
|
||||
, SmallOpcode(..)
|
||||
, LargeOpcode(..)
|
||||
, MimaWord(..)
|
||||
, SmallValue(..)
|
||||
, Instruction(..)
|
||||
, RegisterDirective(..)
|
||||
, JsonValue(..)
|
||||
, Directive(..)
|
||||
, Span(..)
|
||||
, AsmToken(..)
|
||||
-- * Phase1
|
||||
( AsmToken
|
||||
, Phase1
|
||||
, parsePhase1
|
||||
, formatPhase1
|
||||
) where
|
||||
|
||||
import Control.Monad
|
||||
import qualified Data.Aeson as A
|
||||
import qualified Data.Aeson.Text as A
|
||||
import qualified Data.ByteString.Lazy as BS
|
||||
import Data.Char
|
||||
import Data.Foldable
|
||||
import Data.Maybe
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import Data.Void
|
||||
import Text.Megaparsec
|
||||
import Text.Megaparsec.Char
|
||||
import Text.Megaparsec.Char.Lexer hiding (space)
|
||||
|
||||
import Mima.Asm.Types
|
||||
import Mima.Format
|
||||
import qualified Mima.Vm.Instruction as Vm
|
||||
|
||||
{-
|
||||
<value> := <word> | <address>
|
||||
<address> := <absolute address> | <relative address> | <label>
|
||||
<name> := [a-z]([a-z0-9_]*)
|
||||
<label> := <name>:
|
||||
<instruction> := as usual
|
||||
|
||||
.reg ACC <value>
|
||||
.reg <other register> <address>
|
||||
|
||||
.org (<absolute address> | <positive relative address> | <negative address>) ???
|
||||
.lit <value>
|
||||
.arr [<value>]
|
||||
|
||||
.meta <name> <json value>
|
||||
.meta-start <name> [<json value>]
|
||||
.meta-stop <name>
|
||||
-}
|
||||
|
||||
{- Types -}
|
||||
|
||||
data Name a = Name a T.Text
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Name where
|
||||
peel (Name a _) = a
|
||||
|
||||
data Address a
|
||||
= AddressAbsolute a Integer
|
||||
| AddressRelative a Integer
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Address where
|
||||
peel (AddressAbsolute a _) = a
|
||||
peel (AddressRelative a _) = a
|
||||
|
||||
data Location a
|
||||
= LocationAddress (Address a)
|
||||
| LocationLabel (Name a)
|
||||
| LocationLabelRel a (Name a) a Integer
|
||||
-- ^ @Lo LocationLabelRel completeSpan name offsetSpan offset@
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Location where
|
||||
peel (LocationAddress a) = peel a
|
||||
peel (LocationLabel a) = peel a
|
||||
peel (LocationLabelRel a _ _ _) = a
|
||||
|
||||
data SmallOpcode a = SmallOpcode a Vm.SmallOpcode
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion SmallOpcode where
|
||||
peel (SmallOpcode a _) = a
|
||||
|
||||
data LargeOpcode a = LargeOpcode a Vm.LargeOpcode
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion LargeOpcode where
|
||||
peel (LargeOpcode a _) = a
|
||||
|
||||
data MimaWord a
|
||||
= WordRaw a Integer
|
||||
| WordLocation (Location a)
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion MimaWord where
|
||||
peel (WordRaw a _) = a
|
||||
peel (WordLocation a) = peel a
|
||||
|
||||
data SmallValue a = SmallValue a Integer
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion SmallValue where
|
||||
peel (SmallValue a _) = a
|
||||
|
||||
data Instruction a
|
||||
= SmallInstruction a (SmallOpcode a) (Location a)
|
||||
| LargeInstruction a (LargeOpcode a) (Maybe (SmallValue a))
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Instruction where
|
||||
peel (SmallInstruction a _ _) = a
|
||||
peel (LargeInstruction a _ _) = a
|
||||
|
||||
-- | The first @a@ parameter represents the span of the whole thing. The second
|
||||
-- @a@ parameter represents the span of the directive literal (e. g. @.org@).
|
||||
data RegisterDirective a
|
||||
= RegIar a a (Location a)
|
||||
| RegAcc a a (MimaWord a)
|
||||
| RegRa a a (Location a)
|
||||
| RegSp a a (Location a)
|
||||
| RegFp a a (Location a)
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion RegisterDirective where
|
||||
peel (RegIar a _ _) = a
|
||||
peel (RegAcc a _ _) = a
|
||||
peel (RegRa a _ _) = a
|
||||
peel (RegSp a _ _) = a
|
||||
peel (RegFp a _ _) = a
|
||||
|
||||
data JsonValue a = JsonValue a A.Value
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion JsonValue where
|
||||
peel (JsonValue a _) = a
|
||||
|
||||
-- | The first @a@ parameter represents the span of the whole thing. The second
|
||||
-- @a@ parameter represents the span of the directive literal (e. g. @.org@).
|
||||
data Directive a
|
||||
= Reg a a (RegisterDirective a)
|
||||
| Org a a (Address a)
|
||||
| Lit a a (MimaWord a)
|
||||
| Arr a a [MimaWord a]
|
||||
| Meta a a (Name a) (JsonValue a)
|
||||
| MetaStart a a (Name a) (JsonValue a)
|
||||
| MetaStop a a (Name a)
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Directive where
|
||||
peel (Reg a _ _) = a
|
||||
peel (Org a _ _) = a
|
||||
peel (Lit a _ _) = a
|
||||
peel (Arr a _ _) = a
|
||||
peel (Meta a _ _ _) = a
|
||||
peel (MetaStart a _ _ _) = a
|
||||
peel (MetaStop a _ _) = a
|
||||
|
||||
data AsmToken a
|
||||
= TokenLabel (Name a)
|
||||
| TokenInstruction (Instruction a)
|
||||
| TokenDirective (Directive a)
|
||||
| TokenComment a T.Text Bool
|
||||
-- ^ @'TokenComment' a text inline@ represents a comment.
|
||||
-- @inline@ is true if the comment is on the same line as an instruction or a label.
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion AsmToken where
|
||||
peel (TokenLabel a) = peel a
|
||||
peel (TokenInstruction a) = peel a
|
||||
peel (TokenDirective a) = peel a
|
||||
peel (TokenComment a _ _) = a
|
||||
|
||||
data Span = Span SourcePos SourcePos
|
||||
|
||||
instance Show Span where
|
||||
show (Span start end) = "<" ++ showPos start ++ "-" ++ showPos end ++ ">"
|
||||
where
|
||||
showPos pos =
|
||||
show (unPos $ sourceLine pos) ++ ":" ++ show (unPos $ sourceColumn pos)
|
||||
|
||||
type Phase1 s = [AsmToken s]
|
||||
|
||||
{- Parsing -}
|
||||
|
||||
type Parser = Parsec Void T.Text
|
||||
|
||||
inlineSpace :: Parser ()
|
||||
inlineSpace = void $ takeWhileP (Just "space (no newline)") (\x -> isSpace x && x /= '\n')
|
||||
|
||||
inlineSpace1 :: Parser ()
|
||||
inlineSpace1 = void $ takeWhile1P (Just "space (no newline)") (\x -> isSpace x && x /= '\n')
|
||||
|
||||
withSpan :: Parser a -> Parser (Span, a)
|
||||
withSpan f = do
|
||||
start <- getSourcePos
|
||||
result <- f
|
||||
stop <- getSourcePos
|
||||
pure (Span start stop, result)
|
||||
|
||||
name :: Parser (Name Span)
|
||||
name = fmap (uncurry Name) $ withSpan $ do
|
||||
firstChar <- satisfy isLower <?> "lowercase character"
|
||||
otherChars <- takeWhileP (Just "alphanumeric character or '_'") (\c -> isAlphaNum c || c == '_')
|
||||
pure $ T.pack [firstChar] <> otherChars
|
||||
|
||||
number :: (Num a) => Parser a
|
||||
number =
|
||||
(chunk "0b" *> binary) <|>
|
||||
(chunk "0o" *> octal) <|>
|
||||
(chunk "0x" *> hexadecimal) <|>
|
||||
decimal
|
||||
|
||||
signedNumber :: (Num a) => Parser a
|
||||
signedNumber = signed (pure ()) number -- do not allow any space
|
||||
|
||||
address :: Parser (Address Span)
|
||||
address =
|
||||
fmap (uncurry AddressRelative) (withSpan $ between (char '[') (char ']') signedNumber) <|>
|
||||
fmap (uncurry AddressAbsolute) (withSpan signedNumber)
|
||||
|
||||
labelWithOffset :: Parser (Location Span)
|
||||
labelWithOffset = do
|
||||
(completeSpan, (n, offsetSpan, offset)) <- withSpan $ do
|
||||
n <- name
|
||||
(offsetSpan, offset) <- withSpan $ between (char '[') (char ']') signedNumber
|
||||
pure (n, offsetSpan, offset)
|
||||
|
||||
pure $ LocationLabelRel completeSpan n offsetSpan offset
|
||||
|
||||
location :: Parser (Location Span)
|
||||
location =
|
||||
(LocationAddress <$> address) <|> try labelWithOffset <|> (LocationLabel <$> name)
|
||||
|
||||
smallOpcode :: Parser (SmallOpcode Span)
|
||||
smallOpcode = asum $ map parseOpcode [minBound..maxBound]
|
||||
where
|
||||
parseOpcode o = do
|
||||
(s, _) <- withSpan $ chunk $ T.pack $ show o
|
||||
pure $ SmallOpcode s o
|
||||
|
||||
largeOpcode :: Parser (LargeOpcode Span)
|
||||
largeOpcode = asum $ map parseOpcode [minBound..maxBound]
|
||||
where
|
||||
parseOpcode o = do
|
||||
(s, _) <- withSpan $ chunk $ T.pack $ show o
|
||||
pure $ LargeOpcode s o
|
||||
|
||||
mimaWord :: Parser (MimaWord Span)
|
||||
mimaWord =
|
||||
(uncurry WordRaw <$> withSpan signedNumber) <|> (WordLocation <$> location)
|
||||
|
||||
smallValue :: Parser (SmallValue Span)
|
||||
smallValue = uncurry SmallValue <$> withSpan signedNumber
|
||||
|
||||
instruction :: Parser (Instruction Span)
|
||||
instruction = small <|> large
|
||||
where
|
||||
small = do
|
||||
start <- getSourcePos
|
||||
so <- smallOpcode
|
||||
inlineSpace1
|
||||
loc <- location
|
||||
stop <- getSourcePos
|
||||
pure $ SmallInstruction (Span start stop) so loc
|
||||
large = do
|
||||
start <- getSourcePos
|
||||
lo <- largeOpcode
|
||||
sv <- optionalAwareArgument lo
|
||||
stop <- getSourcePos
|
||||
pure $ LargeInstruction (Span start stop) lo sv
|
||||
optionalAwareArgument (LargeOpcode _ code)
|
||||
| Vm.argumentIsOptional code = optional (inlineSpace1 *> smallValue <?> "argument")
|
||||
| otherwise = Just <$> (inlineSpace1 *> smallValue <?> "argument")
|
||||
|
||||
singleDirective
|
||||
:: (Span -> Span -> a -> b Span)
|
||||
-> T.Text
|
||||
-> Parser a
|
||||
-> Parser (b Span)
|
||||
singleDirective f t p = do
|
||||
(outerSpan, (nameSpan, a)) <- withSpan $ do
|
||||
(nameSpan, _) <- withSpan $ chunk t
|
||||
inlineSpace1
|
||||
a <- p
|
||||
pure (nameSpan, a)
|
||||
pure $ f outerSpan nameSpan a
|
||||
|
||||
registerDirective :: Parser (RegisterDirective Span)
|
||||
registerDirective =
|
||||
singleDirective RegIar "IAR" location <|>
|
||||
singleDirective RegAcc "ACC" mimaWord <|>
|
||||
singleDirective RegRa "RA" location <|>
|
||||
singleDirective RegSp "SP" location <|>
|
||||
singleDirective RegFp "FP" location
|
||||
|
||||
directive :: Parser (Directive Span)
|
||||
directive =
|
||||
singleDirective Reg ".reg" registerDirective <|>
|
||||
singleDirective Org ".org" address <|>
|
||||
singleDirective Lit ".lit" mimaWord <|>
|
||||
arr <|>
|
||||
metaStart MetaStart ".meta-start" <|>
|
||||
singleDirective MetaStop ".meta-stop" name <|>
|
||||
metaStart Meta ".meta"
|
||||
where
|
||||
arr = do
|
||||
(outerSpan, (regSpan, mimaWords)) <- withSpan $ do
|
||||
(dirSpan, _) <- withSpan $ chunk ".arr"
|
||||
inlineSpace1
|
||||
mimaWords <- between (char '[') (char ']') $
|
||||
sepBy1 mimaWord (char ',' *> inlineSpace)
|
||||
pure (dirSpan, mimaWords)
|
||||
pure $ Arr outerSpan regSpan mimaWords
|
||||
|
||||
metaStart f keyword = do
|
||||
(outerSpan, (regSpan, metaName, jsonValue)) <- withSpan $ do
|
||||
(dirSpan, _) <- withSpan $ chunk keyword
|
||||
inlineSpace1
|
||||
metaName <- name
|
||||
inlineSpace1
|
||||
|
||||
(valueSpan, rawJsonValue) <- withSpan $ do
|
||||
metaValueBS <- BS.fromStrict . T.encodeUtf8
|
||||
<$> takeWhile1P (Just "json value") (/= '\n')
|
||||
case A.eitherDecode metaValueBS of
|
||||
Left msg -> fail msg
|
||||
Right value -> pure value
|
||||
|
||||
pure (dirSpan, metaName, JsonValue valueSpan rawJsonValue)
|
||||
pure $ f outerSpan regSpan metaName jsonValue
|
||||
|
||||
comment :: Bool -> Parser (AsmToken Span)
|
||||
comment inline = fmap (\(s, text) -> TokenComment s text inline) $ withSpan $
|
||||
char ';' *> takeWhileP (Just "comment") (/= '\n')
|
||||
|
||||
asmToken :: Parser (AsmToken Span)
|
||||
asmToken = (TokenInstruction <$> instruction) <|> (TokenDirective <$> directive)
|
||||
|
||||
labels :: Parser [AsmToken Span]
|
||||
labels = do
|
||||
start <- optional label_
|
||||
case start of
|
||||
Nothing -> pure []
|
||||
Just l -> do
|
||||
ls <- many $ try $ inlineSpace1 *> label_
|
||||
pure $ l : ls
|
||||
where
|
||||
label_ = (TokenLabel <$> name) <* char ':'
|
||||
|
||||
-- | Parses a single line consisting of zero or more tokens:
|
||||
-- inlineSpace, zero or more labels, zero or more instructions/directives,
|
||||
-- zero or more comments and a final newline or EOF.
|
||||
lineParser :: Parser [AsmToken Span]
|
||||
lineParser = do
|
||||
inlineSpace
|
||||
ls <- labels
|
||||
t <- optional $ try $ do
|
||||
unless (null ls) inlineSpace1
|
||||
asmToken
|
||||
c <- optional $ try $ do
|
||||
let aloneOnLine = null ls && isNothing t
|
||||
unless aloneOnLine inlineSpace1
|
||||
comment $ not aloneOnLine
|
||||
pure $ ls ++ toList t ++ toList c
|
||||
|
||||
parsePhase1 :: Parser (Phase1 Span)
|
||||
parsePhase1 = mconcat <$> sepBy lineParser newline <* eof
|
||||
|
||||
{- Formatting -}
|
||||
|
||||
formatName :: Name a -> T.Text
|
||||
formatName (Name _ text) = text
|
||||
|
||||
formatAddress :: Address a -> T.Text
|
||||
formatAddress (AddressAbsolute _ addr) = toDec addr
|
||||
formatAddress (AddressRelative _ rel)
|
||||
| rel >= 0 = T.pack $ "+" ++ show rel
|
||||
| otherwise = T.pack $ show rel
|
||||
|
||||
formatLocation :: Location a -> T.Text
|
||||
formatLocation (LocationAddress addr) = formatAddress addr
|
||||
formatLocation (LocationLabel l) = formatName l
|
||||
formatLocation (LocationLabelRel _ l _ offset)
|
||||
= formatName l <> "[" <> toDec offset <> "]"
|
||||
|
||||
formatSmallOpcode :: SmallOpcode a -> T.Text
|
||||
formatSmallOpcode (SmallOpcode _ opcode) = T.pack $ show opcode
|
||||
|
||||
formatLargeOpcode :: LargeOpcode a -> T.Text
|
||||
formatLargeOpcode (LargeOpcode _ opcode) = T.pack $ show opcode
|
||||
|
||||
formatMimaWord :: MimaWord a -> T.Text
|
||||
formatMimaWord (WordRaw _ word) = toDec word
|
||||
formatMimaWord (WordLocation loc) = formatLocation loc
|
||||
|
||||
formatSmallValue :: SmallValue a -> T.Text
|
||||
formatSmallValue (SmallValue _ val) = toDec val
|
||||
|
||||
formatInstruction :: Instruction a -> T.Text
|
||||
formatInstruction (SmallInstruction _ opcode loc) =
|
||||
formatSmallOpcode opcode <> " " <> formatLocation loc
|
||||
formatInstruction (LargeInstruction _ opcode Nothing) =
|
||||
formatLargeOpcode opcode
|
||||
formatInstruction (LargeInstruction _ opcode (Just val)) =
|
||||
formatLargeOpcode opcode <> " " <> formatSmallValue val
|
||||
|
||||
formatRegisterDirective :: RegisterDirective a -> T.Text
|
||||
formatRegisterDirective (RegIar _ _ loc) = "IAR " <> formatLocation loc
|
||||
formatRegisterDirective (RegAcc _ _ word) = "ACC " <> formatMimaWord word
|
||||
formatRegisterDirective (RegRa _ _ loc) = "RA " <> formatLocation loc
|
||||
formatRegisterDirective (RegSp _ _ loc) = "SP " <> formatLocation loc
|
||||
formatRegisterDirective (RegFp _ _ loc) = "FP " <> formatLocation loc
|
||||
|
||||
formatJsonValue :: JsonValue a -> T.Text
|
||||
formatJsonValue (JsonValue _ val) = TL.toStrict $ A.encodeToLazyText val
|
||||
|
||||
formatDirective :: Directive a -> T.Text
|
||||
formatDirective (Reg _ _ regDir) = ".reg " <> formatRegisterDirective regDir
|
||||
formatDirective (Org _ _ addr) = ".org " <> formatAddress addr
|
||||
formatDirective (Lit _ _ val) = ".lit " <> formatMimaWord val
|
||||
formatDirective (Arr _ _ vals) =
|
||||
".arr [" <> T.intercalate ", " (map formatMimaWord vals) <> "]"
|
||||
formatDirective (Meta _ _ n val) =
|
||||
".meta " <> formatName n <> " " <> formatJsonValue val
|
||||
formatDirective (MetaStart _ _ n val) =
|
||||
".meta-start " <> formatName n <> " " <> formatJsonValue val
|
||||
formatDirective (MetaStop _ _ n) = ".meta-stop " <> formatName n
|
||||
|
||||
formatToken :: AsmToken a -> T.Text
|
||||
formatToken (TokenLabel n) = formatName n <> ":"
|
||||
formatToken (TokenInstruction ins) = " " <> formatInstruction ins
|
||||
formatToken (TokenDirective dir) = formatDirective dir
|
||||
formatToken (TokenComment _ text _) = ";" <> text
|
||||
|
||||
formatPhase1 :: Phase1 a -> T.Text
|
||||
formatPhase1 (x:y@(TokenComment _ _ True):xs) = formatToken x <> " " <> formatPhase1 (y:xs)
|
||||
formatPhase1 (x:xs) = formatToken x <> "\n" <> formatPhase1 xs
|
||||
formatPhase1 [] = ""
|
||||
import Mima.Asm.Phase1.Format
|
||||
import Mima.Asm.Phase1.Parse
|
||||
import Mima.Asm.Phase1.Types
|
||||
|
|
|
|||
81
src/Mima/Asm/Phase1/Format.hs
Normal file
81
src/Mima/Asm/Phase1/Format.hs
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Mima.Asm.Phase1.Format
|
||||
( formatPhase1
|
||||
) where
|
||||
|
||||
import qualified Data.Aeson.Text as A
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
|
||||
import Mima.Asm.Phase1.Types
|
||||
import Mima.Format
|
||||
|
||||
formatName :: Name a -> T.Text
|
||||
formatName (Name _ text) = text
|
||||
|
||||
formatAddress :: Address a -> T.Text
|
||||
formatAddress (AddressAbsolute _ addr) = toDec addr
|
||||
formatAddress (AddressRelative _ rel)
|
||||
| rel >= 0 = T.pack $ "+" ++ show rel
|
||||
| otherwise = T.pack $ show rel
|
||||
|
||||
formatLocation :: Location a -> T.Text
|
||||
formatLocation (LocationAddress addr) = formatAddress addr
|
||||
formatLocation (LocationLabel l) = formatName l
|
||||
formatLocation (LocationLabelRel _ l _ offset)
|
||||
= formatName l <> "[" <> toDec offset <> "]"
|
||||
|
||||
formatSmallOpcode :: SmallOpcode a -> T.Text
|
||||
formatSmallOpcode (SmallOpcode _ opcode) = T.pack $ show opcode
|
||||
|
||||
formatLargeOpcode :: LargeOpcode a -> T.Text
|
||||
formatLargeOpcode (LargeOpcode _ opcode) = T.pack $ show opcode
|
||||
|
||||
formatMimaWord :: MimaWord a -> T.Text
|
||||
formatMimaWord (WordRaw _ word) = toDec word
|
||||
formatMimaWord (WordLocation loc) = formatLocation loc
|
||||
|
||||
formatSmallValue :: SmallValue a -> T.Text
|
||||
formatSmallValue (SmallValue _ val) = toDec val
|
||||
|
||||
formatInstruction :: Instruction a -> T.Text
|
||||
formatInstruction (SmallInstruction _ opcode loc) =
|
||||
formatSmallOpcode opcode <> " " <> formatLocation loc
|
||||
formatInstruction (LargeInstruction _ opcode Nothing) =
|
||||
formatLargeOpcode opcode
|
||||
formatInstruction (LargeInstruction _ opcode (Just val)) =
|
||||
formatLargeOpcode opcode <> " " <> formatSmallValue val
|
||||
|
||||
formatRegisterDirective :: RegisterDirective a -> T.Text
|
||||
formatRegisterDirective (RegIar _ _ loc) = "IAR " <> formatLocation loc
|
||||
formatRegisterDirective (RegAcc _ _ word) = "ACC " <> formatMimaWord word
|
||||
formatRegisterDirective (RegRa _ _ loc) = "RA " <> formatLocation loc
|
||||
formatRegisterDirective (RegSp _ _ loc) = "SP " <> formatLocation loc
|
||||
formatRegisterDirective (RegFp _ _ loc) = "FP " <> formatLocation loc
|
||||
|
||||
formatJsonValue :: JsonValue a -> T.Text
|
||||
formatJsonValue (JsonValue _ val) = TL.toStrict $ A.encodeToLazyText val
|
||||
|
||||
formatDirective :: Directive a -> T.Text
|
||||
formatDirective (Reg _ _ regDir) = ".reg " <> formatRegisterDirective regDir
|
||||
formatDirective (Org _ _ addr) = ".org " <> formatAddress addr
|
||||
formatDirective (Lit _ _ val) = ".lit " <> formatMimaWord val
|
||||
formatDirective (Arr _ _ vals) =
|
||||
".arr [" <> T.intercalate ", " (map formatMimaWord vals) <> "]"
|
||||
formatDirective (Meta _ _ n val) =
|
||||
".meta " <> formatName n <> " " <> formatJsonValue val
|
||||
formatDirective (MetaStart _ _ n val) =
|
||||
".meta-start " <> formatName n <> " " <> formatJsonValue val
|
||||
formatDirective (MetaStop _ _ n) = ".meta-stop " <> formatName n
|
||||
|
||||
formatToken :: AsmToken a -> T.Text
|
||||
formatToken (TokenLabel n) = formatName n <> ":"
|
||||
formatToken (TokenInstruction ins) = " " <> formatInstruction ins
|
||||
formatToken (TokenDirective dir) = formatDirective dir
|
||||
formatToken (TokenComment _ text _) = ";" <> text
|
||||
|
||||
formatPhase1 :: Phase1 a -> T.Text
|
||||
formatPhase1 (x:y@(TokenComment _ _ True):xs) = formatToken x <> " " <> formatPhase1 (y:xs)
|
||||
formatPhase1 (x:xs) = formatToken x <> "\n" <> formatPhase1 xs
|
||||
formatPhase1 [] = ""
|
||||
215
src/Mima/Asm/Phase1/Parse.hs
Normal file
215
src/Mima/Asm/Phase1/Parse.hs
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Mima.Asm.Phase1.Parse
|
||||
( Span(..)
|
||||
, Parser
|
||||
, parsePhase1
|
||||
) where
|
||||
|
||||
import Control.Monad
|
||||
import qualified Data.Aeson as A
|
||||
import qualified Data.ByteString.Lazy as BS
|
||||
import Data.Char
|
||||
import Data.Foldable
|
||||
import Data.Maybe
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import Data.Void
|
||||
import Text.Megaparsec
|
||||
import Text.Megaparsec.Char
|
||||
import Text.Megaparsec.Char.Lexer hiding (space)
|
||||
|
||||
import Mima.Asm.Phase1.Types
|
||||
import qualified Mima.Vm.Instruction as Vm
|
||||
|
||||
data Span = Span SourcePos SourcePos
|
||||
|
||||
instance Show Span where
|
||||
show (Span start end) = "<" ++ showPos start ++ "-" ++ showPos end ++ ">"
|
||||
where
|
||||
showPos pos =
|
||||
show (unPos $ sourceLine pos) ++ ":" ++ show (unPos $ sourceColumn pos)
|
||||
|
||||
type Parser = Parsec Void T.Text
|
||||
|
||||
inlineSpace :: Parser ()
|
||||
inlineSpace = void $ takeWhileP (Just "space (no newline)") (\x -> isSpace x && x /= '\n')
|
||||
|
||||
inlineSpace1 :: Parser ()
|
||||
inlineSpace1 = void $ takeWhile1P (Just "space (no newline)") (\x -> isSpace x && x /= '\n')
|
||||
|
||||
withSpan :: Parser a -> Parser (Span, a)
|
||||
withSpan f = do
|
||||
start <- getSourcePos
|
||||
result <- f
|
||||
stop <- getSourcePos
|
||||
pure (Span start stop, result)
|
||||
|
||||
name :: Parser (Name Span)
|
||||
name = fmap (uncurry Name) $ withSpan $ do
|
||||
firstChar <- satisfy isLower <?> "lowercase character"
|
||||
otherChars <- takeWhileP (Just "alphanumeric character or '_'") (\c -> isAlphaNum c || c == '_')
|
||||
pure $ T.pack [firstChar] <> otherChars
|
||||
|
||||
number :: (Num a) => Parser a
|
||||
number =
|
||||
(chunk "0b" *> binary) <|>
|
||||
(chunk "0o" *> octal) <|>
|
||||
(chunk "0x" *> hexadecimal) <|>
|
||||
decimal
|
||||
|
||||
signedNumber :: (Num a) => Parser a
|
||||
signedNumber = signed (pure ()) number -- do not allow any space
|
||||
|
||||
address :: Parser (Address Span)
|
||||
address =
|
||||
fmap (uncurry AddressRelative) (withSpan $ between (char '[') (char ']') signedNumber) <|>
|
||||
fmap (uncurry AddressAbsolute) (withSpan signedNumber)
|
||||
|
||||
labelWithOffset :: Parser (Location Span)
|
||||
labelWithOffset = do
|
||||
(completeSpan, (n, offsetSpan, offset)) <- withSpan $ do
|
||||
n <- name
|
||||
(offsetSpan, offset) <- withSpan $ between (char '[') (char ']') signedNumber
|
||||
pure (n, offsetSpan, offset)
|
||||
|
||||
pure $ LocationLabelRel completeSpan n offsetSpan offset
|
||||
|
||||
location :: Parser (Location Span)
|
||||
location =
|
||||
(LocationAddress <$> address) <|> try labelWithOffset <|> (LocationLabel <$> name)
|
||||
|
||||
smallOpcode :: Parser (SmallOpcode Span)
|
||||
smallOpcode = asum $ map parseOpcode [minBound..maxBound]
|
||||
where
|
||||
parseOpcode o = do
|
||||
(s, _) <- withSpan $ chunk $ T.pack $ show o
|
||||
pure $ SmallOpcode s o
|
||||
|
||||
largeOpcode :: Parser (LargeOpcode Span)
|
||||
largeOpcode = asum $ map parseOpcode [minBound..maxBound]
|
||||
where
|
||||
parseOpcode o = do
|
||||
(s, _) <- withSpan $ chunk $ T.pack $ show o
|
||||
pure $ LargeOpcode s o
|
||||
|
||||
mimaWord :: Parser (MimaWord Span)
|
||||
mimaWord =
|
||||
(uncurry WordRaw <$> withSpan signedNumber) <|> (WordLocation <$> location)
|
||||
|
||||
smallValue :: Parser (SmallValue Span)
|
||||
smallValue = uncurry SmallValue <$> withSpan signedNumber
|
||||
|
||||
instruction :: Parser (Instruction Span)
|
||||
instruction = small <|> large
|
||||
where
|
||||
small = do
|
||||
start <- getSourcePos
|
||||
so <- smallOpcode
|
||||
inlineSpace1
|
||||
loc <- location
|
||||
stop <- getSourcePos
|
||||
pure $ SmallInstruction (Span start stop) so loc
|
||||
large = do
|
||||
start <- getSourcePos
|
||||
lo <- largeOpcode
|
||||
sv <- optionalAwareArgument lo
|
||||
stop <- getSourcePos
|
||||
pure $ LargeInstruction (Span start stop) lo sv
|
||||
optionalAwareArgument (LargeOpcode _ code)
|
||||
| Vm.argumentIsOptional code = optional (inlineSpace1 *> smallValue <?> "argument")
|
||||
| otherwise = Just <$> (inlineSpace1 *> smallValue <?> "argument")
|
||||
|
||||
singleDirective
|
||||
:: (Span -> Span -> a -> b Span)
|
||||
-> T.Text
|
||||
-> Parser a
|
||||
-> Parser (b Span)
|
||||
singleDirective f t p = do
|
||||
(outerSpan, (nameSpan, a)) <- withSpan $ do
|
||||
(nameSpan, _) <- withSpan $ chunk t
|
||||
inlineSpace1
|
||||
a <- p
|
||||
pure (nameSpan, a)
|
||||
pure $ f outerSpan nameSpan a
|
||||
|
||||
registerDirective :: Parser (RegisterDirective Span)
|
||||
registerDirective =
|
||||
singleDirective RegIar "IAR" location <|>
|
||||
singleDirective RegAcc "ACC" mimaWord <|>
|
||||
singleDirective RegRa "RA" location <|>
|
||||
singleDirective RegSp "SP" location <|>
|
||||
singleDirective RegFp "FP" location
|
||||
|
||||
directive :: Parser (Directive Span)
|
||||
directive =
|
||||
singleDirective Reg ".reg" registerDirective <|>
|
||||
singleDirective Org ".org" address <|>
|
||||
singleDirective Lit ".lit" mimaWord <|>
|
||||
arr <|>
|
||||
metaStart MetaStart ".meta-start" <|>
|
||||
singleDirective MetaStop ".meta-stop" name <|>
|
||||
metaStart Meta ".meta"
|
||||
where
|
||||
arr = do
|
||||
(outerSpan, (regSpan, mimaWords)) <- withSpan $ do
|
||||
(dirSpan, _) <- withSpan $ chunk ".arr"
|
||||
inlineSpace1
|
||||
mimaWords <- between (char '[') (char ']') $
|
||||
sepBy1 mimaWord (char ',' *> inlineSpace)
|
||||
pure (dirSpan, mimaWords)
|
||||
pure $ Arr outerSpan regSpan mimaWords
|
||||
|
||||
metaStart f keyword = do
|
||||
(outerSpan, (regSpan, metaName, jsonValue)) <- withSpan $ do
|
||||
(dirSpan, _) <- withSpan $ chunk keyword
|
||||
inlineSpace1
|
||||
metaName <- name
|
||||
inlineSpace1
|
||||
|
||||
(valueSpan, rawJsonValue) <- withSpan $ do
|
||||
metaValueBS <- BS.fromStrict . T.encodeUtf8
|
||||
<$> takeWhile1P (Just "json value") (/= '\n')
|
||||
case A.eitherDecode metaValueBS of
|
||||
Left msg -> fail msg
|
||||
Right value -> pure value
|
||||
|
||||
pure (dirSpan, metaName, JsonValue valueSpan rawJsonValue)
|
||||
pure $ f outerSpan regSpan metaName jsonValue
|
||||
|
||||
comment :: Bool -> Parser (AsmToken Span)
|
||||
comment inline = fmap (\(s, text) -> TokenComment s text inline) $ withSpan $
|
||||
char ';' *> takeWhileP (Just "comment") (/= '\n')
|
||||
|
||||
asmToken :: Parser (AsmToken Span)
|
||||
asmToken = (TokenInstruction <$> instruction) <|> (TokenDirective <$> directive)
|
||||
|
||||
labels :: Parser [AsmToken Span]
|
||||
labels = do
|
||||
start <- optional label_
|
||||
case start of
|
||||
Nothing -> pure []
|
||||
Just l -> do
|
||||
ls <- many $ try $ inlineSpace1 *> label_
|
||||
pure $ l : ls
|
||||
where
|
||||
label_ = (TokenLabel <$> name) <* char ':'
|
||||
|
||||
-- | Parses a single line consisting of zero or more tokens:
|
||||
-- inlineSpace, zero or more labels, zero or more instructions/directives,
|
||||
-- zero or more comments and a final newline or EOF.
|
||||
lineParser :: Parser [AsmToken Span]
|
||||
lineParser = do
|
||||
inlineSpace
|
||||
ls <- labels
|
||||
t <- optional $ try $ do
|
||||
unless (null ls) inlineSpace1
|
||||
asmToken
|
||||
c <- optional $ try $ do
|
||||
let aloneOnLine = null ls && isNothing t
|
||||
unless aloneOnLine inlineSpace1
|
||||
comment $ not aloneOnLine
|
||||
pure $ ls ++ toList t ++ toList c
|
||||
|
||||
parsePhase1 :: Parser (Phase1 Span)
|
||||
parsePhase1 = mconcat <$> sepBy lineParser newline <* eof
|
||||
168
src/Mima/Asm/Phase1/Types.hs
Normal file
168
src/Mima/Asm/Phase1/Types.hs
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
{-# LANGUAGE DeriveFunctor #-}
|
||||
|
||||
module Mima.Asm.Phase1.Types
|
||||
( Name(..)
|
||||
, Address(..)
|
||||
, Location(..)
|
||||
, SmallOpcode(..)
|
||||
, LargeOpcode(..)
|
||||
, MimaWord(..)
|
||||
, SmallValue(..)
|
||||
, Instruction(..)
|
||||
, RegisterDirective(..)
|
||||
, JsonValue(..)
|
||||
, Directive(..)
|
||||
, AsmToken(..)
|
||||
, Phase1
|
||||
) where
|
||||
|
||||
import qualified Data.Aeson as A
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Mima.Asm.Types
|
||||
import qualified Mima.Vm.Instruction as Vm
|
||||
|
||||
{-
|
||||
<value> := <word> | <address>
|
||||
<address> := <absolute address> | <relative address> | <label>
|
||||
<name> := [a-z]([a-z0-9_]*)
|
||||
<label> := <name>:
|
||||
<instruction> := as usual
|
||||
|
||||
.reg ACC <value>
|
||||
.reg <other register> <address>
|
||||
|
||||
.org (<absolute address> | <positive relative address> | <negative address>) ???
|
||||
.lit <value>
|
||||
.arr [<value>]
|
||||
|
||||
.meta <name> <json value>
|
||||
.meta-start <name> [<json value>]
|
||||
.meta-stop <name>
|
||||
-}
|
||||
|
||||
{- Types -}
|
||||
|
||||
data Name a = Name a T.Text
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Name where
|
||||
peel (Name a _) = a
|
||||
|
||||
data Address a
|
||||
= AddressAbsolute a Integer
|
||||
| AddressRelative a Integer
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Address where
|
||||
peel (AddressAbsolute a _) = a
|
||||
peel (AddressRelative a _) = a
|
||||
|
||||
data Location a
|
||||
= LocationAddress (Address a)
|
||||
| LocationLabel (Name a)
|
||||
| LocationLabelRel a (Name a) a Integer
|
||||
-- ^ @Lo LocationLabelRel completeSpan name offsetSpan offset@
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Location where
|
||||
peel (LocationAddress a) = peel a
|
||||
peel (LocationLabel a) = peel a
|
||||
peel (LocationLabelRel a _ _ _) = a
|
||||
|
||||
data SmallOpcode a = SmallOpcode a Vm.SmallOpcode
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion SmallOpcode where
|
||||
peel (SmallOpcode a _) = a
|
||||
|
||||
data LargeOpcode a = LargeOpcode a Vm.LargeOpcode
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion LargeOpcode where
|
||||
peel (LargeOpcode a _) = a
|
||||
|
||||
data MimaWord a
|
||||
= WordRaw a Integer
|
||||
| WordLocation (Location a)
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion MimaWord where
|
||||
peel (WordRaw a _) = a
|
||||
peel (WordLocation a) = peel a
|
||||
|
||||
data SmallValue a = SmallValue a Integer
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion SmallValue where
|
||||
peel (SmallValue a _) = a
|
||||
|
||||
data Instruction a
|
||||
= SmallInstruction a (SmallOpcode a) (Location a)
|
||||
| LargeInstruction a (LargeOpcode a) (Maybe (SmallValue a))
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Instruction where
|
||||
peel (SmallInstruction a _ _) = a
|
||||
peel (LargeInstruction a _ _) = a
|
||||
|
||||
-- | The first @a@ parameter represents the span of the whole thing. The second
|
||||
-- @a@ parameter represents the span of the directive literal (e. g. @.org@).
|
||||
data RegisterDirective a
|
||||
= RegIar a a (Location a)
|
||||
| RegAcc a a (MimaWord a)
|
||||
| RegRa a a (Location a)
|
||||
| RegSp a a (Location a)
|
||||
| RegFp a a (Location a)
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion RegisterDirective where
|
||||
peel (RegIar a _ _) = a
|
||||
peel (RegAcc a _ _) = a
|
||||
peel (RegRa a _ _) = a
|
||||
peel (RegSp a _ _) = a
|
||||
peel (RegFp a _ _) = a
|
||||
|
||||
data JsonValue a = JsonValue a A.Value
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion JsonValue where
|
||||
peel (JsonValue a _) = a
|
||||
|
||||
-- | The first @a@ parameter represents the span of the whole thing. The second
|
||||
-- @a@ parameter represents the span of the directive literal (e. g. @.org@).
|
||||
data Directive a
|
||||
= Reg a a (RegisterDirective a)
|
||||
| Org a a (Address a)
|
||||
| Lit a a (MimaWord a)
|
||||
| Arr a a [MimaWord a]
|
||||
| Meta a a (Name a) (JsonValue a)
|
||||
| MetaStart a a (Name a) (JsonValue a)
|
||||
| MetaStop a a (Name a)
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion Directive where
|
||||
peel (Reg a _ _) = a
|
||||
peel (Org a _ _) = a
|
||||
peel (Lit a _ _) = a
|
||||
peel (Arr a _ _) = a
|
||||
peel (Meta a _ _ _) = a
|
||||
peel (MetaStart a _ _ _) = a
|
||||
peel (MetaStop a _ _) = a
|
||||
|
||||
data AsmToken a
|
||||
= TokenLabel (Name a)
|
||||
| TokenInstruction (Instruction a)
|
||||
| TokenDirective (Directive a)
|
||||
| TokenComment a T.Text Bool
|
||||
-- ^ @'TokenComment' a text inline@ represents a comment.
|
||||
-- @inline@ is true if the comment is on the same line as an instruction or a label.
|
||||
deriving (Show, Functor)
|
||||
|
||||
instance Onion AsmToken where
|
||||
peel (TokenLabel a) = peel a
|
||||
peel (TokenInstruction a) = peel a
|
||||
peel (TokenDirective a) = peel a
|
||||
peel (TokenComment a _ _) = a
|
||||
|
||||
type Phase1 s = [AsmToken s]
|
||||
|
|
@ -11,7 +11,7 @@ import Data.Foldable
|
|||
import qualified Data.Map.Strict as Map
|
||||
import qualified Data.Text as T
|
||||
|
||||
import qualified Mima.Asm.Phase1 as P1
|
||||
import qualified Mima.Asm.Phase1.Types as P1
|
||||
import Mima.Asm.Phase2.Types
|
||||
import Mima.Asm.Phase2.Util
|
||||
import Mima.Asm.Types
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue