{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Data.SCargot.Parse
( -- * Parsing
decode
, decodeOne
-- * Parsing Control
, SExprParser
, Reader
, Comment
, mkParser
, setCarrier
, addReader
, setComment
-- * Specific SExprParser Conversions
, asRich
, asWellFormed
, withQuote
) where
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<*), pure)
#endif
import Control.Monad ((>=>))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Text (Text)
import Data.String (IsString)
import Text.Parsec ( (<|>)
, (<?>)
, char
, eof
, lookAhead
, many1
, runParser
, skipMany
)
import Text.Parsec.Char (anyChar, space)
import Text.Parsec.Text (Parser)
import Data.SCargot.Repr ( SExpr(..)
, RichSExpr
, WellFormedSExpr
, toRich
, toWellFormed
)
type ReaderMacroMap atom = Map Char (Reader atom)
-- | A 'Reader' represents a reader macro: it takes a parser for
-- the S-Expression type and performs as much or as little
-- parsing as it would like, and then returns an S-expression.
type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
-- | A 'Comment' represents any kind of skippable comment. This
-- parser __must__ be able to fail if a comment is not being
-- recognized, and it __must__ not consume any input in case
-- of failure.
type Comment = Parser ()
-- | A 'SExprParser' describes a parser for a particular value
-- that has been serialized as an s-expression. The @atom@ parameter
-- corresponds to a Haskell type used to represent the atoms,
-- and the @carrier@ parameter corresponds to the parsed S-Expression
-- structure.
data SExprParser atom carrier = SExprParser
{ sesPAtom :: Parser atom
, readerMap :: ReaderMacroMap atom
, comment :: Maybe Comment
, postparse :: SExpr atom -> Either String carrier
}
-- | Create a basic 'SExprParser' when given a parser
-- for an atom type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = mkParser (many1 alphaNum)
-- >>> decode parser "(ele phant)"
-- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
mkParser :: Parser atom -> SExprParser atom (SExpr atom)
mkParser parser = SExprParser
{ sesPAtom = parser
, readerMap = M.empty
, comment = Nothing
, postparse = return
}
-- | Modify the carrier type for a 'SExprParser'. This is
-- used internally to convert between various 'SExpr' representations,
-- but could also be used externally to add an extra conversion layer
-- onto a 'SExprParser'.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> import Data.SCargot.Repr (toRich)
-- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
setCarrier f spec = spec { postparse = postparse spec >=> f }
-- | Convert the final output representation from the 'SExpr' type
-- to the 'RichSExpr' type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = asRich (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
asRich = setCarrier (return . toRich)
-- | Convert the final output representation from the 'SExpr' type
-- to the 'WellFormedSExpr' type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
asWellFormed = setCarrier toWellFormed
-- | Add the ability to execute some particular reader macro, as
-- defined by its initial character and the 'Parser' which returns
-- the parsed S-Expression. The 'Reader' is passed a 'Parser' which
-- can be recursively called to parse more S-Expressions, and begins
-- parsing after the reader character has been removed from the
-- stream.
--
-- >>> import Text.Parsec (alphaNum, char, many1)
-- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
-- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
-- >>> decode parser "(an [ele phant])"
-- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]
addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
addReader c reader spec = spec
{ readerMap = M.insert c reader (readerMap spec) }
-- | Add the ability to ignore some kind of comment. This gets
-- factored into whitespace parsing, and it's very important that
-- the parser supplied __be able to fail__ (as otherwise it will
-- cause an infinite loop), and also that it __not consume any input__
-- (which may require it to be wrapped in 'try'.)
--
-- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string)
-- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
-- >>> let parser = setComment comment (mkParser (many1 alphaNum))
-- >>> decode parser "(ele //a comment\n phant)"
-- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
setComment :: Comment -> SExprParser a c -> SExprParser a c
setComment c spec = spec { comment = Just (c <?> "comment") }
-- | Add the ability to understand a quoted S-Expression.
-- Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
-- assumes that the underlying atom type implements the "IsString"
-- class, and will create the @quote@ atom using @fromString "quote"@.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = withQuote (mkParser (many1 alphaNum))
-- >>> decode parser "'elephant"
-- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
withQuote = addReader '\'' (fmap go)
where go s = SCons "quote" (SCons s SNil)
peekChar :: Parser (Maybe Char)
peekChar = Just <$> lookAhead anyChar <|> pure Nothing
parseGenericSExpr ::
Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
parseGenericSExpr atom reader skip = do
let sExpr = parseGenericSExpr atom reader skip <?> "s-expr"
skip
c <- peekChar
r <- case c of
Nothing -> fail "Unexpected end of input"
Just '(' -> char '(' >> skip >> parseList sExpr skip
Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
_ -> SAtom `fmap` atom
skip
return r
parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
parseList sExpr skip = do
i <- peekChar
case i of
Nothing -> fail "Unexpected end of input"
Just ')' -> char ')' >> return SNil
_ -> do
car <- sExpr
skip
c <- peekChar
case c of
Just '.' -> do
_ <- char '.'
cdr <- sExpr
skip
_ <- char ')'
skip
return (SCons car cdr)
Just ')' -> do
_ <- char ')'
skip
return (SCons car SNil)
_ -> do
cdr <- parseList sExpr skip
return (SCons car cdr)
-- | Given a CommentMap, create the corresponding parser to
-- skip those comments (if they exist).
buildSkip :: Maybe (Parser ()) -> Parser ()
buildSkip Nothing = skipMany space
buildSkip (Just c) = alternate
where alternate = skipMany space >> ((c >> alternate) <|> return ())
doParse :: Parser a -> Text -> Either String a
doParse p t = case runParser p () "" t of
Left err -> Left (show err)
Right x -> Right x
-- | Decode a single S-expression. If any trailing input is left after
-- the S-expression (ignoring comments or whitespace) then this
-- will fail: for those cases, use 'decode', which returns a list of
-- all the S-expressions found at the top level.
decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
where parser = parseGenericSExpr
(sesPAtom spec)
(readerMap spec)
(buildSkip (comment spec))
-- | Decode several S-expressions according to a given 'SExprParser'. This
-- will return a list of every S-expression that appears at the top-level
-- of the document.
decode :: SExprParser atom carrier -> Text -> Either String [carrier]
decode spec =
doParse (many1 parser <* eof) >=> mapM (postparse spec)
where parser = parseGenericSExpr
(sesPAtom spec)
(readerMap spec)
(buildSkip (comment spec))
{-
-- | Encode (without newlines) a single S-expression.
encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
encodeSExpr SNil _ = "()"
encodeSExpr (SAtom s) t = t s
encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
go SNil rs = "(" <> rs <> ")"
go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)
-- | Emit an S-Expression in a machine-readable way. This does no
-- pretty-printing or indentation, and produces no comments.
encodeOne :: SExprParser atom carrier -> carrier -> Text
encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
encode :: SExprParser atom carrier -> [carrier] -> Text
encode spec cs = T.concat (map (encodeOne spec) cs)
-}