gdritter repos s-cargot / master Data / SCargot / Parse.hs
master

Tree @master (Download .tar.gz)

Parse.hs @masterraw · history · blame

{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}

module Data.SCargot.Parse
  ( -- * Parsing
    decode
  , decodeOne
    -- * Parsing Control
  , SExprParser
  , Reader
  , Comment
  , mkParser
  , setCarrier
  , addReader
  , setComment
    -- * Specific SExprParser Conversions
  , asRich
  , asWellFormed
  , withQuote
  ) where

#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<*), pure)
#endif
import           Control.Monad ((>=>))
import           Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import           Data.Text (Text)
import           Data.String (IsString)
import           Text.Parsec ( (<|>)
                             , (<?>)
                             , char
                             , eof
                             , lookAhead
                             , many1
                             , runParser
                             , skipMany
                             )
import           Text.Parsec.Char (anyChar, space)
import           Text.Parsec.Text (Parser)

import           Data.SCargot.Repr ( SExpr(..)
                                   , RichSExpr
                                   , WellFormedSExpr
                                   , toRich
                                   , toWellFormed
                                   )

type ReaderMacroMap atom = Map Char (Reader atom)

-- | A 'Reader' represents a reader macro: it takes a parser for
--   the S-Expression type and performs as much or as little
--   parsing as it would like, and then returns an S-expression.
type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))

-- | A 'Comment' represents any kind of skippable comment. This
--   parser __must__ be able to fail if a comment is not being
--   recognized, and it __must__ not consume any input in case
--   of failure.
type Comment = Parser ()

-- | A 'SExprParser' describes a parser for a particular value
--   that has been serialized as an s-expression. The @atom@ parameter
--   corresponds to a Haskell type used to represent the atoms,
--   and the @carrier@ parameter corresponds to the parsed S-Expression
--   structure.
data SExprParser atom carrier = SExprParser
  { sesPAtom   :: Parser atom
  , readerMap  :: ReaderMacroMap atom
  , comment    :: Maybe Comment
  , postparse  :: SExpr atom -> Either String carrier
  }

-- | Create a basic 'SExprParser' when given a parser
--   for an atom type.
--
--   >>> import Text.Parsec (alphaNum, many1)
--   >>> let parser = mkParser (many1 alphaNum)
--   >>> decode parser "(ele phant)"
--   Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
mkParser :: Parser atom -> SExprParser atom (SExpr atom)
mkParser parser = SExprParser
  { sesPAtom   = parser
  , readerMap  = M.empty
  , comment    = Nothing
  , postparse  = return
  }

-- | Modify the carrier type for a 'SExprParser'. This is
--   used internally to convert between various 'SExpr' representations,
--   but could also be used externally to add an extra conversion layer
--   onto a 'SExprParser'.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> import Data.SCargot.Repr (toRich)
-- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
setCarrier f spec = spec { postparse = postparse spec >=> f }

-- | Convert the final output representation from the 'SExpr' type
--   to the 'RichSExpr' type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = asRich (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
asRich = setCarrier (return . toRich)

-- | Convert the final output representation from the 'SExpr' type
--   to the 'WellFormedSExpr' type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
asWellFormed = setCarrier toWellFormed

-- | Add the ability to execute some particular reader macro, as
--   defined by its initial character and the 'Parser' which returns
--   the parsed S-Expression. The 'Reader' is passed a 'Parser' which
--   can be recursively called to parse more S-Expressions, and begins
--   parsing after the reader character has been removed from the
--   stream.
--
-- >>> import Text.Parsec (alphaNum, char, many1)
-- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
-- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
-- >>> decode parser "(an [ele phant])"
-- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]

addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
addReader c reader spec = spec
  { readerMap = M.insert c reader (readerMap spec) }

-- | Add the ability to ignore some kind of comment. This gets
--   factored into whitespace parsing, and it's very important that
--   the parser supplied __be able to fail__ (as otherwise it will
--   cause an infinite loop), and also that it __not consume any input__
--   (which may require it to be wrapped in 'try'.)
--
-- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string)
-- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
-- >>> let parser = setComment comment (mkParser (many1 alphaNum))
-- >>> decode parser "(ele //a comment\n  phant)"
-- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]

setComment :: Comment -> SExprParser a c -> SExprParser a c
setComment c spec = spec { comment = Just (c <?> "comment") }

-- | Add the ability to understand a quoted S-Expression.
--   Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
--   assumes that the underlying atom type implements the "IsString"
--   class, and will create the @quote@ atom using @fromString "quote"@.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = withQuote (mkParser (many1 alphaNum))
-- >>> decode parser "'elephant"
-- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
withQuote = addReader '\'' (fmap go)
  where go s  = SCons "quote" (SCons s SNil)

peekChar :: Parser (Maybe Char)
peekChar = Just <$> lookAhead anyChar <|> pure Nothing

parseGenericSExpr ::
  Parser atom  -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
parseGenericSExpr atom reader skip = do
  let sExpr = parseGenericSExpr atom reader skip <?> "s-expr"
  skip
  c <- peekChar
  r <- case c of
    Nothing -> fail "Unexpected end of input"
    Just '(' -> char '(' >> skip >> parseList sExpr skip
    Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
    _ -> SAtom `fmap` atom
  skip
  return r

parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
parseList sExpr skip = do
  i <- peekChar
  case i of
    Nothing  -> fail "Unexpected end of input"
    Just ')' -> char ')' >> return SNil
    _        -> do
      car <- sExpr
      skip
      c <- peekChar
      case c of
        Just '.' -> do
          _ <- char '.'
          cdr <- sExpr
          skip
          _ <- char ')'
          skip
          return (SCons car cdr)
        Just ')' -> do
          _ <- char ')'
          skip
          return (SCons car SNil)
        _ -> do
          cdr <- parseList sExpr skip
          return (SCons car cdr)

-- | Given a CommentMap, create the corresponding parser to
--   skip those comments (if they exist).
buildSkip :: Maybe (Parser ()) -> Parser ()
buildSkip Nothing  = skipMany space
buildSkip (Just c) = alternate
  where alternate = skipMany space >> ((c >> alternate) <|> return ())

doParse :: Parser a -> Text -> Either String a
doParse p t = case runParser p () "" t of
  Left err -> Left (show err)
  Right x  -> Right x

-- | Decode a single S-expression. If any trailing input is left after
--   the S-expression (ignoring comments or whitespace) then this
--   will fail: for those cases, use 'decode', which returns a list of
--   all the S-expressions found at the top level.
decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
  where parser = parseGenericSExpr
                   (sesPAtom spec)
                   (readerMap spec)
                   (buildSkip (comment spec))

-- | Decode several S-expressions according to a given 'SExprParser'. This
--   will return a list of every S-expression that appears at the top-level
--   of the document.
decode :: SExprParser atom carrier -> Text -> Either String [carrier]
decode spec =
  doParse (many1 parser <* eof) >=> mapM (postparse spec)
    where parser = parseGenericSExpr
                     (sesPAtom spec)
                     (readerMap spec)
                     (buildSkip (comment spec))

{-
-- | Encode (without newlines) a single S-expression.
encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
encodeSExpr SNil _         = "()"
encodeSExpr (SAtom s) t    = t s
encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
  where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
        go SNil rs      = "(" <> rs <> ")"
        go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)

-- | Emit an S-Expression in a machine-readable way. This does no
--   pretty-printing or indentation, and produces no comments.
encodeOne :: SExprParser atom carrier -> carrier -> Text
encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)

encode :: SExprParser atom carrier -> [carrier] -> Text
encode spec cs = T.concat (map (encodeOne spec) cs)
-}