8 | 8 |
, mkSpec
|
9 | 9 |
, convertSpec
|
10 | 10 |
, addReader
|
11 | |
, addComment
|
| 11 |
, setComment
|
12 | 12 |
-- * Specific SExprSpec Conversions
|
13 | 13 |
, asRich
|
14 | 14 |
, asWellFormed
|
|
24 | 24 |
, Serializer
|
25 | 25 |
) where
|
26 | 26 |
|
27 | |
import Control.Applicative ((<*))
|
| 27 |
import Control.Applicative ((<*), (*>), (<|>), (<*>), (<$>), pure)
|
28 | 28 |
import Control.Monad ((>=>))
|
29 | 29 |
import Data.Attoparsec.Text
|
30 | |
import Data.Char (isAlpha)
|
| 30 |
import Data.Char (isAlpha, isDigit, isAlphaNum)
|
31 | 31 |
import Data.Map.Strict (Map)
|
32 | 32 |
import qualified Data.Map.Strict as M
|
33 | |
import Data.Text (Text)
|
| 33 |
import Data.Text (Text, pack, unpack)
|
34 | 34 |
|
35 | 35 |
import Prelude hiding (takeWhile)
|
36 | 36 |
|
37 | 37 |
import Data.SCargot.Repr
|
38 | 38 |
|
39 | 39 |
type ReaderMacroMap atom = Map Char (Reader atom)
|
40 | |
type CommentMap = Map Char Comment
|
41 | 40 |
|
42 | 41 |
-- | A 'Reader' represents a reader macro: it takes a parser for
|
43 | 42 |
-- the S-Expression type and performs as much or as little
|
44 | 43 |
-- parsing as it would like, and then returns an S-expression.
|
45 | 44 |
type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
|
46 | 45 |
|
47 | |
-- | A 'Comment' represents any kind of skippable comment.
|
| 46 |
-- | A 'Comment' represents any kind of skippable comment. This
|
| 47 |
-- parser __must__ be able to fail if a comment is not being
|
| 48 |
-- recognized, and it __must__ not consume any input.
|
48 | 49 |
type Comment = Parser ()
|
49 | 50 |
|
50 | 51 |
-- | A 'Serializer' is any function which can serialize an Atom
|
|
61 | 62 |
{ sesPAtom :: Parser atom
|
62 | 63 |
, sesSAtom :: Serializer atom
|
63 | 64 |
, readerMap :: ReaderMacroMap atom
|
64 | |
, comment :: Comment
|
| 65 |
, comment :: Maybe Comment
|
65 | 66 |
, postparse :: SExpr atom -> Either String carrier
|
66 | 67 |
, preserial :: carrier -> SExpr atom
|
67 | 68 |
}
|
68 | 69 |
|
69 | 70 |
-- | Create a basic 'SExprSpec' when given a parser and serializer
|
70 | |
-- for an atom type.
|
| 71 |
-- for an atom type. A small minimal 'SExprSpec' that recognizes
|
| 72 |
-- any alphanumeric sequence as a valid atom looks like:
|
| 73 |
--
|
| 74 |
-- > simpleSpec :: SExprSpec Text (SExpr Text)
|
| 75 |
-- > simpleSpec = mkSpec (takeWhile1 isAlphaNum) id
|
71 | 76 |
mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
|
72 | 77 |
mkSpec p s = SExprSpec
|
73 | 78 |
{ sesPAtom = p
|
74 | 79 |
, sesSAtom = s
|
75 | 80 |
, readerMap = M.empty
|
76 | |
, commentMap = skipSpace
|
| 81 |
, comment = Nothing
|
77 | 82 |
, postparse = return
|
78 | 83 |
, preserial = id
|
79 | 84 |
}
|
|
81 | 86 |
-- | Modify the carrier type for a 'SExprSpec'. This is
|
82 | 87 |
-- used internally to convert between various 'SExpr' representations,
|
83 | 88 |
-- but could also be used externally to add an extra conversion layer
|
84 | |
-- onto a 'SExprSpec', e.g. for a custom Lisp-like language:
|
85 | |
--
|
86 | |
-- > mySpec :: SExprSpec MyAtomType MyAST
|
87 | |
-- > mySpec = convertSpec sexprToMyAST myASTToSexpr spec
|
88 | |
-- > where spec = mkSpec myParser mySerializer
|
89 | |
convertSpec :: (b -> Either String c) -> (c -> b) -> SExprSpec a b -> SExprSpec a c
|
| 89 |
-- onto a 'SExprSpec'.
|
| 90 |
--
|
| 91 |
-- The following defines an S-expression spec that recognizes the
|
| 92 |
-- language of binary addition trees. It does so by first transforming
|
| 93 |
-- the internal S-expression representation using 'asWellFormed', and
|
| 94 |
-- then providing a conversion between the 'WellFormedSExpr' type and
|
| 95 |
-- an @Expr@ AST. Notice that the below parser uses 'String' as its
|
| 96 |
-- underlying atom type.
|
| 97 |
--
|
| 98 |
-- > data Expr = Add Expr Expr | Num Int deriving (Eq, Show)
|
| 99 |
-- >
|
| 100 |
-- > toExpr :: WellFormedSExpr String -> Either String Expr
|
| 101 |
-- > toExpr (WFSList [WFSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r
|
| 102 |
-- > toExpr (WFSAtom c) | all isDigit c = pure (Num (read c))
|
| 103 |
-- > toExpr c = Left ("Invalid expr: " ++ show c)
|
| 104 |
-- >
|
| 105 |
-- > fromExpr :: Expr -> WellFormedSExpr String
|
| 106 |
-- > fromExpr (Add l r) = WFSList [WFSAtom "+", fromExpr l, fromExpr r]
|
| 107 |
-- > fromExpr (Num n) = WFSAtom (show n)
|
| 108 |
-- >
|
| 109 |
-- > mySpec :: SExprSpec String Expr
|
| 110 |
-- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack
|
| 111 |
-- > where parser = unpack <$> takeWhile1 isValidChar
|
| 112 |
-- > isValidChar c = isDigit c || c == '+'
|
| 113 |
convertSpec :: (b -> Either String c) -> (c -> b)
|
| 114 |
-> SExprSpec a b -> SExprSpec a c
|
90 | 115 |
convertSpec f g spec = spec
|
91 | 116 |
{ postparse = postparse spec >=> f
|
92 | 117 |
, preserial = preserial spec . g
|
|
108 | 133 |
-- can be recursively called to parse more S-Expressions, and begins
|
109 | 134 |
-- parsing after the reader character has been removed from the
|
110 | 135 |
-- stream.
|
| 136 |
--
|
| 137 |
-- The following defines an S-expression variant that treats
|
| 138 |
-- @'expr@ as being sugar for @(quote expr)@:
|
| 139 |
--
|
| 140 |
-- > mySpec :: SExprSpec Text (SExpr Text)
|
| 141 |
-- > mySpec = addReader '\'' reader $ mkSpec (takeWhile1 isAlphaNum) id
|
| 142 |
-- > where reader p = quote <$> p
|
| 143 |
-- > quote e = SCons (SAtom "quote") (SCons e SNil)
|
111 | 144 |
addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
|
112 | 145 |
addReader c reader spec = spec
|
113 | 146 |
{ readerMap = M.insert c reader (readerMap spec) }
|
114 | 147 |
|
115 | |
-- | Add the ability to ignore some kind of comment. If the comment
|
116 | |
-- parser overlaps with a reader macro or the atom parser, then the
|
117 | |
-- former will be tried first.
|
| 148 |
-- | Add the ability to ignore some kind of comment. This gets
|
| 149 |
-- factored into whitespace parsing, and it's very important that
|
| 150 |
-- the parser supplied __be able to fail__ (as otherwise it will
|
| 151 |
-- cause an infinite loop), and also that it __not consume any input__
|
| 152 |
-- (which may require it to be wrapped in 'try'.)
|
| 153 |
--
|
| 154 |
-- The following code defines an S-expression variant that skips
|
| 155 |
-- C++-style comments, i.e. those which begin with @//@ and last
|
| 156 |
-- until the end of a line:
|
| 157 |
--
|
| 158 |
-- > t :: SExprSpec Text (SExpr Text)
|
| 159 |
-- > t = setComment comm $ mkSpec (takeWhile1 isAlphaNum) id
|
| 160 |
-- > where comm = try (string "//" *> takeWhile (/= '\n') *> pure ())
|
| 161 |
|
118 | 162 |
setComment :: Comment -> SExprSpec a c -> SExprSpec a c
|
119 | |
setComment c spec = spec { comment = c }
|
| 163 |
setComment c spec = spec { comment = Just c }
|
120 | 164 |
|
121 | 165 |
-- | Add the ability to skip line comments beginning with a semicolon.
|
122 | 166 |
withSemicolonComments :: SExprSpec a c -> SExprSpec a c
|
123 | |
withSemicolonComments = addComment ';' (skipWhile (\ c -> c /= '\n'))
|
| 167 |
withSemicolonComments = setComment (char ';' >> takeWhile (/='\n') >> return ())
|
124 | 168 |
|
125 | 169 |
-- | Add the ability to understand a quoted S-Expression. In general,
|
126 | 170 |
-- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is
|
|
128 | 172 |
-- expressions to a 'SExprSpec', provided that you supply which
|
129 | 173 |
-- atom you want substituted in for the symbol @quote@.
|
130 | 174 |
withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a)
|
131 | |
withQuote q = addReader '\'' prs
|
132 | |
where prs p = go `fmap` p
|
133 | |
go s = SCons (SAtom q) (SCons s SNil)
|
| 175 |
withQuote q = addReader '\'' (fmap go)
|
| 176 |
where go s = SCons (SAtom q) (SCons s SNil)
|
134 | 177 |
|
135 | 178 |
parseGenericSExpr ::
|
136 | 179 |
Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
|
|
174 | 217 |
|
175 | 218 |
-- | Given a CommentMap, create the corresponding parser to
|
176 | 219 |
-- skip those comments (if they exist).
|
177 | |
buildSkip :: CommentMap -> Parser ()
|
178 | |
buildSkip m = skipSpace >> comments >> skipSpace
|
179 | |
where comments = do
|
180 | |
c <- peekChar
|
181 | |
case c of
|
182 | |
Nothing -> return ()
|
183 | |
Just c' -> case M.lookup c' m of
|
184 | |
Just p -> anyChar >> p
|
185 | |
Nothing -> return ()
|
186 | |
|
187 | |
(#) :: a -> (a -> b) -> b
|
188 | |
(#) = flip ($)
|
189 | |
|
190 | |
testSpec :: SExprSpec Text (SExpr Text)
|
191 | |
testSpec = mkSpec (takeWhile1 isAlpha) id
|
192 | |
# withQuote "quote"
|
193 | |
# addReader '#' (\ p -> SCons (SAtom "vector") `fmap` p)
|
| 220 |
buildSkip :: Maybe (Parser ()) -> Parser ()
|
| 221 |
buildSkip Nothing = skipSpace
|
| 222 |
buildSkip (Just c) = alternate
|
| 223 |
where alternate = skipSpace >> ((c >> alternate) <|> return ())
|
194 | 224 |
|
195 | 225 |
-- | Decode a single S-expression. If any trailing input is left after
|
196 | 226 |
-- the S-expression (ignoring comments or whitespace) then this
|
|
198 | 228 |
-- all the S-expressions found at the top level.
|
199 | 229 |
decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
|
200 | 230 |
decodeOne SExprSpec { .. } = parseOnly (parser <* endOfInput) >=> postparse
|
201 | |
where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
|
| 231 |
where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment)
|
202 | 232 |
|
203 | 233 |
-- | Decode several S-expressions according to a given 'SExprSpec'. This
|
204 | 234 |
-- will return a list of every S-expression that appears at the top-level
|
|
206 | 236 |
decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
|
207 | 237 |
decode SExprSpec { .. } =
|
208 | 238 |
parseOnly (many1 parser <* endOfInput) >=> mapM postparse
|
209 | |
where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
|
| 239 |
where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment)
|
210 | 240 |
|
211 | 241 |
-- | Emit an S-Expression in a machine-readable way. This
|
212 | 242 |
encode :: SExprSpec atom carrier -> carrier -> Text
|