gdritter repos s-cargot / 7fe3c74
Switched from AttoParsec to Parsec for somewhat better error messages Getty Ritter 10 years ago
5 changed file(s) with 66 addition(s) and 62 deletion(s). Collapse all Expand all
1010 , withQuote
1111 ) where
1212
13 import Control.Applicative ((<$>))
1314 import Data.Char (isAlphaNum)
14 import Data.Attoparsec.Text (Parser, takeWhile1)
15 import Data.Text (Text)
15 import Text.Parsec -- (Parser, takeWhile1)
16 import Data.Text (Text, pack)
1617
1718 import Data.SCargot.Repr.Basic
1819 import Data.SCargot.General
2021
2122 isAtomChar :: Char -> Bool
2223 isAtomChar c = isAlphaNum c
23 || c == '-'
24 || c == '*'
25 || c == '/'
26 || c == '+'
27 || c == '<'
28 || c == '>'
29 || c == '='
30 || c == '!'
31 || c == '?'
24 || c == '-' || c == '*' || c == '/'
25 || c == '+' || c == '<' || c == '>'
26 || c == '=' || c == '!' || c == '?'
3227
3328 -- | A 'SExprSpec' that understands atoms to be sequences of
3429 -- alphanumeric characters as well as the punctuation
35 -- characters @-*/+<>=!?@, and does no processing of them.
30 -- characters @[-*/+<>=!?]@, and does no processing of them.
3631 -- This is not quite representative of actual lisps, which
37 -- would (for example) accept various kinds of string
38 -- literals. This should be sufficient for most ad-hoc
39 -- storage or configuration formats.
32 -- would, for example, accept various kinds of string
33 -- and numeric literals.
4034 basicSpec :: SExprSpec Text (SExpr Text)
41 basicSpec = mkSpec (takeWhile1 isAtomChar) id
35 basicSpec = mkSpec pToken id
36 where pToken = pack <$> many1 (satisfy isAtomChar)
2424 , simpleBlockComment
2525 ) where
2626
27 import Control.Applicative ((<|>))
2827 import Control.Monad (void)
29 import Data.Attoparsec.Text
3028 import Data.Text (Text)
29 import Text.Parsec
3130
3231 import Prelude hiding (takeWhile)
3332
3635 -- | Given a string, produce a comment parser that matches that
3736 -- initial string and ignores everything until the end of the
3837 -- line.
39 lineComment :: Text -> Comment
40 lineComment s = string s >> takeWhile (/= '\n') >> return ()
38 lineComment :: String -> Comment
39 lineComment s = string s >> skipMany (noneOf "\n") >> return ()
4140
4241 -- | Given two strings, a begin and an end delimeter, produce a
4342 -- parser that matches the beginning delimeter and then ignores
5756 -- > /* this /* comment */
5857 --
5958 -- is a complete comment.
60 simpleBlockComment :: Text -> Text -> Comment
59 simpleBlockComment :: String -> String -> Comment
6160 simpleBlockComment begin end =
6261 string begin >>
6362 manyTill anyChar (string end) >>
129128 We can then use these to parse s-expressions with different kinds of
130129 comment syntaxes:
131130
132 > decode mySpec "(foo ; a lisp comment\n bar)\n"
131 > > decode mySpec "(foo ; a lisp comment\n bar)\n"
133132 > Left "Failed reading: takeWhile1"
134 > decode myLispySpec "(foo ; a lisp comment\n bar)\n"
133 > > decode myLispySpec "(foo ; a lisp comment\n bar)\n"
135134 > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
136 > decode mySpec "(foo /* a c-like\n comment */ bar)\n"
135 > > decode mySpec "(foo /* a c-like\n comment */ bar)\n"
137136 > Left "Failed reading: takeWhile1"
138 > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n"
137 > > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n"
139138 > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
140139
141140 -}
2222 , Serializer
2323 ) where
2424
25 import Control.Applicative ((<*), (*>), (<|>), (<*>), (<$>), pure)
25 import Control.Applicative ((<*), (*>), (<*>), (<$>), pure)
2626 import Control.Monad ((>=>))
27 import Data.Attoparsec.Text
2827 import Data.Char (isAlpha, isDigit, isAlphaNum)
2928 import Data.Map.Strict (Map)
3029 import qualified Data.Map.Strict as M
3231 import Data.String (IsString)
3332 import Data.Text (Text, pack, unpack)
3433 import qualified Data.Text as T
34 import Text.Parsec
35 import Text.Parsec.Char (anyChar, space)
36 import Text.Parsec.Text
3537
3638 import Prelude hiding (takeWhile)
3739
171173 withQuote :: IsString t => SExprSpec t (SExpr t) -> SExprSpec t (SExpr t)
172174 withQuote = addReader '\'' (fmap go)
173175 where go s = SCons "quote" (SCons s SNil)
176
177 peekChar :: Parser (Maybe Char)
178 peekChar = Just <$> lookAhead anyChar <|> pure Nothing
174179
175180 parseGenericSExpr ::
176181 Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
215220 -- | Given a CommentMap, create the corresponding parser to
216221 -- skip those comments (if they exist).
217222 buildSkip :: Maybe (Parser ()) -> Parser ()
218 buildSkip Nothing = skipSpace
223 buildSkip Nothing = skipMany space
219224 buildSkip (Just c) = alternate
220 where alternate = skipSpace >> ((c >> alternate) <|> return ())
225 where alternate = skipMany space >> ((c >> alternate) <|> return ())
226
227 doParse :: Parser a -> Text -> Either String a
228 doParse p t = case runParser p () "" t of
229 Left err -> Left (show err)
230 Right x -> Right x
221231
222232 -- | Decode a single S-expression. If any trailing input is left after
223233 -- the S-expression (ignoring comments or whitespace) then this
224234 -- will fail: for those cases, use 'decode', which returns a list of
225235 -- all the S-expressions found at the top level.
226236 decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
227 decodeOne spec = parseOnly (parser <* endOfInput) >=> (postparse spec)
237 decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
228238 where parser = parseGenericSExpr
229239 (sesPAtom spec)
230240 (readerMap spec)
235245 -- of the document.
236246 decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
237247 decode spec =
238 parseOnly (many1 parser <* endOfInput) >=> mapM (postparse spec)
248 doParse (many1 parser <* eof) >=> mapM (postparse spec)
239249 where parser = parseGenericSExpr
240250 (sesPAtom spec)
241251 (readerMap spec)
6868 functions.
6969
7070 ~~~~.haskell
71 *Data.SCargot.General> decode spec "(a b)"
71 > decode spec "(a b)"
7272 Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)]
73 *Data.SCargot.General> decode (asRich spec) "(a b)"
73 > decode (asRich spec) "(a b)"
7474 Right [RSList [RSAtom "a",RSAtom "b"]]
75 *Data.SCargot.General> decode (asWellFormed spec) "(a b)"
75 > decode (asWellFormed spec) "(a b)"
7676 Right [WFSList [WFSAtom "a",WFSAtom "b"]]
77 *Data.SCargot.General> decode spec "(a . b)"
77 > decode spec "(a . b)"
7878 Right [SCons (SAtom "a") (SAtom "b")]
79 *Data.SCargot.General> decode (asRich spec) "(a . b)"
79 > decode (asRich spec) "(a . b)"
8080 Right [RSDotted [RSAtom "a"] "b"]
81 *Data.SCargot.General> decode (asWellFormed spec) "(a . b)"
81 > decode (asWellFormed spec) "(a . b)"
8282 Left "Found atom in cdr position"
8383 ~~~~
8484
9090 you plan on working with:
9191
9292 ~~~~.haskell
93 *Data.SCargot.Repr.Basic> A 2 ::: A 3 ::: A 4 ::: Nil
93 > A 2 ::: A 3 ::: A 4 ::: Nil
9494 SCons (SCons (SCons (SAtom 2) (SAtom 3)) (SAtom 4)) SNil
9595 ~~~~
9696
9797 ~~~~.haskell
98 *Data.SCargot.Repr.WellFormed> L [A 1,A 2,A 3]
98 > L [A 1,A 2,A 3]
9999 WFSList [WFSAtom 1,WFSAtom 2,WFSAtom 3]
100 *Data.SCargot.Repr.WellFormed> let sexprSum (L xs) = sum (map sexprSum xs); sexprSum (A n) = n
101 *Data.SCargot.Repr Data.SCargot.Repr.WellFormed> :t sexprSum
100 > let sexprSum (L xs) = sum (map sexprSum xs); sexprSum (A n) = n
101 > :t sexprSum
102102 sexprSum :: Num a => WellFormedSExpr a -> a
103 *Data.SCargot.Repr.WellFormed> sexprSum (L [A 2, L [A 3, A 4]])
103 > sexprSum (L [A 2, L [A 3, A 4]])
104104 9
105105 ~~~~
106106
107107 ## Atom Types
108108
109109 Any type can serve as an underlying atom type provided that it has
110 an AttoParsec parser and a serializer (i.e. a way of turning it
110 an Parsec parser and a serializer (i.e. a way of turning it
111111 into `Text`.) For these examples, I'm going to use a very simple
112112 serializer that is roughly like the one found in `Data.SCargot.Basic`,
113113 which parses symbolic tokens of letters, numbers, and some
146146 for both parsing and serialization:
147147
148148 ~~~~.haskell
149 *Data.SCargot.General T> decode mySpec "(foo 1)"
149 > decode mySpec "(foo 1)"
150150 Right [SCons (SAtom (Ident "foo")) (SCons (SAtom (Num 1)) SNil)]
151 *Data.SCargot.General T> encode mySpec [SCons (SAtom (Num 0)) SNil]
151 > encode mySpec [SCons (SAtom (Num 0)) SNil]
152152 "(0)"
153153 ~~~~
154154
187187 the `SExprSpec`:
188188
189189 ~~~~.haskell
190 *Data.SCargot.General T> decode (convertSpec toExpr fromExpr (asRich spec)) "(+ 1 2)"
190 > decode (convertSpec toExpr fromExpr (asRich spec)) "(+ 1 2)"
191191 Right [Add (Num 1) (Num 2)]
192 *Data.SCargot.General T> decode (convertSpec toExpr fromExpr (asRich spec)) "(0 1 2)"
192 > decode (convertSpec toExpr fromExpr (asRich spec)) "(0 1 2)"
193193 Left "Unrecognized s-expr"
194194 ~~~~
195195
200200 traditional Lisp line-oriented comments that begin with a semicolon:
201201
202202 ~~~~.haskell
203 *Data.SCargot.General> decode spec "(this ; has a comment\n inside)\n"
203 > decode spec "(this ; has a comment\n inside)\n"
204204 Left "Failed reading: takeWhile1"
205 *Data.SCargot.General> decode (withSemicolonComments spec) "(this ; has a comment\n inside)\n"
205 > decode (withSemicolonComments spec) "(this ; has a comment\n inside)\n"
206206 Right [SCons (SAtom "this") (SCons (SAtom "inside") SNil)]
207207 ~~~~
208208
209209 Additionally, you can provide your own comment syntax in the form of an
210 AttoParsec parser. Any AttoParsec parser can be used, so long as it meets
210 Parsec parser. Any Parsec parser can be used, so long as it meets
211211 the following criteria:
212212 - it is capable of failing (as is called until SCargot believes that there
213213 are no more comments)
217217 For example, the following adds C++-style comments to an S-expression format:
218218
219219 ~~~~.haskell
220 *Data.SCargot.General> let cppComment = string "//" >> takeWhile (/= '\n') >> return ()
221 *Data.SCargot.General> decode (setComment cppComment spec) "(a //comment\n b)\n"
220 > let cppComment = string "//" >> takeWhile (/= '\n') >> return ()
221 > decode (setComment cppComment spec) "(a //comment\n b)\n"
222222 Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)]
223223 ~~~~
224224
228228 allows the _lexical_ syntax of a Lisp to be modified. The most commonly
229229 seen reader macro is the quote, which allows the syntax `'expr` to stand
230230 in for the s-expression `(quote expr)`. The S-Cargot library enables this
231 by keeping a map of characters to AttoParsec parsers that can be used as
231 by keeping a map of characters to Parsec parsers that can be used as
232232 readers. There is a special case for the aforementioned quote, but that
233233 could easily be written by hand as
234234
235235 ~~~~.haskell
236 *Data.SCargot.General> let doQuote c = SCons (SAtom "quote") (SCons c SNil)
237 *Data.SCargot.General> let qReader = addReader '\'' (\ p -> fmap doQuote p)
238 *Data.SCargot.General> decode (qReader mySpec) "'foo"
236 > let quoteExpr c = SCons (SAtom "quote") (SCons c SNil)
237 > let withQuote = addReader '\'' (\ p -> fmap quoteExpr p)
238 > decode (withQuote mySpec) "'foo"
239239 Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
240240 ~~~~
241241
246246 parsing anything else and merely returns a new token:
247247
248248 ~~~~.haskell
249 *Data.SCargot.General> let qmReader = addReader '?' (\ _ -> pure (SAtom "huh"))
250 *Data.SCargot.General> decode (qmReader mySpec) "(?1 2)"
249 > let qmReader = addReader '?' (\ _ -> pure (SAtom "huh"))
250 > decode (qmReader mySpec) "(?1 2)"
251251 Right [SCons (SAtom "huh") (SCons (SAtom "1") (SCons (SAtom "2") SNil))]
252252 ~~~~
253253
259259 is reached:
260260
261261 ~~~~.haskell
262 *Data.SCargot.General> let pVec p = (char ']' *> pure SNil) <|> (SCons <$> p <*> pVec p)
263 *Data.SCargot.General> let vec = addReader '[' pVec
264 *Data.SCargot.General> decode (asRich (vec mySpec)) "(1 [2 3])"
262 > let pVec p = (char ']' *> pure SNil) <|> (SCons <$> p <*> pVec p)
263 > let vec = addReader '[' pVec
264 > decode (asRich (vec mySpec)) "(1 [2 3])"
265265 Right [RSList [RSAtom "1",RSList [RSAtom "2",RSAtom "3"]]]
266266 ~~~~
267267
2323 Data.SCargot.Tutorial
2424 -- other-modules:
2525 -- other-extensions:
26 build-depends: base >=4.7 && <4.8, attoparsec, text, containers
26 build-depends: base >=4.7 && <4.8, parsec, text, containers
2727 -- hs-source-dirs:
2828 default-language: Haskell2010