gdritter repos s-cargot / d115597
Fixed comment representation and began to add more documentation + examples Getty Ritter 9 years ago
4 changed file(s) with 84 addition(s) and 54 deletion(s). Collapse all Expand all
88 , mkSpec
99 , convertSpec
1010 , addReader
11 , addComment
11 , setComment
1212 -- * Specific SExprSpec Conversions
1313 , asRich
1414 , asWellFormed
2424 , Serializer
2525 ) where
2626
27 import Control.Applicative ((<*))
27 import Control.Applicative ((<*), (*>), (<|>), (<*>), (<$>), pure)
2828 import Control.Monad ((>=>))
2929 import Data.Attoparsec.Text
30 import Data.Char (isAlpha)
30 import Data.Char (isAlpha, isDigit, isAlphaNum)
3131 import Data.Map.Strict (Map)
3232 import qualified Data.Map.Strict as M
33 import Data.Text (Text)
33 import Data.Text (Text, pack, unpack)
3434
3535 import Prelude hiding (takeWhile)
3636
3737 import Data.SCargot.Repr
3838
3939 type ReaderMacroMap atom = Map Char (Reader atom)
40 type CommentMap = Map Char Comment
4140
4241 -- | A 'Reader' represents a reader macro: it takes a parser for
4342 -- the S-Expression type and performs as much or as little
4443 -- parsing as it would like, and then returns an S-expression.
4544 type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
4645
47 -- | A 'Comment' represents any kind of skippable comment.
46 -- | A 'Comment' represents any kind of skippable comment. This
47 -- parser __must__ be able to fail if a comment is not being
48 -- recognized, and it __must__ not consume any input.
4849 type Comment = Parser ()
4950
5051 -- | A 'Serializer' is any function which can serialize an Atom
6162 { sesPAtom :: Parser atom
6263 , sesSAtom :: Serializer atom
6364 , readerMap :: ReaderMacroMap atom
64 , comment :: Comment
65 , comment :: Maybe Comment
6566 , postparse :: SExpr atom -> Either String carrier
6667 , preserial :: carrier -> SExpr atom
6768 }
6869
6970 -- | Create a basic 'SExprSpec' when given a parser and serializer
70 -- for an atom type.
71 -- for an atom type. A small minimal 'SExprSpec' that recognizes
72 -- any alphanumeric sequence as a valid atom looks like:
73 --
74 -- > simpleSpec :: SExprSpec Text (SExpr Text)
75 -- > simpleSpec = mkSpec (takeWhile1 isAlphaNum) id
7176 mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
7277 mkSpec p s = SExprSpec
7378 { sesPAtom = p
7479 , sesSAtom = s
7580 , readerMap = M.empty
76 , commentMap = skipSpace
81 , comment = Nothing
7782 , postparse = return
7883 , preserial = id
7984 }
8186 -- | Modify the carrier type for a 'SExprSpec'. This is
8287 -- used internally to convert between various 'SExpr' representations,
8388 -- but could also be used externally to add an extra conversion layer
84 -- onto a 'SExprSpec', e.g. for a custom Lisp-like language:
85 --
86 -- > mySpec :: SExprSpec MyAtomType MyAST
87 -- > mySpec = convertSpec sexprToMyAST myASTToSexpr spec
88 -- > where spec = mkSpec myParser mySerializer
89 convertSpec :: (b -> Either String c) -> (c -> b) -> SExprSpec a b -> SExprSpec a c
89 -- onto a 'SExprSpec'.
90 --
91 -- The following defines an S-expression spec that recognizes the
92 -- language of binary addition trees. It does so by first transforming
93 -- the internal S-expression representation using 'asWellFormed', and
94 -- then providing a conversion between the 'WellFormedSExpr' type and
95 -- an @Expr@ AST. Notice that the below parser uses 'String' as its
96 -- underlying atom type.
97 --
98 -- > data Expr = Add Expr Expr | Num Int deriving (Eq, Show)
99 -- >
100 -- > toExpr :: WellFormedSExpr String -> Either String Expr
101 -- > toExpr (WFSList [WFSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r
102 -- > toExpr (WFSAtom c) | all isDigit c = pure (Num (read c))
103 -- > toExpr c = Left ("Invalid expr: " ++ show c)
104 -- >
105 -- > fromExpr :: Expr -> WellFormedSExpr String
106 -- > fromExpr (Add l r) = WFSList [WFSAtom "+", fromExpr l, fromExpr r]
107 -- > fromExpr (Num n) = WFSAtom (show n)
108 -- >
109 -- > mySpec :: SExprSpec String Expr
110 -- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack
111 -- > where parser = unpack <$> takeWhile1 isValidChar
112 -- > isValidChar c = isDigit c || c == '+'
113 convertSpec :: (b -> Either String c) -> (c -> b)
114 -> SExprSpec a b -> SExprSpec a c
90115 convertSpec f g spec = spec
91116 { postparse = postparse spec >=> f
92117 , preserial = preserial spec . g
108133 -- can be recursively called to parse more S-Expressions, and begins
109134 -- parsing after the reader character has been removed from the
110135 -- stream.
136 --
137 -- The following defines an S-expression variant that treats
138 -- @'expr@ as being sugar for @(quote expr)@:
139 --
140 -- > mySpec :: SExprSpec Text (SExpr Text)
141 -- > mySpec = addReader '\'' reader $ mkSpec (takeWhile1 isAlphaNum) id
142 -- > where reader p = quote <$> p
143 -- > quote e = SCons (SAtom "quote") (SCons e SNil)
111144 addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
112145 addReader c reader spec = spec
113146 { readerMap = M.insert c reader (readerMap spec) }
114147
115 -- | Add the ability to ignore some kind of comment. If the comment
116 -- parser overlaps with a reader macro or the atom parser, then the
117 -- former will be tried first.
148 -- | Add the ability to ignore some kind of comment. This gets
149 -- factored into whitespace parsing, and it's very important that
150 -- the parser supplied __be able to fail__ (as otherwise it will
151 -- cause an infinite loop), and also that it __not consume any input__
152 -- (which may require it to be wrapped in 'try'.)
153 --
154 -- The following code defines an S-expression variant that skips
155 -- C++-style comments, i.e. those which begin with @//@ and last
156 -- until the end of a line:
157 --
158 -- > t :: SExprSpec Text (SExpr Text)
159 -- > t = setComment comm $ mkSpec (takeWhile1 isAlphaNum) id
160 -- > where comm = try (string "//" *> takeWhile (/= '\n') *> pure ())
161
118162 setComment :: Comment -> SExprSpec a c -> SExprSpec a c
119 setComment c spec = spec { comment = c }
163 setComment c spec = spec { comment = Just c }
120164
121165 -- | Add the ability to skip line comments beginning with a semicolon.
122166 withSemicolonComments :: SExprSpec a c -> SExprSpec a c
123 withSemicolonComments = addComment ';' (skipWhile (\ c -> c /= '\n'))
167 withSemicolonComments = setComment (char ';' >> takeWhile (/='\n') >> return ())
124168
125169 -- | Add the ability to understand a quoted S-Expression. In general,
126170 -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is
128172 -- expressions to a 'SExprSpec', provided that you supply which
129173 -- atom you want substituted in for the symbol @quote@.
130174 withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a)
131 withQuote q = addReader '\'' prs
132 where prs p = go `fmap` p
133 go s = SCons (SAtom q) (SCons s SNil)
175 withQuote q = addReader '\'' (fmap go)
176 where go s = SCons (SAtom q) (SCons s SNil)
134177
135178 parseGenericSExpr ::
136179 Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
174217
175218 -- | Given a CommentMap, create the corresponding parser to
176219 -- skip those comments (if they exist).
177 buildSkip :: CommentMap -> Parser ()
178 buildSkip m = skipSpace >> comments >> skipSpace
179 where comments = do
180 c <- peekChar
181 case c of
182 Nothing -> return ()
183 Just c' -> case M.lookup c' m of
184 Just p -> anyChar >> p
185 Nothing -> return ()
186
187 (#) :: a -> (a -> b) -> b
188 (#) = flip ($)
189
190 testSpec :: SExprSpec Text (SExpr Text)
191 testSpec = mkSpec (takeWhile1 isAlpha) id
192 # withQuote "quote"
193 # addReader '#' (\ p -> SCons (SAtom "vector") `fmap` p)
220 buildSkip :: Maybe (Parser ()) -> Parser ()
221 buildSkip Nothing = skipSpace
222 buildSkip (Just c) = alternate
223 where alternate = skipSpace >> ((c >> alternate) <|> return ())
194224
195225 -- | Decode a single S-expression. If any trailing input is left after
196226 -- the S-expression (ignoring comments or whitespace) then this
198228 -- all the S-expressions found at the top level.
199229 decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
200230 decodeOne SExprSpec { .. } = parseOnly (parser <* endOfInput) >=> postparse
201 where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
231 where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment)
202232
203233 -- | Decode several S-expressions according to a given 'SExprSpec'. This
204234 -- will return a list of every S-expression that appears at the top-level
206236 decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
207237 decode SExprSpec { .. } =
208238 parseOnly (many1 parser <* endOfInput) >=> mapM postparse
209 where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
239 where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment)
210240
211241 -- | Emit an S-Expression in a machine-readable way. This
212242 encode :: SExprSpec atom carrier -> carrier -> Text
1 {-# LANGUAGE DeriveFunctor #-}
2
13 module Data.SCargot.Repr
24 ( -- * Elementary SExpr representation
35 SExpr(..)
1921 = SCons (SExpr atom) (SExpr atom)
2022 | SAtom atom
2123 | SNil
22 deriving (Eq, Show, Read)
24 deriving (Eq, Show, Read, Functor)
2325
2426 -- | Sometimes, the cons-based interface is too low
2527 -- level, and we'd rather have the lists themselves
3638 = RSList [RichSExpr atom]
3739 | RSDotted [RichSExpr atom] atom
3840 | RSAtom atom
39 deriving (Eq, Show, Read)
41 deriving (Eq, Show, Read, Functor)
4042
4143 -- | It should always be true that
4244 --
6668 data WellFormedSExpr atom
6769 = WFSList [WellFormedSExpr atom]
6870 | WFSAtom atom
69 deriving (Eq, Show, Read)
71 deriving (Eq, Show, Read, Functor)
7072
7173 -- | This will be @Nothing@ if the argument contains an
7274 -- improper list. It should hold that
55 import qualified Data.ByteString.Base64 as B64
66 import Data.Text (Text)
77 import qualified Data.Text as T
8
9 newtype Atom = Atom { fromAtom :: ByteString } deriving (Eq, Show, Read)
108
119 pToken :: Parser ByteString
1210 pToken = undefined
99 module Data.SCargot.Tutorial
1010 ( -- * Basic Usage and Organization
1111 -- $usage
12 -- * Building a Custom Config Format
13 -- $config
1214 -- * Analyzing Scheme code
1315 -- $scheme
14 -- * Building a Custom Config Format
15 -- $config
1616 -- * Building a Custom Lisp
1717 -- $lisp
1818 ) where
1919
2020 {- $usage
21 When people talk about s-expressions, they're really talking about
22 a _family_ of formats that have in common a rough structure and
23 the fact that -}
24
25 {- $config
2126
2227 -}
2328
2530
2631 -}
2732
28
29 {- $config
30
31 -}
32
3333 {- $lisp
3434
3535 -}