gdritter repos s-cargot / 4293524
More documentation updates for Parsec change Getty Ritter 9 years ago
4 changed file(s) with 50 addition(s) and 48 deletion(s). Collapse all Expand all
3434 , string
3535 )
3636
37 import Data.SCargot.General ( Comment
38 , SExprSpec
39 , setComment
40 )
37 import Data.SCargot.General ( Comment
38 , SExprSpec
39 , setComment
40 )
4141
4242 -- | Given a string, produce a comment parser that matches that
4343 -- initial string and ignores everything until the end of the
5757 --
5858 -- > { this { comment }
5959 --
60 -- to be a complete comment, despite the improper nesting. This is
61 -- analogous to standard C-style comments in which
60 -- to be a complete comment, despite the apparent improper nesting.
61 -- This is analogous to standard C-style comments in which
6262 --
6363 -- > /* this /* comment */
6464 --
124124 For example:
125125
126126 > mySpec :: SExprSpec Text (SExpr Text)
127 > mySpec = asWellFormed (mkSpec (takeWhile1 isAlphaNum) id)
127 > mySpec = asWellFormed $ mkSpec (pack <$> many1 alphaNum) id
128128 >
129129 > myLispySpec :: SExprSpec Text (SExpr Text)
130130 > myLispySpec = withLispComments mySpec
136136 comment syntaxes:
137137
138138 > > decode mySpec "(foo ; a lisp comment\n bar)\n"
139 > Left "Failed reading: takeWhile1"
139 > Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom"
140140 > > decode myLispySpec "(foo ; a lisp comment\n bar)\n"
141141 > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
142142 > > decode mySpec "(foo /* a c-like\n comment */ bar)\n"
143 > Left "Failed reading: takeWhile1"
143 > Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom"
144144 > > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n"
145145 > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
146146
2727 import Data.Char (isAlpha, isDigit, isAlphaNum)
2828 import Data.Map.Strict (Map)
2929 import qualified Data.Map.Strict as M
30 import Data.Maybe (fromJust)
3031 import Data.Monoid ((<>))
3132 import Data.String (IsString)
3233 import Data.Text (Text, pack, unpack)
3334 import qualified Data.Text as T
3435 import Text.Parsec ( (<|>)
36 , (<?>)
3537 , char
3638 , eof
3739 , lookAhead
8082 -- any alphanumeric sequence as a valid atom looks like:
8183 --
8284 -- > simpleSpec :: SExprSpec Text (SExpr Text)
83 -- > simpleSpec = mkSpec (takeWhile1 isAlphaNum) id
85 -- > simpleSpec = mkSpec (pack <$> many1 alphaNum) id
8486 mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
8587 mkSpec p s = SExprSpec
86 { sesPAtom = p
88 { sesPAtom = p <?> "atom"
8789 , sesSAtom = s
8890 , readerMap = M.empty
8991 , comment = Nothing
101103 -- the internal S-expression representation using 'asWellFormed', and
102104 -- then providing a conversion between the 'WellFormedSExpr' type and
103105 -- an @Expr@ AST. Notice that the below parser uses 'String' as its
104 -- underlying atom type.
106 -- underlying atom type, instead of some token type.
105107 --
106108 -- > data Expr = Add Expr Expr | Num Int deriving (Eq, Show)
107109 -- >
108110 -- > toExpr :: WellFormedSExpr String -> Either String Expr
109 -- > toExpr (WFSList [WFSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r
110 -- > toExpr (WFSAtom c) | all isDigit c = pure (Num (read c))
111 -- > toExpr c = Left ("Invalid expr: " ++ show c)
111 -- > toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r
112 -- > toExpr (A c) | all isDigit c = pure (Num (read c))
113 -- > toExpr c = Left ("Invalid expr: " ++ show c)
112114 -- >
113115 -- > fromExpr :: Expr -> WellFormedSExpr String
114 -- > fromExpr (Add l r) = WFSList [WFSAtom "+", fromExpr l, fromExpr r]
115 -- > fromExpr (Num n) = WFSAtom (show n)
116 -- > fromExpr (Add l r) = L [A "+", fromExpr l, fromExpr r]
117 -- > fromExpr (Num n) = A (show n)
116118 -- >
117119 -- > mySpec :: SExprSpec String Expr
118120 -- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack
119 -- > where parser = unpack <$> takeWhile1 isValidChar
121 -- > where parser = many1 (satisfy isValidChar)
120122 -- > isValidChar c = isDigit c || c == '+'
121123 convertSpec :: (b -> Either String c) -> (c -> b)
122124 -> SExprSpec a b -> SExprSpec a c
143145 -- stream.
144146 --
145147 -- The following defines an S-expression variant that treats
146 -- @'expr@ as being sugar for @(quote expr)@:
147 --
148 -- > mySpec :: SExprSpec Text (SExpr Text)
149 -- > mySpec = addReader '\'' reader $ mkSpec (takeWhile1 isAlphaNum) id
148 -- @'expr@ as being sugar for @(quote expr)@. Note that this is done
149 -- already in a more general way by the 'withQuote' function, but
150 -- it is a good illustration of using reader macros in practice:
151 --
152 -- > mySpec :: SExprSpec String (SExpr String)
153 -- > mySpec = addReader '\'' reader $ mkSpec (many1 alphaNum) pack
150154 -- > where reader p = quote <$> p
151155 -- > quote e = SCons (SAtom "quote") (SCons e SNil)
152156 addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
163167 -- C++-style comments, i.e. those which begin with @//@ and last
164168 -- until the end of a line:
165169 --
166 -- > t :: SExprSpec Text (SExpr Text)
167 -- > t = setComment comm $ mkSpec (takeWhile1 isAlphaNum) id
168 -- > where comm = try (string "//" *> takeWhile (/= '\n') *> pure ())
170 -- > t :: SExprSpec String (SExpr String)
171 -- > t = setComment comm $ mkSpec (many1 alphaNum) pack
172 -- > where comm = try (string "//" *> manyTill anyChar newline *> pure ())
169173
170174 setComment :: Comment -> SExprSpec a c -> SExprSpec a c
171 setComment c spec = spec { comment = Just c }
175 setComment c spec = spec { comment = Just (c <?> "comment") }
172176
173177 -- | Add the ability to understand a quoted S-Expression. In general,
174178 -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is
185189 parseGenericSExpr ::
186190 Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
187191 parseGenericSExpr atom reader skip = do
188 let sExpr = parseGenericSExpr atom reader skip
192 let sExpr = parseGenericSExpr atom reader skip <?> "s-expr"
189193 skip
190194 c <- peekChar
191195 r <- case c of
(New empty file)
116116
117117 ~~~~.haskell
118118 spec :: SExprSpec Text (SExpr Text)
119 spec = mkSpec (takeWhile1 (\ c -> isAlphaNum c || c `elem` "+-*/!?")) id
119 spec = mkSpec (pack <$> many1 (alphaNum <|> oneOf "+-*/!?")) id
120120 ~~~~
121121
122122 A more elaborate atom type would distinguish between different
124124 identifiers and numbers) is
125125
126126 ~~~~.haskell
127 import Data.Char (isDigit, isAlpha)
128 import Data.Text (Text)
129 import qualified Data.Text as T
127 import Data.Text (Text, pack)
130128
131129 data Atom = Ident Text | Num Int deriving (Eq, Show)
132130
133131 pAtom :: Parser Atom
134 pAtom = ((Num . read . T.unpack) <$> takeWhile1 isDigit)
135 <|> (Ident <$> takeWhile1 isAlpha)
132 pAtom = ((Num . read) <$> many1 digit)
133 <|> ((Ident . pack) <$> many1 letter)
136134
137135 sAtom :: Atom -> Text
138136 sAtom (Ident t) = t
139 sAtom (Num n) = T.pack (show n)
137 sAtom (Num n) = pack (show n)
140138
141139 mySpec :: SExprSpec Atom (SExpr Atom)
142140 mySpec = mkSpec pAtom sAtom
172170 data Expr = Add Expr Expr | Num Int deriving (Eq, Show)
173171
174172 toExpr :: RichSExpr Text -> Either String Expr
175 toExpr (RSList [RSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r
176 toExpr (RSAtom c)
173 toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r
174 toExpr (A c)
177175 | T.all isDigit c = pure (Num (read (T.unpack c)))
178176 | otherwise = Left "Non-numeric token as argument"
179177 toExpr _ = Left "Unrecognized s-expr"
180178
181179 fromExpr :: Expr -> RichSExpr Text
182 fromExpr (Add x y) = RSList [RSAtom "+", fromExpr x, fromExpr y]
183 fromExpr (Num n) = RSAtom (T.pack (show n))
180 fromExpr (Add x y) = L [A "+", fromExpr x, fromExpr y]
181 fromExpr (Num n) = A (T.pack (show n))
184182 ~~~~
185183
186184 then we could use the `convertSpec` function to add this directly to
196194 ## Comments
197195
198196 By default, an S-expression spec does not include a comment syntax, but
199 the provided `withSemicolonComments` function will cause it to understand
197 the provided `withLispComments` function will cause it to understand
200198 traditional Lisp line-oriented comments that begin with a semicolon:
201199
202200 ~~~~.haskell
203201 > decode spec "(this ; has a comment\n inside)\n"
204 Left "Failed reading: takeWhile1"
205 > decode (withSemicolonComments spec) "(this ; has a comment\n inside)\n"
202 Left "(line 1, column 7):\nunexpected \";\"\nexpecting space or atom"
203 > decode (withLispComments spec) "(this ; has a comment\n inside)\n"
206204 Right [SCons (SAtom "this") (SCons (SAtom "inside") SNil)]
207205 ~~~~
208206
217215 For example, the following adds C++-style comments to an S-expression format:
218216
219217 ~~~~.haskell
220 > let cppComment = string "//" >> takeWhile (/= '\n') >> return ()
218 > let cppComment = string "//" >> manyTill anyChar newline >> return ()
221219 > decode (setComment cppComment spec) "(a //comment\n b)\n"
222220 Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)]
223221 ~~~~
227225 A _reader macro_ is a Lisp macro which is invoked during read time. This
228226 allows the _lexical_ syntax of a Lisp to be modified. The most commonly
229227 seen reader macro is the quote, which allows the syntax `'expr` to stand
230 in for the s-expression `(quote expr)`. The S-Cargot library enables this
231 by keeping a map of characters to Parsec parsers that can be used as
228 in for the s-expression `(quote expr)`. The S-Cargot library accommodates
229 this by keeping a map of characters to Parsec parsers that can be used as
232230 readers. There is a special case for the aforementioned quote, but that
233231 could easily be written by hand as
234232
272270 literals:
273271
274272 ~~~~.haskell
275 data Op = Add | Sub | Mul
276 data Atom = AOp Op | ANum Int
273 data Op = Add | Sub | Mul deriving (Eq, Show)
274 data Atom = AOp Op | ANum Int deriving (Eq, Show)
277275 data Expr = EOp Op Expr Expr | ENum Int deriving (Eq, Show)
278276
279277 -- Conversions for our Expr type
288286
289287 -- Parser and serializer for our Atom type
290288 pAtom :: Parser Atom
291 pAtom = ((ANum . read . T.unpack) <$> takeWhile1 isDigit)
289 pAtom = ((ANum . read) <$> many1 digit)
292290 <|> (char '+' *> pure (AOp Add))
293291 <|> (char '-' *> pure (AOp Sub))
294292 <|> (char '*' *> pure (AOp Mul))
301299
302300 -- Our comment syntax
303301 hsComment :: Parser ()
304 hsComment = string "--" >> takeWhile (/= '\n') >> return ()
302 hsComment = string "--" >> manyTill anyChar newline >> return ()
305303
306304 -- Our custom reader macro
307305 hexReader :: Reader Atom