More documentation updates for Parsec change
Getty Ritter
10 years ago
34 | 34 | , string |
35 | 35 | ) |
36 | 36 | |
37 | import Data.SCargot.General ( Comment | |
38 | , SExprSpec | |
39 | , setComment | |
40 | ) | |
37 | import Data.SCargot.General ( Comment | |
38 | , SExprSpec | |
39 | , setComment | |
40 | ) | |
41 | 41 | |
42 | 42 | -- | Given a string, produce a comment parser that matches that |
43 | 43 | -- initial string and ignores everything until the end of the |
57 | 57 | -- |
58 | 58 | -- > { this { comment } |
59 | 59 | -- |
60 | -- to be a complete comment, despite the improper nesting. This is | |
61 | -- analogous to standard C-style comments in which | |
60 | -- to be a complete comment, despite the apparent improper nesting. | |
61 | -- This is analogous to standard C-style comments in which | |
62 | 62 | -- |
63 | 63 | -- > /* this /* comment */ |
64 | 64 | -- |
124 | 124 | For example: |
125 | 125 | |
126 | 126 | > mySpec :: SExprSpec Text (SExpr Text) |
127 |
> mySpec = asWellFormed |
|
127 | > mySpec = asWellFormed $ mkSpec (pack <$> many1 alphaNum) id | |
128 | 128 | > |
129 | 129 | > myLispySpec :: SExprSpec Text (SExpr Text) |
130 | 130 | > myLispySpec = withLispComments mySpec |
136 | 136 | comment syntaxes: |
137 | 137 | |
138 | 138 | > > decode mySpec "(foo ; a lisp comment\n bar)\n" |
139 |
> Left " |
|
139 | > Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom" | |
140 | 140 | > > decode myLispySpec "(foo ; a lisp comment\n bar)\n" |
141 | 141 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] |
142 | 142 | > > decode mySpec "(foo /* a c-like\n comment */ bar)\n" |
143 |
> Left " |
|
143 | > Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom" | |
144 | 144 | > > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n" |
145 | 145 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] |
146 | 146 |
27 | 27 | import Data.Char (isAlpha, isDigit, isAlphaNum) |
28 | 28 | import Data.Map.Strict (Map) |
29 | 29 | import qualified Data.Map.Strict as M |
30 | import Data.Maybe (fromJust) | |
30 | 31 | import Data.Monoid ((<>)) |
31 | 32 | import Data.String (IsString) |
32 | 33 | import Data.Text (Text, pack, unpack) |
33 | 34 | import qualified Data.Text as T |
34 | 35 | import Text.Parsec ( (<|>) |
36 | , (<?>) | |
35 | 37 | , char |
36 | 38 | , eof |
37 | 39 | , lookAhead |
80 | 82 | -- any alphanumeric sequence as a valid atom looks like: |
81 | 83 | -- |
82 | 84 | -- > simpleSpec :: SExprSpec Text (SExpr Text) |
83 |
-- > simpleSpec = mkSpec ( |
|
85 | -- > simpleSpec = mkSpec (pack <$> many1 alphaNum) id | |
84 | 86 | mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom) |
85 | 87 | mkSpec p s = SExprSpec |
86 |
{ sesPAtom = p |
|
88 | { sesPAtom = p <?> "atom" | |
87 | 89 | , sesSAtom = s |
88 | 90 | , readerMap = M.empty |
89 | 91 | , comment = Nothing |
101 | 103 | -- the internal S-expression representation using 'asWellFormed', and |
102 | 104 | -- then providing a conversion between the 'WellFormedSExpr' type and |
103 | 105 | -- an @Expr@ AST. Notice that the below parser uses 'String' as its |
104 |
-- underlying atom type |
|
106 | -- underlying atom type, instead of some token type. | |
105 | 107 | -- |
106 | 108 | -- > data Expr = Add Expr Expr | Num Int deriving (Eq, Show) |
107 | 109 | -- > |
108 | 110 | -- > toExpr :: WellFormedSExpr String -> Either String Expr |
109 | -- > toExpr (WFSList [WFSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
110 | -- > toExpr (WFSAtom c) | all isDigit c = pure (Num (read c)) | |
111 |
-- > toExpr |
|
111 | -- > toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
112 | -- > toExpr (A c) | all isDigit c = pure (Num (read c)) | |
113 | -- > toExpr c = Left ("Invalid expr: " ++ show c) | |
112 | 114 | -- > |
113 | 115 | -- > fromExpr :: Expr -> WellFormedSExpr String |
114 | -- > fromExpr (Add l r) = WFSList [WFSAtom "+", fromExpr l, fromExpr r] | |
115 | -- > fromExpr (Num n) = WFSAtom (show n) | |
116 | -- > fromExpr (Add l r) = L [A "+", fromExpr l, fromExpr r] | |
117 | -- > fromExpr (Num n) = A (show n) | |
116 | 118 | -- > |
117 | 119 | -- > mySpec :: SExprSpec String Expr |
118 | 120 | -- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack |
119 |
-- > where parser = |
|
121 | -- > where parser = many1 (satisfy isValidChar) | |
120 | 122 | -- > isValidChar c = isDigit c || c == '+' |
121 | 123 | convertSpec :: (b -> Either String c) -> (c -> b) |
122 | 124 | -> SExprSpec a b -> SExprSpec a c |
143 | 145 | -- stream. |
144 | 146 | -- |
145 | 147 | -- The following defines an S-expression variant that treats |
146 | -- @'expr@ as being sugar for @(quote expr)@: | |
147 | -- | |
148 | -- > mySpec :: SExprSpec Text (SExpr Text) | |
149 | -- > mySpec = addReader '\'' reader $ mkSpec (takeWhile1 isAlphaNum) id | |
148 | -- @'expr@ as being sugar for @(quote expr)@. Note that this is done | |
149 | -- already in a more general way by the 'withQuote' function, but | |
150 | -- it is a good illustration of using reader macros in practice: | |
151 | -- | |
152 | -- > mySpec :: SExprSpec String (SExpr String) | |
153 | -- > mySpec = addReader '\'' reader $ mkSpec (many1 alphaNum) pack | |
150 | 154 | -- > where reader p = quote <$> p |
151 | 155 | -- > quote e = SCons (SAtom "quote") (SCons e SNil) |
152 | 156 | addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c |
163 | 167 | -- C++-style comments, i.e. those which begin with @//@ and last |
164 | 168 | -- until the end of a line: |
165 | 169 | -- |
166 | -- > t :: SExprSpec Text (SExpr Text) | |
167 | -- > t = setComment comm $ mkSpec (takeWhile1 isAlphaNum) id | |
168 |
-- > |
|
170 | -- > t :: SExprSpec String (SExpr String) | |
171 | -- > t = setComment comm $ mkSpec (many1 alphaNum) pack | |
172 | -- > where comm = try (string "//" *> manyTill anyChar newline *> pure ()) | |
169 | 173 | |
170 | 174 | setComment :: Comment -> SExprSpec a c -> SExprSpec a c |
171 |
setComment c spec = spec { comment = Just |
|
175 | setComment c spec = spec { comment = Just (c <?> "comment") } | |
172 | 176 | |
173 | 177 | -- | Add the ability to understand a quoted S-Expression. In general, |
174 | 178 | -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is |
185 | 189 | parseGenericSExpr :: |
186 | 190 | Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom) |
187 | 191 | parseGenericSExpr atom reader skip = do |
188 |
let sExpr = parseGenericSExpr atom reader skip |
|
192 | let sExpr = parseGenericSExpr atom reader skip <?> "s-expr" | |
189 | 193 | skip |
190 | 194 | c <- peekChar |
191 | 195 | r <- case c of |
116 | 116 | |
117 | 117 | ~~~~.haskell |
118 | 118 | spec :: SExprSpec Text (SExpr Text) |
119 |
spec = mkSpec ( |
|
119 | spec = mkSpec (pack <$> many1 (alphaNum <|> oneOf "+-*/!?")) id | |
120 | 120 | ~~~~ |
121 | 121 | |
122 | 122 | A more elaborate atom type would distinguish between different |
124 | 124 | identifiers and numbers) is |
125 | 125 | |
126 | 126 | ~~~~.haskell |
127 | import Data.Char (isDigit, isAlpha) | |
128 | import Data.Text (Text) | |
129 |
import |
|
127 | import Data.Text (Text, pack) | |
130 | 128 | |
131 | 129 | data Atom = Ident Text | Num Int deriving (Eq, Show) |
132 | 130 | |
133 | 131 | pAtom :: Parser Atom |
134 | pAtom = ((Num . read . T.unpack) <$> takeWhile1 isDigit) | |
135 | <|> (Ident <$> takeWhile1 isAlpha) | |
132 | pAtom = ((Num . read) <$> many1 digit) | |
133 | <|> ((Ident . pack) <$> many1 letter) | |
136 | 134 | |
137 | 135 | sAtom :: Atom -> Text |
138 | 136 | sAtom (Ident t) = t |
139 |
sAtom (Num n) = |
|
137 | sAtom (Num n) = pack (show n) | |
140 | 138 | |
141 | 139 | mySpec :: SExprSpec Atom (SExpr Atom) |
142 | 140 | mySpec = mkSpec pAtom sAtom |
172 | 170 | data Expr = Add Expr Expr | Num Int deriving (Eq, Show) |
173 | 171 | |
174 | 172 | toExpr :: RichSExpr Text -> Either String Expr |
175 | toExpr (RSList [RSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
176 | toExpr (RSAtom c) | |
173 | toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
174 | toExpr (A c) | |
177 | 175 | | T.all isDigit c = pure (Num (read (T.unpack c))) |
178 | 176 | | otherwise = Left "Non-numeric token as argument" |
179 | 177 | toExpr _ = Left "Unrecognized s-expr" |
180 | 178 | |
181 | 179 | fromExpr :: Expr -> RichSExpr Text |
182 | fromExpr (Add x y) = RSList [RSAtom "+", fromExpr x, fromExpr y] | |
183 | fromExpr (Num n) = RSAtom (T.pack (show n)) | |
180 | fromExpr (Add x y) = L [A "+", fromExpr x, fromExpr y] | |
181 | fromExpr (Num n) = A (T.pack (show n)) | |
184 | 182 | ~~~~ |
185 | 183 | |
186 | 184 | then we could use the `convertSpec` function to add this directly to |
196 | 194 | ## Comments |
197 | 195 | |
198 | 196 | By default, an S-expression spec does not include a comment syntax, but |
199 |
the provided `with |
|
197 | the provided `withLispComments` function will cause it to understand | |
200 | 198 | traditional Lisp line-oriented comments that begin with a semicolon: |
201 | 199 | |
202 | 200 | ~~~~.haskell |
203 | 201 | > decode spec "(this ; has a comment\n inside)\n" |
204 | Left "Failed reading: takeWhile1" | |
205 | > decode (withSemicolonComments spec) "(this ; has a comment\n inside)\n" | |
202 | Left "(line 1, column 7):\nunexpected \";\"\nexpecting space or atom" | |
203 | > decode (withLispComments spec) "(this ; has a comment\n inside)\n" | |
206 | 204 | Right [SCons (SAtom "this") (SCons (SAtom "inside") SNil)] |
207 | 205 | ~~~~ |
208 | 206 | |
217 | 215 | For example, the following adds C++-style comments to an S-expression format: |
218 | 216 | |
219 | 217 | ~~~~.haskell |
220 |
> let cppComment = string "//" >> |
|
218 | > let cppComment = string "//" >> manyTill anyChar newline >> return () | |
221 | 219 | > decode (setComment cppComment spec) "(a //comment\n b)\n" |
222 | 220 | Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)] |
223 | 221 | ~~~~ |
227 | 225 | A _reader macro_ is a Lisp macro which is invoked during read time. This |
228 | 226 | allows the _lexical_ syntax of a Lisp to be modified. The most commonly |
229 | 227 | seen reader macro is the quote, which allows the syntax `'expr` to stand |
230 | in for the s-expression `(quote expr)`. The S-Cargot library enables this | |
231 | by keeping a map of characters to Parsec parsers that can be used as | |
228 | in for the s-expression `(quote expr)`. The S-Cargot library accommodates | |
229 | this by keeping a map of characters to Parsec parsers that can be used as | |
232 | 230 | readers. There is a special case for the aforementioned quote, but that |
233 | 231 | could easily be written by hand as |
234 | 232 | |
272 | 270 | literals: |
273 | 271 | |
274 | 272 | ~~~~.haskell |
275 | data Op = Add | Sub | Mul | |
276 | data Atom = AOp Op | ANum Int | |
273 | data Op = Add | Sub | Mul deriving (Eq, Show) | |
274 | data Atom = AOp Op | ANum Int deriving (Eq, Show) | |
277 | 275 | data Expr = EOp Op Expr Expr | ENum Int deriving (Eq, Show) |
278 | 276 | |
279 | 277 | -- Conversions for our Expr type |
288 | 286 | |
289 | 287 | -- Parser and serializer for our Atom type |
290 | 288 | pAtom :: Parser Atom |
291 |
pAtom = ((ANum . read . T.unpack) <$> |
|
289 | pAtom = ((ANum . read) <$> many1 digit) | |
292 | 290 | <|> (char '+' *> pure (AOp Add)) |
293 | 291 | <|> (char '-' *> pure (AOp Sub)) |
294 | 292 | <|> (char '*' *> pure (AOp Mul)) |
301 | 299 | |
302 | 300 | -- Our comment syntax |
303 | 301 | hsComment :: Parser () |
304 |
hsComment = string "--" >> |
|
302 | hsComment = string "--" >> manyTill anyChar newline >> return () | |
305 | 303 | |
306 | 304 | -- Our custom reader macro |
307 | 305 | hexReader :: Reader Atom |