More documentation updates for Parsec change
Getty Ritter
10 years ago
| 34 | 34 | , string |
| 35 | 35 | ) |
| 36 | 36 | |
| 37 | import Data.SCargot.General ( Comment | |
| 38 | , SExprSpec | |
| 39 | , setComment | |
| 40 | ) | |
| 37 | import Data.SCargot.General ( Comment | |
| 38 | , SExprSpec | |
| 39 | , setComment | |
| 40 | ) | |
| 41 | 41 | |
| 42 | 42 | -- | Given a string, produce a comment parser that matches that |
| 43 | 43 | -- initial string and ignores everything until the end of the |
| 57 | 57 | -- |
| 58 | 58 | -- > { this { comment } |
| 59 | 59 | -- |
| 60 | -- to be a complete comment, despite the improper nesting. This is | |
| 61 | -- analogous to standard C-style comments in which | |
| 60 | -- to be a complete comment, despite the apparent improper nesting. | |
| 61 | -- This is analogous to standard C-style comments in which | |
| 62 | 62 | -- |
| 63 | 63 | -- > /* this /* comment */ |
| 64 | 64 | -- |
| 124 | 124 | For example: |
| 125 | 125 | |
| 126 | 126 | > mySpec :: SExprSpec Text (SExpr Text) |
| 127 |
> mySpec = asWellFormed |
|
| 127 | > mySpec = asWellFormed $ mkSpec (pack <$> many1 alphaNum) id | |
| 128 | 128 | > |
| 129 | 129 | > myLispySpec :: SExprSpec Text (SExpr Text) |
| 130 | 130 | > myLispySpec = withLispComments mySpec |
| 136 | 136 | comment syntaxes: |
| 137 | 137 | |
| 138 | 138 | > > decode mySpec "(foo ; a lisp comment\n bar)\n" |
| 139 |
> Left " |
|
| 139 | > Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom" | |
| 140 | 140 | > > decode myLispySpec "(foo ; a lisp comment\n bar)\n" |
| 141 | 141 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] |
| 142 | 142 | > > decode mySpec "(foo /* a c-like\n comment */ bar)\n" |
| 143 |
> Left " |
|
| 143 | > Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom" | |
| 144 | 144 | > > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n" |
| 145 | 145 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] |
| 146 | 146 | |
| 27 | 27 | import Data.Char (isAlpha, isDigit, isAlphaNum) |
| 28 | 28 | import Data.Map.Strict (Map) |
| 29 | 29 | import qualified Data.Map.Strict as M |
| 30 | import Data.Maybe (fromJust) | |
| 30 | 31 | import Data.Monoid ((<>)) |
| 31 | 32 | import Data.String (IsString) |
| 32 | 33 | import Data.Text (Text, pack, unpack) |
| 33 | 34 | import qualified Data.Text as T |
| 34 | 35 | import Text.Parsec ( (<|>) |
| 36 | , (<?>) | |
| 35 | 37 | , char |
| 36 | 38 | , eof |
| 37 | 39 | , lookAhead |
| 80 | 82 | -- any alphanumeric sequence as a valid atom looks like: |
| 81 | 83 | -- |
| 82 | 84 | -- > simpleSpec :: SExprSpec Text (SExpr Text) |
| 83 |
-- > simpleSpec = mkSpec ( |
|
| 85 | -- > simpleSpec = mkSpec (pack <$> many1 alphaNum) id | |
| 84 | 86 | mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom) |
| 85 | 87 | mkSpec p s = SExprSpec |
| 86 |
{ sesPAtom = p |
|
| 88 | { sesPAtom = p <?> "atom" | |
| 87 | 89 | , sesSAtom = s |
| 88 | 90 | , readerMap = M.empty |
| 89 | 91 | , comment = Nothing |
| 101 | 103 | -- the internal S-expression representation using 'asWellFormed', and |
| 102 | 104 | -- then providing a conversion between the 'WellFormedSExpr' type and |
| 103 | 105 | -- an @Expr@ AST. Notice that the below parser uses 'String' as its |
| 104 |
-- underlying atom type |
|
| 106 | -- underlying atom type, instead of some token type. | |
| 105 | 107 | -- |
| 106 | 108 | -- > data Expr = Add Expr Expr | Num Int deriving (Eq, Show) |
| 107 | 109 | -- > |
| 108 | 110 | -- > toExpr :: WellFormedSExpr String -> Either String Expr |
| 109 | -- > toExpr (WFSList [WFSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
| 110 | -- > toExpr (WFSAtom c) | all isDigit c = pure (Num (read c)) | |
| 111 |
-- > toExpr |
|
| 111 | -- > toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
| 112 | -- > toExpr (A c) | all isDigit c = pure (Num (read c)) | |
| 113 | -- > toExpr c = Left ("Invalid expr: " ++ show c) | |
| 112 | 114 | -- > |
| 113 | 115 | -- > fromExpr :: Expr -> WellFormedSExpr String |
| 114 | -- > fromExpr (Add l r) = WFSList [WFSAtom "+", fromExpr l, fromExpr r] | |
| 115 | -- > fromExpr (Num n) = WFSAtom (show n) | |
| 116 | -- > fromExpr (Add l r) = L [A "+", fromExpr l, fromExpr r] | |
| 117 | -- > fromExpr (Num n) = A (show n) | |
| 116 | 118 | -- > |
| 117 | 119 | -- > mySpec :: SExprSpec String Expr |
| 118 | 120 | -- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack |
| 119 |
-- > where parser = |
|
| 121 | -- > where parser = many1 (satisfy isValidChar) | |
| 120 | 122 | -- > isValidChar c = isDigit c || c == '+' |
| 121 | 123 | convertSpec :: (b -> Either String c) -> (c -> b) |
| 122 | 124 | -> SExprSpec a b -> SExprSpec a c |
| 143 | 145 | -- stream. |
| 144 | 146 | -- |
| 145 | 147 | -- The following defines an S-expression variant that treats |
| 146 | -- @'expr@ as being sugar for @(quote expr)@: | |
| 147 | -- | |
| 148 | -- > mySpec :: SExprSpec Text (SExpr Text) | |
| 149 | -- > mySpec = addReader '\'' reader $ mkSpec (takeWhile1 isAlphaNum) id | |
| 148 | -- @'expr@ as being sugar for @(quote expr)@. Note that this is done | |
| 149 | -- already in a more general way by the 'withQuote' function, but | |
| 150 | -- it is a good illustration of using reader macros in practice: | |
| 151 | -- | |
| 152 | -- > mySpec :: SExprSpec String (SExpr Text) | |
| 153 | -- > mySpec = addReader '\'' reader $ mkSpec (many1 alphaNum) pack | |
| 150 | 154 | -- > where reader p = quote <$> p |
| 151 | 155 | -- > quote e = SCons (SAtom "quote") (SCons e SNil) |
| 152 | 156 | addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c |
| 163 | 167 | -- C++-style comments, i.e. those which begin with @//@ and last |
| 164 | 168 | -- until the end of a line: |
| 165 | 169 | -- |
| 166 | -- > t :: SExprSpec Text (SExpr Text) | |
| 167 | -- > t = setComment comm $ mkSpec (takeWhile1 isAlphaNum) id | |
| 168 |
-- > |
|
| 170 | -- > t :: SExprSpec String (SExpr Text) | |
| 171 | -- > t = setComment comm $ mkSpec (many1 alphaNum) pack | |
| 172 | -- > where comm = try (string "//" *> manyTill anyChar newline *> pure ()) | |
| 169 | 173 | |
| 170 | 174 | setComment :: Comment -> SExprSpec a c -> SExprSpec a c |
| 171 |
setComment c spec = spec { comment = Just |
|
| 175 | setComment c spec = spec { comment = Just (c <?> "comment") } | |
| 172 | 176 | |
| 173 | 177 | -- | Add the ability to understand a quoted S-Expression. In general, |
| 174 | 178 | -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is |
| 185 | 189 | parseGenericSExpr :: |
| 186 | 190 | Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom) |
| 187 | 191 | parseGenericSExpr atom reader skip = do |
| 188 |
let sExpr = parseGenericSExpr atom reader skip |
|
| 192 | let sExpr = parseGenericSExpr atom reader skip <?> "s-expr" | |
| 189 | 193 | skip |
| 190 | 194 | c <- peekChar |
| 191 | 195 | r <- case c of |
| 116 | 116 | |
| 117 | 117 | ~~~~.haskell |
| 118 | 118 | spec :: SExprSpec Text (SExpr Text) |
| 119 |
spec = mkSpec ( |
|
| 119 | spec = mkSpec (pack <$> many1 (alphaNum <|> oneOf "+-*/!?")) id | |
| 120 | 120 | ~~~~ |
| 121 | 121 | |
| 122 | 122 | A more elaborate atom type would distinguish between different |
| 124 | 124 | identifiers and numbers) is |
| 125 | 125 | |
| 126 | 126 | ~~~~.haskell |
| 127 | import Data.Char (isDigit, isAlpha) | |
| 128 | import Data.Text (Text) | |
| 129 |
import |
|
| 127 | import Data.Text (Text, pack) | |
| 130 | 128 | |
| 131 | 129 | data Atom = Ident Text | Num Int deriving (Eq, Show) |
| 132 | 130 | |
| 133 | 131 | pAtom :: Parser Atom |
| 134 | pAtom = ((Num . read . T.unpack) <$> takeWhile1 isDigit) | |
| 135 | <|> (Ident <$> takeWhile1 isAlpha) | |
| 132 | pAtom = ((Num . read) <$> many1 digit) | |
| 133 | <|> ((Ident . pack) <$> many1 letter) | |
| 136 | 134 | |
| 137 | 135 | sAtom :: Atom -> Text |
| 138 | 136 | sAtom (Ident t) = t |
| 139 |
sAtom (Num n) = |
|
| 137 | sAtom (Num n) = pack (show n) | |
| 140 | 138 | |
| 141 | 139 | mySpec :: SExprSpec Atom (SExpr Atom) |
| 142 | 140 | mySpec = mkSpec pAtom sAtom |
| 172 | 170 | data Expr = Add Expr Expr | Num Int deriving (Eq, Show) |
| 173 | 171 | |
| 174 | 172 | toExpr :: RichSExpr Text -> Either String Expr |
| 175 | toExpr (RSList [RSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
| 176 | toExpr (RSAtom c) | |
| 173 | toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
| 174 | toExpr (A c) | |
| 177 | 175 | | T.all isDigit c = pure (Num (read (T.unpack c))) |
| 178 | 176 | | otherwise = Left "Non-numeric token as argument" |
| 179 | 177 | toExpr _ = Left "Unrecognized s-expr" |
| 180 | 178 | |
| 181 | 179 | fromExpr :: Expr -> RichSExpr Text |
| 182 | fromExpr (Add x y) = RSList [RSAtom "+", fromExpr x, fromExpr y] | |
| 183 | fromExpr (Num n) = RSAtom (T.pack (show n)) | |
| 180 | fromExpr (Add x y) = L [A "+", fromExpr x, fromExpr y] | |
| 181 | fromExpr (Num n) = A (T.pack (show n)) | |
| 184 | 182 | ~~~~ |
| 185 | 183 | |
| 186 | 184 | then we could use the `convertSpec` function to add this directly to |
| 196 | 194 | ## Comments |
| 197 | 195 | |
| 198 | 196 | By default, an S-expression spec does not include a comment syntax, but |
| 199 |
the provided `with |
|
| 197 | the provided `withLispComments` function will cause it to understand | |
| 200 | 198 | traditional Lisp line-oriented comments that begin with a semicolon: |
| 201 | 199 | |
| 202 | 200 | ~~~~.haskell |
| 203 | 201 | > decode spec "(this ; has a comment\n inside)\n" |
| 204 | Left "Failed reading: takeWhile1" | |
| 205 | > decode (withSemicolonComments spec) "(this ; has a comment\n inside)\n" | |
| 202 | Left "(line 1, column 7):\nunexpected \";\"\nexpecting space or atom" | |
| 203 | > decode (withLispComments spec) "(this ; has a comment\n inside)\n" | |
| 206 | 204 | Right [SCons (SAtom "this") (SCons (SAtom "inside") SNil)] |
| 207 | 205 | ~~~~ |
| 208 | 206 | |
| 217 | 215 | For example, the following adds C++-style comments to an S-expression format: |
| 218 | 216 | |
| 219 | 217 | ~~~~.haskell |
| 220 |
> let cppComment = string "//" >> |
|
| 218 | > let cppComment = string "//" >> manyTill anyChar newline >> return () | |
| 221 | 219 | > decode (setComment cppComment spec) "(a //comment\n b)\n" |
| 222 | 220 | Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)] |
| 223 | 221 | ~~~~ |
| 227 | 225 | A _reader macro_ is a Lisp macro which is invoked during read time. This |
| 228 | 226 | allows the _lexical_ syntax of a Lisp to be modified. The most commonly |
| 229 | 227 | seen reader macro is the quote, which allows the syntax `'expr` to stand |
| 230 | in for the s-expression `(quote expr)`. The S-Cargot library enables this | |
| 231 | by keeping a map of characters to Parsec parsers that can be used as | |
| 228 | in for the s-expression `(quote expr)`. The S-Cargot library accommodates | |
| 229 | this by keeping a map of characters to Parsec parsers that can be used as | |
| 232 | 230 | readers. There is a special case for the aforementioned quote, but that |
| 233 | 231 | could easily be written by hand as |
| 234 | 232 | |
| 272 | 270 | literals: |
| 273 | 271 | |
| 274 | 272 | ~~~~.haskell |
| 275 | data Op = Add | Sub | Mul | |
| 276 | data Atom = AOp Op | ANum Int | |
| 273 | data Op = Add | Sub | Mul deriving (Eq, Show) | |
| 274 | data Atom = AOp Op | ANum Int deriving (Eq, Show) | |
| 277 | 275 | data Expr = EOp Op Expr Expr | ENum Int deriving (Eq, Show) |
| 278 | 276 | |
| 279 | 277 | -- Conversions for our Expr type |
| 288 | 286 | |
| 289 | 287 | -- Parser and serializer for our Atom type |
| 290 | 288 | pAtom :: Parser Atom |
| 291 |
pAtom = ((ANum . read . T.unpack) <$> |
|
| 289 | pAtom = ((ANum . read) <$> many1 digit) | |
| 292 | 290 | <|> (char "+" *> pure (AOp Add)) |
| 293 | 291 | <|> (char "-" *> pure (AOp Sub)) |
| 294 | 292 | <|> (char "*" *> pure (AOp Mul)) |
| 301 | 299 | |
| 302 | 300 | -- Our comment syntax |
| 303 | 301 | hsComment :: Parser () |
| 304 |
hsComment = string "--" >> |
|
| 302 | hsComment = string "--" >> manyTill anyChar newline >> return () | |
| 305 | 303 | |
| 306 | 304 | -- Our custom reader macro |
| 307 | 305 | hexReader :: Reader Atom |