Big final bunch of refactors/documentation bits in preparation for Hackage release
Getty Ritter
9 years ago
1 | {-# LANGUAGE OverloadedStrings #-} | |
2 | ||
3 | module Data.SCargot.Basic | |
4 | ( -- * Spec | |
5 | -- $descr | |
6 | basicSpec | |
7 | ) where | |
8 | ||
9 | import Control.Applicative ((<$>)) | |
10 | import Data.Char (isAlphaNum) | |
11 | import Text.Parsec (many1, satisfy) | |
12 | import Data.Text (Text, pack) | |
13 | ||
14 | import Data.SCargot.Repr.Basic (SExpr) | |
15 | import Data.SCargot.General ( SExprSpec | |
16 | , mkSpec | |
17 | ) | |
18 | import Data.SCargot.Comments (withLispComments) | |
19 | ||
20 | isAtomChar :: Char -> Bool | |
21 | isAtomChar c = isAlphaNum c | |
22 | || c == '-' || c == '*' || c == '/' | |
23 | || c == '+' || c == '<' || c == '>' | |
24 | || c == '=' || c == '!' || c == '?' | |
25 | ||
26 | -- $descr | |
27 | -- The 'basicSpec' describes S-expressions whose atoms are simply | |
28 | -- text strings that contain alphanumeric characters and a small | |
29 | -- set of punctuation. It does no parsing of numbers or other data | |
30 | -- types, and will accept tokens that typical Lisp implementations | |
31 | -- would find nonsensical (like @77foo@). | |
32 | -- | |
33 | -- Atoms recognized by the 'basicSpec' are any string matching the | |
34 | -- regular expression @[A-Za-z0-9+*<>/=!?-]+@. | |
35 | ||
36 | -- | A 'SExprSpec' that understands atoms to be sequences of | |
37 | -- alphanumeric characters as well as the punctuation | |
38 | -- characters @[-*/+<>=!?]@, and does no processing of them. | |
39 | basicSpec :: SExprSpec Text (SExpr Text) | |
40 | basicSpec = mkSpec pToken id | |
41 | where pToken = pack <$> many1 (satisfy isAtomChar) |
4 | 4 | ( -- $intro |
5 | 5 | |
6 | 6 | -- * Lisp-Style Syntax |
7 | ||
7 | 8 | -- $lisp |
8 | 9 | withLispComments |
9 | 10 | -- * Other Existing Comment Syntaxes |
35 | 36 | , string |
36 | 37 | ) |
37 | 38 | |
38 | import Data.SCargot.General ( Comment | |
39 | , SExprSpec | |
40 | , setComment | |
41 | ) | |
39 | import Data.SCargot.Parse ( Comment | |
40 | , SExprParser | |
41 | , setComment | |
42 | ) | |
42 | 43 | |
43 | 44 | -- | Given a string, produce a comment parser that matches that |
44 | 45 | -- initial string and ignores everything until the end of the |
73 | 74 | -- | Lisp-style line-oriented comments start with @;@ and last |
74 | 75 | -- until the end of the line. This is usually the comment |
75 | 76 | -- syntax you want. |
76 |
withLispComments :: SExpr |
|
77 | withLispComments :: SExprParser t a -> SExprParser t a | |
77 | 78 | withLispComments = setComment (lineComment ";") |
78 | 79 | |
79 | 80 | -- | C++-like line-oriented comment start with @//@ and last |
80 | 81 | -- until the end of the line. |
81 |
withCLikeLineComments :: SExpr |
|
82 | withCLikeLineComments :: SExprParser t a -> SExprParser t a | |
82 | 83 | withCLikeLineComments = setComment (lineComment "//") |
83 | 84 | |
84 | 85 | -- | C-like block comments start with @/*@ and end with @*/@. |
85 | 86 | -- They do not nest. |
86 |
withCLikeBlockComments :: SExpr |
|
87 | withCLikeBlockComments :: SExprParser t a -> SExprParser t a | |
87 | 88 | withCLikeBlockComments = setComment (simpleBlockComment "/*" "*/") |
88 | 89 | |
89 | 90 | -- | C-like comments include both line- and block-comments, the |
90 | 91 | -- former starting with @//@ and the latter contained within |
91 | 92 | -- @//* ... *//@. |
92 |
withCLikeComments :: SExpr |
|
93 | withCLikeComments :: SExprParser t a -> SExprParser t a | |
93 | 94 | withCLikeComments = setComment (lineComment "//" <|> |
94 | 95 | simpleBlockComment "/*" "*/") |
95 | 96 | |
96 | 97 | -- | Haskell line-oriented comments start with @--@ and last |
97 | 98 | -- until the end of the line. |
98 |
withHaskellLineComments :: SExpr |
|
99 | withHaskellLineComments :: SExprParser t a -> SExprParser t a | |
99 | 100 | withHaskellLineComments = setComment (lineComment "--") |
100 | 101 | |
101 | 102 | -- | Haskell block comments start with @{-@ and end with @-}@. |
102 | 103 | -- They do not nest. |
103 |
withHaskellBlockComments :: SExpr |
|
104 | withHaskellBlockComments :: SExprParser t a -> SExprParser t a | |
104 | 105 | withHaskellBlockComments = setComment (simpleBlockComment "{-" "-}") |
105 | 106 | |
106 | 107 | -- | Haskell comments include both the line-oriented @--@ comments |
107 | 108 | -- and the block-oriented @{- ... -}@ comments |
108 |
withHaskellComments :: SExpr |
|
109 | withHaskellComments :: SExprParser t a -> SExprParser t a | |
109 | 110 | withHaskellComments = setComment (lineComment "--" <|> |
110 | 111 | simpleBlockComment "{-" "-}") |
111 | 112 | |
112 | 113 | -- | Many scripting and shell languages use these, which begin with |
113 | 114 | -- @#@ and last until the end of the line. |
114 |
withOctothorpeComments :: SExpr |
|
115 | withOctothorpeComments :: SExprParser t a -> SExprParser t a | |
115 | 116 | withOctothorpeComments = setComment (lineComment "#") |
116 | 117 | |
117 | 118 | |
118 | 119 | {- $intro |
119 | 120 | |
120 |
By default a 'SExpr |
|
121 | By default a 'SExprParser' will not understand any kind of comment | |
121 | 122 | syntax. Most varieties of s-expression will, however, want some kind |
122 | 123 | of commenting capability, so the below functions will produce a new |
123 |
'SExpr |
|
124 | 'SExprParser' which understands various kinds of comment syntaxes. | |
124 | 125 | |
125 | 126 | For example: |
126 | 127 | |
127 | > mySpec :: SExprSpec Text (SExpr Text) | |
128 | > mySpec = asWellFormed $ mkSpec (pack <$> many1 alphaNum) id | |
128 | > mySpec :: SExprParser Text (SExpr Text) | |
129 | > mySpec = asWellFormed $ mkParser (pack <$> many1 alphaNum) | |
129 | 130 | > |
130 |
> myLispySpec :: SExpr |
|
131 | > myLispySpec :: SExprParser Text (SExpr Text) | |
131 | 132 | > myLispySpec = withLispComments mySpec |
132 | 133 | > |
133 |
> myCLikeSpec :: SExpr |
|
134 | > myCLikeSpec :: SExprParser Text (SExpr Text) | |
134 | 135 | > myCLikeSpec = withCLikeComment mySpec |
135 | 136 | |
136 | 137 | We can then use these to parse s-expressions with different kinds of |
137 | 138 | comment syntaxes: |
138 | 139 | |
139 | > > decode mySpec "(foo ; a lisp comment\n bar)\n" | |
140 | > Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom" | |
141 | > > decode myLispySpec "(foo ; a lisp comment\n bar)\n" | |
142 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
143 | > > decode mySpec "(foo /* a c-like\n comment */ bar)\n" | |
144 | > Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom" | |
145 | > > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n" | |
146 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
140 | >>> decode mySpec "(foo ; a lisp comment\n bar)\n" | |
141 | Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom" | |
142 | >>> decode myLispySpec "(foo ; a lisp comment\n bar)\n" | |
143 | Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
144 | >>> decode mySpec "(foo /* a c-like\n comment */ bar)\n" | |
145 | Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom" | |
146 | >>> decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n" | |
147 | Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
147 | 148 | |
148 | 149 | -} |
149 | 150 |
4 | 4 | , parseR6RSIdent |
5 | 5 | , parseR7RSIdent |
6 | 6 | -- * Numeric Literal Parsers |
7 | , signed | |
8 | , prefixedNumber | |
9 | , signedPrefixedNumber | |
7 | 10 | , binNumber |
8 | 11 | , signedBinNumber |
9 | 12 | , octNumber |
14 | 17 | , signedDozNumber |
15 | 18 | , hexNumber |
16 | 19 | , signedHexNumber |
17 | , signed | |
18 | 20 | ) where |
19 | 21 | |
20 | 22 | import Data.Char |
42 | 44 | |
43 | 45 | -- | Parse an identifier according to the R6RS Scheme standard. An |
44 | 46 | -- R6RS identifier may include inline hexadecimal escape sequences |
45 |
-- so that, for example, @foo@ is equivalent to @f\ |
|
47 | -- so that, for example, @foo@ is equivalent to @f\\x6f;o@, and is | |
46 | 48 | -- more liberal than R5RS as to which Unicode characters it may |
47 | 49 | -- accept. |
48 | 50 | parseR6RSIdent :: Parser Text |
141 | 143 | signed :: Num a => Parser a -> Parser a |
142 | 144 | signed p = ($) <$> sign <*> p |
143 | 145 | |
146 | -- | Parses a number in the same way as 'prefixedNumber', with an optional | |
147 | -- leading @+@ or @-@. | |
148 | signedPrefixedNumber :: Parser Integer | |
149 | signedPrefixedNumber = signed prefixedNumber | |
150 | ||
151 | -- | Parses a number, determining which numeric base to use by examining | |
152 | -- the literal's prefix: @0x@ for a hexadecimal number, @0z@ for a | |
153 | -- dozenal number, @0o@ for an octal number, and @0b@ for a binary | |
154 | -- number (as well as the upper-case versions of the same.) If the | |
155 | -- base is omitted entirely, then it is treated as a decimal number. | |
156 | prefixedNumber :: Parser Integer | |
157 | prefixedNumber = (string "0x" <|> string "0X") *> hexNumber | |
158 | <|> (string "0o" <|> string "0O") *> octNumber | |
159 | <|> (string "0z" <|> string "0Z") *> dozNumber | |
160 | <|> (string "0b" <|> string "0B") *> binNumber | |
161 | <|> decNumber | |
162 | ||
144 | 163 | -- | A parser for non-signed binary numbers |
145 | 164 | binNumber :: Parser Integer |
146 | 165 | binNumber = number 2 (char '0' <|> char '1') |
147 | 166 | |
148 | 167 | -- | A parser for signed binary numbers, with an optional leading @+@ or @-@. |
149 | 168 | signedBinNumber :: Parser Integer |
150 |
signedBinNumber = |
|
169 | signedBinNumber = signed binNumber | |
151 | 170 | |
152 | 171 | -- | A parser for non-signed octal numbers |
153 | 172 | octNumber :: Parser Integer |
1 | {-# LANGUAGE ViewPatterns #-} | |
2 | {-# LANGUAGE OverloadedStrings #-} | |
3 | ||
4 | module Data.SCargot.General | |
5 | ( -- * SExprSpec | |
6 | SExprSpec | |
7 | , mkSpec | |
8 | , convertSpec | |
9 | , addReader | |
10 | , setComment | |
11 | -- * Specific SExprSpec Conversions | |
12 | , asRich | |
13 | , asWellFormed | |
14 | , withQuote | |
15 | -- * Using a SExprSpec | |
16 | , decode | |
17 | , decodeOne | |
18 | , encode | |
19 | , encodeOne | |
20 | -- * Useful Type Aliases | |
21 | , Reader | |
22 | , Comment | |
23 | , Serializer | |
24 | ) where | |
25 | ||
26 | import Control.Applicative ((<*), (*>), (<*>), (<$>), pure) | |
27 | import Control.Monad ((>=>)) | |
28 | import Data.Char (isAlpha, isDigit, isAlphaNum) | |
29 | import Data.Map.Strict (Map) | |
30 | import qualified Data.Map.Strict as M | |
31 | import Data.Maybe (fromJust) | |
32 | import Data.Monoid ((<>)) | |
33 | import Data.String (IsString) | |
34 | import Data.Text (Text, pack, unpack) | |
35 | import qualified Data.Text as T | |
36 | import Text.Parsec ( (<|>) | |
37 | , (<?>) | |
38 | , char | |
39 | , eof | |
40 | , lookAhead | |
41 | , many1 | |
42 | , runParser | |
43 | , skipMany | |
44 | ) | |
45 | import Text.Parsec.Char (anyChar, space) | |
46 | import Text.Parsec.Text (Parser) | |
47 | ||
48 | import Data.SCargot.Repr ( SExpr(..) | |
49 | , RichSExpr | |
50 | , WellFormedSExpr | |
51 | , fromRich | |
52 | , toRich | |
53 | , fromWellFormed | |
54 | , toWellFormed | |
55 | ) | |
56 | ||
57 | type ReaderMacroMap atom = Map Char (Reader atom) | |
58 | ||
59 | -- | A 'Reader' represents a reader macro: it takes a parser for | |
60 | -- the S-Expression type and performs as much or as little | |
61 | -- parsing as it would like, and then returns an S-expression. | |
62 | type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom)) | |
63 | ||
64 | -- | A 'Comment' represents any kind of skippable comment. This | |
65 | -- parser __must__ be able to fail if a comment is not being | |
66 | -- recognized, and it __must__ not consume any input. | |
67 | type Comment = Parser () | |
68 | ||
69 | -- | A 'Serializer' is any function which can serialize an Atom | |
70 | -- to 'Text'. | |
71 | type Serializer atom = atom -> Text | |
72 | ||
73 | -- | A 'SExprSpec' describes a parser and emitter for a particular | |
74 | -- variant of S-Expressions. The @atom@ type corresponds to a | |
75 | -- Haskell type used to represent the atoms, and the @carrier@ | |
76 | -- type corresponds to the parsed S-Expression structure. The | |
77 | -- 'SExprSpec' type is deliberately opaque so that it must be | |
78 | -- constructed and modified with other helper functions. | |
79 | data SExprSpec atom carrier = SExprSpec | |
80 | { sesPAtom :: Parser atom | |
81 | , sesSAtom :: Serializer atom | |
82 | , readerMap :: ReaderMacroMap atom | |
83 | , comment :: Maybe Comment | |
84 | , postparse :: SExpr atom -> Either String carrier | |
85 | , preserial :: carrier -> SExpr atom | |
86 | } | |
87 | ||
88 | -- | Create a basic 'SExprSpec' when given a parser and serializer | |
89 | -- for an atom type. A small minimal 'SExprSpec' that recognizes | |
90 | -- any alphanumeric sequence as a valid atom looks like: | |
91 | -- | |
92 | -- > simpleSpec :: SExprSpec Text (SExpr Text) | |
93 | -- > simpleSpec = mkSpec (pack <$> many1 isAlphaNum) id | |
94 | mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom) | |
95 | mkSpec p s = SExprSpec | |
96 | { sesPAtom = p <?> "atom" | |
97 | , sesSAtom = s | |
98 | , readerMap = M.empty | |
99 | , comment = Nothing | |
100 | , postparse = return | |
101 | , preserial = id | |
102 | } | |
103 | ||
104 | -- | Modify the carrier type for a 'SExprSpec'. This is | |
105 | -- used internally to convert between various 'SExpr' representations, | |
106 | -- but could also be used externally to add an extra conversion layer | |
107 | -- onto a 'SExprSpec'. | |
108 | -- | |
109 | -- The following defines an S-expression spec that recognizes the | |
110 | -- language of binary addition trees. It does so by first transforming | |
111 | -- the internal S-expression representation using 'asWellFormed', and | |
112 | -- then providing a conversion between the 'WellFormedSExpr' type and | |
113 | -- an @Expr@ AST. Notice that the below parser uses 'String' as its | |
114 | -- underlying atom type, instead of some token type. | |
115 | -- | |
116 | -- > data Expr = Add Expr Expr | Num Int deriving (Eq, Show) | |
117 | -- > | |
118 | -- > toExpr :: WellFormedSExpr String -> Either String Expr | |
119 | -- > toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
120 | -- > toExpr (A c) | all isDigit c = pure (Num (read c)) | |
121 | -- > toExpr c = Left ("Invalid expr: " ++ show c) | |
122 | -- > | |
123 | -- > fromExpr :: Expr -> WellFormedSExpr String | |
124 | -- > fromExpr (Add l r) = L [A "+", fromExpr l, fromExpr r] | |
125 | -- > fromExpr (Num n) = A (show n) | |
126 | -- > | |
127 | -- > mySpec :: SExprSpec String Expr | |
128 | -- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack | |
129 | -- > where parser = many1 (satisfy isValidChar) | |
130 | -- > isValidChar c = isDigit c || c == '+' | |
131 | convertSpec :: (b -> Either String c) -> (c -> b) | |
132 | -> SExprSpec a b -> SExprSpec a c | |
133 | convertSpec f g spec = spec | |
134 | { postparse = postparse spec >=> f | |
135 | , preserial = preserial spec . g | |
136 | } | |
137 | ||
138 | -- | Convert the final output representation from the 'SExpr' type | |
139 | -- to the 'RichSExpr' type. | |
140 | asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b) | |
141 | asRich = convertSpec (return . toRich) fromRich | |
142 | ||
143 | -- | Convert the final output representation from the 'SExpr' type | |
144 | -- to the 'WellFormedSExpr' type. | |
145 | asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b) | |
146 | asWellFormed = convertSpec toWellFormed fromWellFormed | |
147 | ||
148 | -- | Add the ability to execute some particular reader macro, as | |
149 | -- defined by its initial character and the 'Parser' which returns | |
150 | -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which | |
151 | -- can be recursively called to parse more S-Expressions, and begins | |
152 | -- parsing after the reader character has been removed from the | |
153 | -- stream. | |
154 | -- | |
155 | -- The following defines an S-expression variant that treats | |
156 | -- @'expr@ as being sugar for @(quote expr)@. Note that this is done | |
157 | -- already in a more general way by the 'withQuote' function, but | |
158 | -- it is a good illustration of using reader macros in practice: | |
159 | -- | |
160 | -- > mySpec :: SExprSpec String (SExpr Text) | |
161 | -- > mySpec = addReader '\'' reader $ mkSpec (many1 alphaNum) pack | |
162 | -- > where reader p = quote <$> p | |
163 | -- > quote e = SCons (SAtom "quote") (SCons e SNil) | |
164 | addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c | |
165 | addReader c reader spec = spec | |
166 | { readerMap = M.insert c reader (readerMap spec) } | |
167 | ||
168 | -- | Add the ability to ignore some kind of comment. This gets | |
169 | -- factored into whitespace parsing, and it's very important that | |
170 | -- the parser supplied __be able to fail__ (as otherwise it will | |
171 | -- cause an infinite loop), and also that it __not consume any input__ | |
172 | -- (which may require it to be wrapped in 'try'.) | |
173 | -- | |
174 | -- The following code defines an S-expression variant that skips | |
175 | -- C++-style comments, i.e. those which begin with @//@ and last | |
176 | -- until the end of a line: | |
177 | -- | |
178 | -- > t :: SExprSpec String (SExpr Text) | |
179 | -- > t = setComment comm $ mkSpec (many1 alphaNum) pack | |
180 | -- > where comm = try (string "//" *> manyTill newline *> pure ()) | |
181 | ||
182 | setComment :: Comment -> SExprSpec a c -> SExprSpec a c | |
183 | setComment c spec = spec { comment = Just (c <?> "comment") } | |
184 | ||
185 | -- | Add the ability to understand a quoted S-Expression. In general, | |
186 | -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is | |
187 | -- a convenience function which allows you to easily add quoted | |
188 | -- expressions to a 'SExprSpec', provided that you supply which | |
189 | -- atom you want substituted in for the symbol @quote@. | |
190 | withQuote :: IsString t => SExprSpec t (SExpr t) -> SExprSpec t (SExpr t) | |
191 | withQuote = addReader '\'' (fmap go) | |
192 | where go s = SCons "quote" (SCons s SNil) | |
193 | ||
194 | peekChar :: Parser (Maybe Char) | |
195 | peekChar = Just <$> lookAhead anyChar <|> pure Nothing | |
196 | ||
197 | parseGenericSExpr :: | |
198 | Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom) | |
199 | parseGenericSExpr atom reader skip = do | |
200 | let sExpr = parseGenericSExpr atom reader skip <?> "s-expr" | |
201 | skip | |
202 | c <- peekChar | |
203 | r <- case c of | |
204 | Nothing -> fail "Unexpected end of input" | |
205 | Just '(' -> char '(' >> skip >> parseList sExpr skip | |
206 | Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr | |
207 | _ -> SAtom `fmap` atom | |
208 | skip | |
209 | return r | |
210 | ||
211 | parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom) | |
212 | parseList sExpr skip = do | |
213 | i <- peekChar | |
214 | case i of | |
215 | Nothing -> fail "Unexpected end of input" | |
216 | Just ')' -> char ')' >> return SNil | |
217 | _ -> do | |
218 | car <- sExpr | |
219 | skip | |
220 | c <- peekChar | |
221 | case c of | |
222 | Just '.' -> do | |
223 | char '.' | |
224 | cdr <- sExpr | |
225 | skip | |
226 | char ')' | |
227 | skip | |
228 | return (SCons car cdr) | |
229 | Just ')' -> do | |
230 | char ')' | |
231 | skip | |
232 | return (SCons car SNil) | |
233 | _ -> do | |
234 | cdr <- parseList sExpr skip | |
235 | return (SCons car cdr) | |
236 | ||
237 | -- | Given a CommentMap, create the corresponding parser to | |
238 | -- skip those comments (if they exist). | |
239 | buildSkip :: Maybe (Parser ()) -> Parser () | |
240 | buildSkip Nothing = skipMany space | |
241 | buildSkip (Just c) = alternate | |
242 | where alternate = skipMany space >> ((c >> alternate) <|> return ()) | |
243 | ||
244 | doParse :: Parser a -> Text -> Either String a | |
245 | doParse p t = case runParser p () "" t of | |
246 | Left err -> Left (show err) | |
247 | Right x -> Right x | |
248 | ||
249 | -- | Decode a single S-expression. If any trailing input is left after | |
250 | -- the S-expression (ignoring comments or whitespace) then this | |
251 | -- will fail: for those cases, use 'decode', which returns a list of | |
252 | -- all the S-expressions found at the top level. | |
253 | decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier | |
254 | decodeOne spec = doParse (parser <* eof) >=> (postparse spec) | |
255 | where parser = parseGenericSExpr | |
256 | (sesPAtom spec) | |
257 | (readerMap spec) | |
258 | (buildSkip (comment spec)) | |
259 | ||
260 | -- | Decode several S-expressions according to a given 'SExprSpec'. This | |
261 | -- will return a list of every S-expression that appears at the top-level | |
262 | -- of the document. | |
263 | decode :: SExprSpec atom carrier -> Text -> Either String [carrier] | |
264 | decode spec = | |
265 | doParse (many1 parser <* eof) >=> mapM (postparse spec) | |
266 | where parser = parseGenericSExpr | |
267 | (sesPAtom spec) | |
268 | (readerMap spec) | |
269 | (buildSkip (comment spec)) | |
270 | ||
271 | -- | Encode (without newlines) a single S-expression. | |
272 | encodeSExpr :: SExpr atom -> (atom -> Text) -> Text | |
273 | encodeSExpr SNil _ = "()" | |
274 | encodeSExpr (SAtom s) t = t s | |
275 | encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t) | |
276 | where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")" | |
277 | go SNil rs = "(" <> rs <> ")" | |
278 | go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t) | |
279 | ||
280 | -- | Emit an S-Expression in a machine-readable way. This does no | |
281 | -- pretty-printing or indentation, and produces no comments. | |
282 | encodeOne :: SExprSpec atom carrier -> carrier -> Text | |
283 | encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec) | |
284 | ||
285 | encode :: SExprSpec atom carrier -> [carrier] -> Text | |
286 | encode spec cs = T.concat (map (encodeOne spec) cs) |
1 | {-# LANGUAGE OverloadedStrings #-} | |
2 | ||
3 | module Data.SCargot.HaskLike ( -- $info | |
4 | haskLikeSpec | |
5 | , HaskLikeAtom(..) | |
6 | ) where | |
7 | ||
8 | import Control.Applicative ((<$>), (<*>), (<$)) | |
9 | import Data.Maybe (catMaybes) | |
10 | import Data.String (IsString(..)) | |
11 | import Data.Text (Text, pack) | |
12 | import Text.Parsec | |
13 | import Text.Parsec.Text (Parser) | |
14 | ||
15 | import Prelude hiding (concatMap) | |
16 | ||
17 | import Data.SCargot.Common | |
18 | import Data.SCargot.Repr.Basic (SExpr) | |
19 | import Data.SCargot.General (SExprSpec, mkSpec) | |
20 | ||
21 | {- $info | |
22 | ||
23 | This module is intended for simple, ad-hoc configuration or data formats | |
24 | that might not need their on rich structure but might benefit from a few | |
25 | various kinds of literals. The 'haskLikeSpec' understands identifiers as | |
26 | defined by R5RS, as well as string, integer, and floating-point literals | |
27 | as defined by the Haskell spec. It does _not_ natively understand other | |
28 | data types, such as booleans, vectors, bitstrings, or the like. | |
29 | ||
30 | -} | |
31 | ||
32 | ||
33 | -- | An atom type that understands Haskell-like values as well as | |
34 | -- Scheme-like identifiers. | |
35 | data HaskLikeAtom | |
36 | = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme | |
37 | -- standard | |
38 | | HSString Text -- ^ A string, parsed according to the syntax for string | |
39 | -- literals in the Haskell report | |
40 | | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to | |
41 | -- the syntax for integer literals in the Haskell report | |
42 | | HSFloat Double -- ^ A double-precision floating-point value, parsed | |
43 | -- according to the syntax for floats in the Haskell | |
44 | -- report | |
45 | deriving (Eq, Show) | |
46 | ||
47 | instance IsString HaskLikeAtom where | |
48 | fromString = HSIdent . fromString | |
49 | ||
50 | pString :: Parser Text | |
51 | pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc)) | |
52 | where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026') | |
53 | esc = do char '\\' | |
54 | Nothing <$ (gap <|> char '&') <|> | |
55 | Just <$> code | |
56 | gap = many1 space >> char '\\' | |
57 | code = eEsc <|> eNum <|> eCtrl <|> eAscii | |
58 | eCtrl = char '^' >> unCtrl <$> upper | |
59 | eNum = (toEnum . fromInteger) <$> | |
60 | (decNumber <|> (char 'o' >> octNumber) | |
61 | <|> (char 'x' >> hexNumber)) | |
62 | eEsc = choice [ char a >> return b | (a, b) <- escMap ] | |
63 | eAscii = choice [ try (string a >> return b) | |
64 | | (a, b) <- asciiMap ] | |
65 | unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1) | |
66 | ||
67 | escMap :: [(Char, Char)] | |
68 | escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'" | |
69 | ||
70 | asciiMap :: [(String, Char)] | |
71 | asciiMap = zip | |
72 | ["BS","HT","LF","VT","FF","CR","SO","SI","EM" | |
73 | ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX" | |
74 | ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3" | |
75 | ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"] | |
76 | ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++ | |
77 | "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++ | |
78 | "\SYN\ETB\CAN\SUB\ESC\DEL") | |
79 | ||
80 | pFloat :: Parser Double | |
81 | pFloat = do | |
82 | n <- decNumber | |
83 | withDot n <|> noDot n | |
84 | where withDot n = do | |
85 | char '.' | |
86 | m <- decNumber | |
87 | e <- option 1.0 exponent | |
88 | return ((fromIntegral n + asDec m 0) * e) | |
89 | noDot n = do | |
90 | e <- exponent | |
91 | return (fromIntegral n * e) | |
92 | exponent = do | |
93 | oneOf "eE" | |
94 | s <- power | |
95 | x <- decNumber | |
96 | return (10 ** s (fromIntegral x)) | |
97 | asDec 0 k = k | |
98 | asDec n k = | |
99 | asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1) | |
100 | ||
101 | power :: Num a => Parser (a -> a) | |
102 | power = negate <$ char '-' <|> id <$ char '+' <|> return id | |
103 | ||
104 | pInt :: Parser Integer | |
105 | pInt = do | |
106 | s <- power | |
107 | n <- pZeroNum <|> decNumber | |
108 | return (fromIntegral (s n)) | |
109 | ||
110 | pZeroNum :: Parser Integer | |
111 | pZeroNum = char '0' >> | |
112 | ( (oneOf "xX" >> hexNumber) | |
113 | <|> (oneOf "oO" >> octNumber) | |
114 | <|> decNumber | |
115 | <|> return 0 | |
116 | ) | |
117 | ||
118 | pHaskLikeAtom :: Parser HaskLikeAtom | |
119 | pHaskLikeAtom | |
120 | = HSFloat <$> (try pFloat <?> "float") | |
121 | <|> HSInt <$> (try pInt <?> "integer") | |
122 | <|> HSString <$> (pString <?> "string literal") | |
123 | <|> HSIdent <$> (parseR5RSIdent <?> "token") | |
124 | ||
125 | sHaskLikeAtom :: HaskLikeAtom -> Text | |
126 | sHaskLikeAtom (HSIdent t) = t | |
127 | sHaskLikeAtom (HSString s) = pack (show s) | |
128 | sHaskLikeAtom (HSInt i) = pack (show i) | |
129 | sHaskLikeAtom (HSFloat f) = pack (show f) | |
130 | ||
131 | -- | This `SExprSpec` understands s-expressions that contain | |
132 | -- Scheme-like tokens, as well as string literals, integer | |
133 | -- literals, and floating-point literals. Each of these values | |
134 | -- is parsed according to the lexical rules in the Haskell | |
135 | -- report, so the same set of string escapes, numeric bases, | |
136 | -- and floating-point options are available. This spec does | |
137 | -- not parse comments and does not understand any reader | |
138 | -- macros. | |
139 | haskLikeSpec :: SExprSpec HaskLikeAtom (SExpr HaskLikeAtom) | |
140 | haskLikeSpec = mkSpec pHaskLikeAtom sHaskLikeAtom |
1 | {-# LANGUAGE OverloadedStrings #-} | |
2 | ||
3 | module Data.SCargot.Language.Basic | |
4 | ( -- * Spec | |
5 | -- $descr | |
6 | basicParser | |
7 | , basicPrinter | |
8 | ) where | |
9 | ||
10 | import Control.Applicative ((<$>)) | |
11 | import Data.Char (isAlphaNum) | |
12 | import Text.Parsec (many1, satisfy) | |
13 | import Data.Text (Text, pack) | |
14 | ||
15 | import Data.SCargot.Repr.Basic (SExpr) | |
16 | import Data.SCargot ( SExprParser | |
17 | , SExprPrinter | |
18 | , mkParser | |
19 | , flatPrint | |
20 | ) | |
21 | import Data.SCargot.Comments (withLispComments) | |
22 | ||
23 | isAtomChar :: Char -> Bool | |
24 | isAtomChar c = isAlphaNum c | |
25 | || c == '-' || c == '*' || c == '/' | |
26 | || c == '+' || c == '<' || c == '>' | |
27 | || c == '=' || c == '!' || c == '?' | |
28 | ||
29 | -- $descr | |
30 | -- The 'basicSpec' describes S-expressions whose atoms are simply | |
31 | -- text strings that contain alphanumeric characters and a small | |
32 | -- set of punctuation. It does no parsing of numbers or other data | |
33 | -- types, and will accept tokens that typical Lisp implementations | |
34 | -- would find nonsensical (like @77foo@). | |
35 | -- | |
36 | -- Atoms recognized by the 'basicSpec' are any string matching the | |
37 | -- regular expression @[A-Za-z0-9+*<>/=!?-]+@. | |
38 | ||
39 | -- | A 'SExprParser' that understands atoms to be sequences of | |
40 | -- alphanumeric characters as well as the punctuation | |
41 | -- characters @[-*/+<>=!?]@, and does no processing of them. | |
42 | -- | |
43 | -- >>> decode basicParser "(1 elephant)" | |
44 | -- Right [SCons (SAtom "1") (SCons (SAtom "elephant") SNil)] | |
45 | basicParser :: SExprParser Text (SExpr Text) | |
46 | basicParser = mkParser pToken | |
47 | where pToken = pack <$> many1 (satisfy isAtomChar) | |
48 | ||
49 | -- | A 'SExprPrinter' that prints textual atoms directly (without quoting | |
50 | -- or any other processing) onto a single line. | |
51 | -- | |
52 | -- >>> encode basicPrinter [L [A "1", A "elephant"]] | |
53 | -- "(1 elephant)" | |
54 | basicPrinter :: SExprPrinter Text (SExpr Text) | |
55 | basicPrinter = flatPrint id |
1 | {-# LANGUAGE OverloadedStrings #-} | |
2 | ||
3 | module Data.SCargot.Language.HaskLike | |
4 | ( -- $info | |
5 | HaskLikeAtom(..) | |
6 | , haskLikeParser | |
7 | , haskLikePrinter | |
8 | ) where | |
9 | ||
10 | import Control.Applicative ((<$>), (<*>), (<$)) | |
11 | import Data.Maybe (catMaybes) | |
12 | import Data.String (IsString(..)) | |
13 | import Data.Text (Text, pack) | |
14 | import Text.Parsec | |
15 | import Text.Parsec.Text (Parser) | |
16 | ||
17 | import Prelude hiding (concatMap) | |
18 | ||
19 | import Data.SCargot.Common | |
20 | import Data.SCargot.Repr.Basic (SExpr) | |
21 | import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint) | |
22 | ||
23 | {- $info | |
24 | ||
25 | This module is intended for simple, ad-hoc configuration or data formats | |
26 | that might not need their on rich structure but might benefit from a few | |
27 | various kinds of literals. The 'haskLikeParser' understands identifiers as | |
28 | defined by R5RS, as well as string, integer, and floating-point literals | |
29 | as defined by the Haskell spec. It does __not__ natively understand other | |
30 | data types, such as booleans, vectors, bitstrings. | |
31 | ||
32 | -} | |
33 | ||
34 | ||
35 | -- | An atom type that understands Haskell-like values as well as | |
36 | -- Scheme-like identifiers. | |
37 | data HaskLikeAtom | |
38 | = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme | |
39 | -- standard | |
40 | | HSString Text -- ^ A string, parsed according to the syntax for string | |
41 | -- literals in the Haskell report | |
42 | | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to | |
43 | -- the syntax for integer literals in the Haskell report | |
44 | | HSFloat Double -- ^ A double-precision floating-point value, parsed | |
45 | -- according to the syntax for floats in the Haskell | |
46 | -- report | |
47 | deriving (Eq, Show) | |
48 | ||
49 | instance IsString HaskLikeAtom where | |
50 | fromString = HSIdent . fromString | |
51 | ||
52 | pString :: Parser Text | |
53 | pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc)) | |
54 | where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026') | |
55 | esc = do char '\\' | |
56 | Nothing <$ (gap <|> char '&') <|> | |
57 | Just <$> code | |
58 | gap = many1 space >> char '\\' | |
59 | code = eEsc <|> eNum <|> eCtrl <|> eAscii | |
60 | eCtrl = char '^' >> unCtrl <$> upper | |
61 | eNum = (toEnum . fromInteger) <$> | |
62 | (decNumber <|> (char 'o' >> octNumber) | |
63 | <|> (char 'x' >> hexNumber)) | |
64 | eEsc = choice [ char a >> return b | (a, b) <- escMap ] | |
65 | eAscii = choice [ try (string a >> return b) | |
66 | | (a, b) <- asciiMap ] | |
67 | unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1) | |
68 | ||
69 | escMap :: [(Char, Char)] | |
70 | escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'" | |
71 | ||
72 | asciiMap :: [(String, Char)] | |
73 | asciiMap = zip | |
74 | ["BS","HT","LF","VT","FF","CR","SO","SI","EM" | |
75 | ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX" | |
76 | ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3" | |
77 | ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"] | |
78 | ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++ | |
79 | "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++ | |
80 | "\SYN\ETB\CAN\SUB\ESC\DEL") | |
81 | ||
82 | pFloat :: Parser Double | |
83 | pFloat = do | |
84 | n <- decNumber | |
85 | withDot n <|> noDot n | |
86 | where withDot n = do | |
87 | char '.' | |
88 | m <- decNumber | |
89 | e <- option 1.0 exponent | |
90 | return ((fromIntegral n + asDec m 0) * e) | |
91 | noDot n = do | |
92 | e <- exponent | |
93 | return (fromIntegral n * e) | |
94 | exponent = do | |
95 | oneOf "eE" | |
96 | s <- power | |
97 | x <- decNumber | |
98 | return (10 ** s (fromIntegral x)) | |
99 | asDec 0 k = k | |
100 | asDec n k = | |
101 | asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1) | |
102 | ||
103 | power :: Num a => Parser (a -> a) | |
104 | power = negate <$ char '-' <|> id <$ char '+' <|> return id | |
105 | ||
106 | pInt :: Parser Integer | |
107 | pInt = do | |
108 | s <- power | |
109 | n <- pZeroNum <|> decNumber | |
110 | return (fromIntegral (s n)) | |
111 | ||
112 | pZeroNum :: Parser Integer | |
113 | pZeroNum = char '0' >> | |
114 | ( (oneOf "xX" >> hexNumber) | |
115 | <|> (oneOf "oO" >> octNumber) | |
116 | <|> decNumber | |
117 | <|> return 0 | |
118 | ) | |
119 | ||
120 | pHaskLikeAtom :: Parser HaskLikeAtom | |
121 | pHaskLikeAtom | |
122 | = HSFloat <$> (try pFloat <?> "float") | |
123 | <|> HSInt <$> (try pInt <?> "integer") | |
124 | <|> HSString <$> (pString <?> "string literal") | |
125 | <|> HSIdent <$> (parseR5RSIdent <?> "token") | |
126 | ||
127 | sHaskLikeAtom :: HaskLikeAtom -> Text | |
128 | sHaskLikeAtom (HSIdent t) = t | |
129 | sHaskLikeAtom (HSString s) = pack (show s) | |
130 | sHaskLikeAtom (HSInt i) = pack (show i) | |
131 | sHaskLikeAtom (HSFloat f) = pack (show f) | |
132 | ||
133 | -- | This `SExprParser` understands s-expressions that contain | |
134 | -- Scheme-like tokens, as well as string literals, integer | |
135 | -- literals, and floating-point literals. Each of these values | |
136 | -- is parsed according to the lexical rules in the Haskell | |
137 | -- report, so the same set of string escapes, numeric bases, | |
138 | -- and floating-point options are available. This spec does | |
139 | -- not parse comments and does not understand any reader | |
140 | -- macros. | |
141 | -- | |
142 | -- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")" | |
143 | -- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)] | |
144 | haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom) | |
145 | haskLikeParser = mkParser pHaskLikeAtom | |
146 | ||
147 | -- | This 'SExprPrinter' emits s-expressions that contain Scheme-like | |
148 | -- tokens as well as string literals, integer literals, and floating-point | |
149 | -- literals, which will be emitted as the literals produced by Haskell's | |
150 | -- 'show' function. This printer will produce a flat s-expression with | |
151 | -- no indentation of any kind. | |
152 | -- | |
153 | -- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]] | |
154 | -- "(1 \"elephant\")" | |
155 | haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom) | |
156 | haskLikePrinter = flatPrint sHaskLikeAtom |
1 | {-# LANGUAGE ViewPatterns #-} | |
2 | {-# LANGUAGE OverloadedStrings #-} | |
3 | ||
4 | module Data.SCargot.Parse | |
5 | ( -- * Parsing | |
6 | decode | |
7 | , decodeOne | |
8 | -- * Parsing Control | |
9 | , SExprParser | |
10 | , Reader | |
11 | , Comment | |
12 | , mkParser | |
13 | , setCarrier | |
14 | , addReader | |
15 | , setComment | |
16 | -- * Specific SExprParser Conversions | |
17 | , asRich | |
18 | , asWellFormed | |
19 | , withQuote | |
20 | ) where | |
21 | ||
22 | import Control.Applicative ((<*), (*>), (<*>), (<$>), pure) | |
23 | import Control.Monad ((>=>)) | |
24 | import Data.Char (isAlpha, isDigit, isAlphaNum) | |
25 | import Data.Map.Strict (Map) | |
26 | import qualified Data.Map.Strict as M | |
27 | import Data.Maybe (fromJust) | |
28 | import Data.Monoid ((<>)) | |
29 | import Data.String (IsString) | |
30 | import Data.Text (Text, pack, unpack) | |
31 | import qualified Data.Text as T | |
32 | import Text.Parsec ( (<|>) | |
33 | , (<?>) | |
34 | , char | |
35 | , eof | |
36 | , lookAhead | |
37 | , many1 | |
38 | , runParser | |
39 | , skipMany | |
40 | ) | |
41 | import Text.Parsec.Char (anyChar, space) | |
42 | import Text.Parsec.Text (Parser) | |
43 | ||
44 | import Data.SCargot.Repr ( SExpr(..) | |
45 | , RichSExpr | |
46 | , WellFormedSExpr | |
47 | , fromRich | |
48 | , toRich | |
49 | , fromWellFormed | |
50 | , toWellFormed | |
51 | ) | |
52 | ||
53 | type ReaderMacroMap atom = Map Char (Reader atom) | |
54 | ||
55 | -- | A 'Reader' represents a reader macro: it takes a parser for | |
56 | -- the S-Expression type and performs as much or as little | |
57 | -- parsing as it would like, and then returns an S-expression. | |
58 | type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom)) | |
59 | ||
60 | -- | A 'Comment' represents any kind of skippable comment. This | |
61 | -- parser __must__ be able to fail if a comment is not being | |
62 | -- recognized, and it __must__ not consume any input in case | |
63 | -- of failure. | |
64 | type Comment = Parser () | |
65 | ||
66 | -- | A 'SExprParser' describes a parser for a particular value | |
67 | -- that has been serialized as an s-expression. The @atom@ parameter | |
68 | -- corresponds to a Haskell type used to represent the atoms, | |
69 | -- and the @carrier@ parameter corresponds to the parsed S-Expression | |
70 | -- structure. | |
71 | data SExprParser atom carrier = SExprParser | |
72 | { sesPAtom :: Parser atom | |
73 | , readerMap :: ReaderMacroMap atom | |
74 | , comment :: Maybe Comment | |
75 | , postparse :: SExpr atom -> Either String carrier | |
76 | } | |
77 | ||
78 | -- | Create a basic 'SExprParser' when given a parser | |
79 | -- for an atom type. | |
80 | -- | |
81 | -- >>> import Text.Parsec (alphaNum, many1) | |
82 | -- >>> let parser = mkParser (many1 alphaNum) | |
83 | -- >>> decode parser "(ele phant)" | |
84 | -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)] | |
85 | mkParser :: Parser atom -> SExprParser atom (SExpr atom) | |
86 | mkParser parser = SExprParser | |
87 | { sesPAtom = parser | |
88 | , readerMap = M.empty | |
89 | , comment = Nothing | |
90 | , postparse = return | |
91 | } | |
92 | ||
93 | -- | Modify the carrier type for a 'SExprParser'. This is | |
94 | -- used internally to convert between various 'SExpr' representations, | |
95 | -- but could also be used externally to add an extra conversion layer | |
96 | -- onto a 'SExprParser'. | |
97 | -- | |
98 | -- >>> import Text.Parsec (alphaNum, many1) | |
99 | -- >>> import Data.SCargot.Repr (toRich) | |
100 | -- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum)) | |
101 | -- >>> decode parser "(ele phant)" | |
102 | -- Right [RSlist [RSAtom "ele",RSAtom "phant"]] | |
103 | setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c | |
104 | setCarrier f spec = spec { postparse = postparse spec >=> f } | |
105 | ||
106 | -- | Convert the final output representation from the 'SExpr' type | |
107 | -- to the 'RichSExpr' type. | |
108 | -- | |
109 | -- >>> import Text.Parsec (alphaNum, many1) | |
110 | -- >>> let parser = asRich (mkParser (many1 alphaNum)) | |
111 | -- >>> decode parser "(ele phant)" | |
112 | -- Right [RSlist [RSAtom "ele",RSAtom "phant"]] | |
113 | asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b) | |
114 | asRich = setCarrier (return . toRich) | |
115 | ||
116 | -- | Convert the final output representation from the 'SExpr' type | |
117 | -- to the 'WellFormedSExpr' type. | |
118 | -- | |
119 | -- >>> import Text.Parsec (alphaNum, many1) | |
120 | -- >>> let parser = asWellFormed (mkParser (many1 alphaNum)) | |
121 | -- >>> decode parser "(ele phant)" | |
122 | -- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]] | |
123 | asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b) | |
124 | asWellFormed = setCarrier toWellFormed | |
125 | ||
126 | -- | Add the ability to execute some particular reader macro, as | |
127 | -- defined by its initial character and the 'Parser' which returns | |
128 | -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which | |
129 | -- can be recursively called to parse more S-Expressions, and begins | |
130 | -- parsing after the reader character has been removed from the | |
131 | -- stream. | |
132 | -- | |
133 | -- >>> import Text.Parsec (alphaNum, char, many1) | |
134 | -- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p) | |
135 | -- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum)) | |
136 | -- >>> decode parser "(an [ele phant])" | |
137 | -- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)] | |
138 | ||
139 | addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c | |
140 | addReader c reader spec = spec | |
141 | { readerMap = M.insert c reader (readerMap spec) } | |
142 | ||
143 | -- | Add the ability to ignore some kind of comment. This gets | |
144 | -- factored into whitespace parsing, and it's very important that | |
145 | -- the parser supplied __be able to fail__ (as otherwise it will | |
146 | -- cause an infinite loop), and also that it __not consume any input__ | |
147 | -- (which may require it to be wrapped in 'try'.) | |
148 | -- | |
149 | -- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string) | |
150 | -- >>> let comment = string "//" *> manyTill anyChar newline *> pure () | |
151 | -- >>> let parser = setComment comment (mkParser (many1 alphaNum)) | |
152 | -- >>> decode parser "(ele //a comment\n phant)" | |
153 | -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)] | |
154 | ||
155 | setComment :: Comment -> SExprParser a c -> SExprParser a c | |
156 | setComment c spec = spec { comment = Just (c <?> "comment") } | |
157 | ||
158 | -- | Add the ability to understand a quoted S-Expression. | |
159 | -- Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This | |
160 | -- assumes that the underlying atom type implements the "IsString" | |
161 | -- class, and will create the @quote@ atom using @fromString "quote"@. | |
162 | -- | |
163 | -- >>> import Text.Parsec (alphaNum, many1) | |
164 | -- >>> let parser = withQuote (mkParser (many1 alphaNum)) | |
165 | -- >>> decode parser "'elephant" | |
166 | -- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)] | |
167 | withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t) | |
168 | withQuote = addReader '\'' (fmap go) | |
169 | where go s = SCons "quote" (SCons s SNil) | |
170 | ||
171 | peekChar :: Parser (Maybe Char) | |
172 | peekChar = Just <$> lookAhead anyChar <|> pure Nothing | |
173 | ||
174 | parseGenericSExpr :: | |
175 | Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom) | |
176 | parseGenericSExpr atom reader skip = do | |
177 | let sExpr = parseGenericSExpr atom reader skip <?> "s-expr" | |
178 | skip | |
179 | c <- peekChar | |
180 | r <- case c of | |
181 | Nothing -> fail "Unexpected end of input" | |
182 | Just '(' -> char '(' >> skip >> parseList sExpr skip | |
183 | Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr | |
184 | _ -> SAtom `fmap` atom | |
185 | skip | |
186 | return r | |
187 | ||
188 | parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom) | |
189 | parseList sExpr skip = do | |
190 | i <- peekChar | |
191 | case i of | |
192 | Nothing -> fail "Unexpected end of input" | |
193 | Just ')' -> char ')' >> return SNil | |
194 | _ -> do | |
195 | car <- sExpr | |
196 | skip | |
197 | c <- peekChar | |
198 | case c of | |
199 | Just '.' -> do | |
200 | char '.' | |
201 | cdr <- sExpr | |
202 | skip | |
203 | char ')' | |
204 | skip | |
205 | return (SCons car cdr) | |
206 | Just ')' -> do | |
207 | char ')' | |
208 | skip | |
209 | return (SCons car SNil) | |
210 | _ -> do | |
211 | cdr <- parseList sExpr skip | |
212 | return (SCons car cdr) | |
213 | ||
214 | -- | Given a CommentMap, create the corresponding parser to | |
215 | -- skip those comments (if they exist). | |
216 | buildSkip :: Maybe (Parser ()) -> Parser () | |
217 | buildSkip Nothing = skipMany space | |
218 | buildSkip (Just c) = alternate | |
219 | where alternate = skipMany space >> ((c >> alternate) <|> return ()) | |
220 | ||
221 | doParse :: Parser a -> Text -> Either String a | |
222 | doParse p t = case runParser p () "" t of | |
223 | Left err -> Left (show err) | |
224 | Right x -> Right x | |
225 | ||
226 | -- | Decode a single S-expression. If any trailing input is left after | |
227 | -- the S-expression (ignoring comments or whitespace) then this | |
228 | -- will fail: for those cases, use 'decode', which returns a list of | |
229 | -- all the S-expressions found at the top level. | |
230 | decodeOne :: SExprParser atom carrier -> Text -> Either String carrier | |
231 | decodeOne spec = doParse (parser <* eof) >=> (postparse spec) | |
232 | where parser = parseGenericSExpr | |
233 | (sesPAtom spec) | |
234 | (readerMap spec) | |
235 | (buildSkip (comment spec)) | |
236 | ||
237 | -- | Decode several S-expressions according to a given 'SExprParser'. This | |
238 | -- will return a list of every S-expression that appears at the top-level | |
239 | -- of the document. | |
240 | decode :: SExprParser atom carrier -> Text -> Either String [carrier] | |
241 | decode spec = | |
242 | doParse (many1 parser <* eof) >=> mapM (postparse spec) | |
243 | where parser = parseGenericSExpr | |
244 | (sesPAtom spec) | |
245 | (readerMap spec) | |
246 | (buildSkip (comment spec)) | |
247 | ||
248 | {- | |
249 | -- | Encode (without newlines) a single S-expression. | |
250 | encodeSExpr :: SExpr atom -> (atom -> Text) -> Text | |
251 | encodeSExpr SNil _ = "()" | |
252 | encodeSExpr (SAtom s) t = t s | |
253 | encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t) | |
254 | where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")" | |
255 | go SNil rs = "(" <> rs <> ")" | |
256 | go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t) | |
257 | ||
258 | -- | Emit an S-Expression in a machine-readable way. This does no | |
259 | -- pretty-printing or indentation, and produces no comments. | |
260 | encodeOne :: SExprParser atom carrier -> carrier -> Text | |
261 | encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec) | |
262 | ||
263 | encode :: SExprParser atom carrier -> [carrier] -> Text | |
264 | encode spec cs = T.concat (map (encodeOne spec) cs) | |
265 | -} |
1 | {-# LANGUAGE RecordWildCards #-} | |
2 | {-# LANGUAGE OverloadedStrings #-} | |
3 | {-# LANGUAGE ScopedTypeVariables #-} | |
4 | ||
5 | module Data.SCargot.Pretty | |
6 | ( -- * Pretty-Printing | |
7 | prettyPrintSExpr | |
8 | -- * Pretty-Printing Control | |
9 | , LayoutOptions(..) | |
10 | , Indent(..) | |
11 | -- * Default Printing Strategies | |
12 | , basicPrint | |
13 | , flatPrint | |
14 | ) where | |
15 | ||
16 | import Data.Monoid ((<>)) | |
17 | import Data.Text (Text) | |
18 | import qualified Data.Text as T | |
19 | ||
20 | import Data.SCargot.Repr | |
21 | ||
22 | -- | The 'Indent' type is used to determine how to indent subsequent | |
23 | -- s-expressions in a list, after printing the head of the list. | |
24 | data Indent | |
25 | = Swing -- ^ A 'Swing' indent will indent subsequent exprs some fixed | |
26 | -- amount more than the current line. | |
27 | -- | |
28 | -- > (foo | |
29 | -- > bar | |
30 | -- > baz | |
31 | -- > quux) | |
32 | | SwingAfter Int -- ^ A 'SwingAfter' @n@ indent will try to print the | |
33 | -- first @n@ expressions after the head on the same | |
34 | -- line as the head, and all after will be swung. | |
35 | -- 'SwingAfter' @0@ is equivalent to 'Swing'. | |
36 | -- | |
37 | -- > (foo bar | |
38 | -- > baz | |
39 | -- > quux) | |
40 | | Align -- ^ An 'Align' indent will print the first expression after | |
41 | -- the head on the same line, and subsequent expressions will | |
42 | -- be aligned with that one. | |
43 | -- | |
44 | -- > (foo bar | |
45 | -- > baz | |
46 | -- > quux) | |
47 | deriving (Eq, Show) | |
48 | ||
49 | -- | A 'LayoutOptions' value describes the strategy taken in | |
50 | -- pretty-printing a 'SExpr'. | |
51 | data LayoutOptions a = LayoutOptions | |
52 | { atomPrinter :: a -> Text | |
53 | -- ^ How to serialize a given atom to 'Text'. | |
54 | , swingIndent :: SExpr a -> Indent | |
55 | -- ^ How to indent subsequent expressions, as determined by | |
56 | -- the head of the list. | |
57 | , indentAmount :: Int | |
58 | -- ^ How much to indent after a swung indentation. | |
59 | , maxWidth :: Maybe Int | |
60 | -- ^ The maximum width (if any) If this is 'None' then | |
61 | -- the resulting s-expression will always be printed | |
62 | -- on a single line. | |
63 | } | |
64 | ||
65 | -- | A default 'LayoutOptions' struct that will always print a 'SExpr' | |
66 | -- as a single line. | |
67 | flatPrint :: (a -> Text) -> LayoutOptions a | |
68 | flatPrint printer = LayoutOptions | |
69 | { atomPrinter = printer | |
70 | , swingIndent = const Swing | |
71 | , indentAmount = 2 | |
72 | , maxWidth = Nothing | |
73 | } | |
74 | ||
75 | -- | A default 'LayoutOptions' struct that will always swing subsequent | |
76 | -- expressions onto later lines if they're too long, indenting them | |
77 | -- by two spaces. | |
78 | basicPrint :: (a -> Text) -> LayoutOptions a | |
79 | basicPrint printer = LayoutOptions | |
80 | { atomPrinter = printer | |
81 | , swingIndent = const Swing | |
82 | , indentAmount = 2 | |
83 | , maxWidth = Just 80 | |
84 | } | |
85 | ||
86 | -- Sort of like 'unlines' but without the trailing newline | |
87 | joinLines :: [Text] -> Text | |
88 | joinLines = T.intercalate "\n" | |
89 | ||
90 | -- Indents a line by n spaces | |
91 | indent :: Int -> Text -> Text | |
92 | indent n ts = T.replicate n " " <> ts | |
93 | ||
94 | -- Indents every line n spaces, and adds a newline to the beginning | |
95 | -- used in swung indents | |
96 | indentAll :: Int -> [Text] -> Text | |
97 | indentAll n = ("\n" <>) . joinLines . map (indent n) | |
98 | ||
99 | -- Indents every line but the first by some amount | |
100 | -- used in aligned indents | |
101 | indentSubsequent :: Int -> [Text] -> Text | |
102 | indentSubsequent _ [] = "" | |
103 | indentSubsequent _ [t] = t | |
104 | indentSubsequent n (t:ts) = joinLines (t : go ts) | |
105 | where go = map (indent n) | |
106 | ||
107 | -- oh god this code is so disgusting | |
108 | -- i'm sorry to everyone i let down by writing this | |
109 | -- i swear i'll do better in the future i promise i have to | |
110 | -- for my sake and for everyone's | |
111 | ||
112 | -- | Pretty-print a 'SExpr' according to the options in a | |
113 | -- 'LayoutOptions' value. | |
114 | prettyPrintSExpr :: LayoutOptions a -> SExpr a -> Text | |
115 | prettyPrintSExpr LayoutOptions { .. } = pHead 0 | |
116 | where pHead _ SNil = "()" | |
117 | pHead _ (SAtom a) = atomPrinter a | |
118 | pHead ind (SCons x xs) = gather ind x xs id | |
119 | gather _ _ (SAtom _) _ = error "no dotted pretty printing yet!" | |
120 | gather ind h (SCons x xs) k = gather ind h xs (k . (x:)) | |
121 | gather ind h SNil k = "(" <> hd <> body <> ")" | |
122 | where hd = indentSubsequent ind [pHead (ind+1) h] | |
123 | lst = k [] | |
124 | flat = T.unwords (map (pHead (ind+1)) lst) | |
125 | headWidth = T.length hd + 1 | |
126 | indented = | |
127 | case swingIndent h of | |
128 | SwingAfter n -> | |
129 | let (l, ls) = splitAt n lst | |
130 | t = T.unwords (map (pHead (ind+1)) l) | |
131 | ts = indentAll (ind + indentAmount) | |
132 | (map (pHead (ind + indentAmount)) ls) | |
133 | in t <> ts | |
134 | Swing -> | |
135 | indentAll (ind + indentAmount) | |
136 | (map (pHead (ind + indentAmount)) lst) | |
137 | Align -> | |
138 | indentSubsequent (ind + headWidth + 1) | |
139 | (map (pHead (ind + headWidth + 1)) lst) | |
140 | body | |
141 | | length lst == 0 = "" | |
142 | | Just maxAmt <- maxWidth | |
143 | , T.length flat + ind > maxAmt = " " <> indented | |
144 | | otherwise = " " <> flat |
1 | {-# LANGUAGE RecordWildCards #-} | |
2 | {-# LANGUAGE OverloadedStrings #-} | |
3 | {-# LANGUAGE ScopedTypeVariables #-} | |
4 | ||
5 | module Data.SCargot.Print | |
6 | ( -- * Pretty-Printing | |
7 | encodeOne | |
8 | , encode | |
9 | -- * Pretty-Printing Control | |
10 | , SExprPrinter | |
11 | , Indent(..) | |
12 | , setFromCarrier | |
13 | , setMaxWidth | |
14 | , removeMaxWidth | |
15 | , setIndentAmount | |
16 | , setIndentStrategy | |
17 | -- * Default Printing Strategies | |
18 | , basicPrint | |
19 | , flatPrint | |
20 | ) where | |
21 | ||
22 | import Data.Monoid ((<>)) | |
23 | import Data.Text (Text) | |
24 | import qualified Data.Text as T | |
25 | ||
26 | import Data.SCargot.Repr | |
27 | ||
28 | -- | The 'Indent' type is used to determine how to indent subsequent | |
29 | -- s-expressions in a list, after printing the head of the list. | |
30 | data Indent | |
31 | = Swing -- ^ A 'Swing' indent will indent subsequent exprs some fixed | |
32 | -- amount more than the current line. | |
33 | -- | |
34 | -- > (foo | |
35 | -- > bar | |
36 | -- > baz | |
37 | -- > quux) | |
38 | | SwingAfter Int -- ^ A 'SwingAfter' @n@ indent will try to print the | |
39 | -- first @n@ expressions after the head on the same | |
40 | -- line as the head, and all after will be swung. | |
41 | -- 'SwingAfter' @0@ is equivalent to 'Swing'. | |
42 | -- | |
43 | -- > (foo bar | |
44 | -- > baz | |
45 | -- > quux) | |
46 | | Align -- ^ An 'Align' indent will print the first expression after | |
47 | -- the head on the same line, and subsequent expressions will | |
48 | -- be aligned with that one. | |
49 | -- | |
50 | -- > (foo bar | |
51 | -- > baz | |
52 | -- > quux) | |
53 | deriving (Eq, Show) | |
54 | ||
55 | -- | A 'SExprPrinter' value describes how to print a given value as an | |
56 | -- s-expression. The @carrier@ type parameter indicates the value | |
57 | -- that will be printed, and the @atom@ parameter indicates the type | |
58 | -- that will represent tokens in an s-expression structure. | |
59 | data SExprPrinter atom carrier = SExprPrinter | |
60 | { atomPrinter :: atom -> Text | |
61 | -- ^ How to serialize a given atom to 'Text'. | |
62 | , fromCarrier :: carrier -> SExpr atom | |
63 | -- ^ How to turn a carrier type back into a 'Sexpr'. | |
64 | , swingIndent :: SExpr atom -> Indent | |
65 | -- ^ How to indent subsequent expressions, as determined by | |
66 | -- the head of the list. | |
67 | , indentAmount :: Int | |
68 | -- ^ How much to indent after a swung indentation. | |
69 | , maxWidth :: Maybe Int | |
70 | -- ^ The maximum width (if any) If this is 'None' then | |
71 | -- the resulting s-expression will always be printed | |
72 | -- on a single line. | |
73 | } | |
74 | ||
75 | -- | A default 'LayoutOptions' struct that will always print a 'SExpr' | |
76 | -- as a single line. | |
77 | flatPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom) | |
78 | flatPrint printer = SExprPrinter | |
79 | { atomPrinter = printer | |
80 | , fromCarrier = id | |
81 | , swingIndent = const Swing | |
82 | , indentAmount = 2 | |
83 | , maxWidth = Nothing | |
84 | } | |
85 | ||
86 | -- | A default 'LayoutOptions' struct that will always swing subsequent | |
87 | -- expressions onto later lines if they're too long, indenting them | |
88 | -- by two spaces. | |
89 | basicPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom) | |
90 | basicPrint printer = SExprPrinter | |
91 | { atomPrinter = printer | |
92 | , fromCarrier = id | |
93 | , swingIndent = const Swing | |
94 | , indentAmount = 2 | |
95 | , maxWidth = Just 80 | |
96 | } | |
97 | ||
98 | -- | Modify the carrier type of a 'SExprPrinter' by describing how | |
99 | -- to convert the new type back to the previous type. For example, | |
100 | -- to pretty-print a well-formed s-expression, we can modify the | |
101 | -- 'SExprPrinter' value as follows: | |
102 | -- | |
103 | -- >>> let printer = setFromCarrier fromWellFormed (basicPrint id) | |
104 | -- >>> encodeOne printer (WFSList [WFSAtom "ele", WFSAtom "phant"]) | |
105 | -- "(ele phant)" | |
106 | setFromCarrier :: (c -> b) -> SExprPrinter a b -> SExprPrinter a c | |
107 | setFromCarrier fc pr = pr { fromCarrier = fromCarrier pr . fc } | |
108 | ||
109 | -- | Dictate a maximum width for pretty-printed s-expressions. | |
110 | -- | |
111 | -- >>> let printer = setMaxWidth 8 (basicPrint id) | |
112 | -- >>> encodeOne printer (L [A "one", A "two", A "three"]) | |
113 | -- "(one \n two\n three)" | |
114 | setMaxWidth :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier | |
115 | setMaxWidth n pr = pr { maxWidth = Just n } | |
116 | ||
117 | -- | Allow the serialized s-expression to be arbitrarily wide. This | |
118 | -- makes all pretty-printing happen on a single line. | |
119 | -- | |
120 | -- >>> let printer = removeMaxWidth (basicPrint id) | |
121 | -- >>> encodeOne printer (L [A "one", A "two", A "three"]) | |
122 | -- "(one two three)" | |
123 | removeMaxWidth :: SExprPrinter atom carrier -> SExprPrinter atom carrier | |
124 | removeMaxWidth pr = pr { maxWidth = Nothing } | |
125 | ||
126 | -- | Set the number of spaces that a subsequent line will be indented | |
127 | -- after a swing indentation. | |
128 | -- | |
129 | -- >>> let printer = setMaxWidth 12 (basicPrint id) | |
130 | -- >>> encodeOne printer (L [A "elephant", A "pachyderm"]) | |
131 | -- "(elephant \n pachyderm)" | |
132 | -- >>> encodeOne (setIndentAmount 4) (L [A "elephant", A "pachyderm"]) | |
133 | -- "(elephant \n pachyderm)" | |
134 | setIndentAmount :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier | |
135 | setIndentAmount n pr = pr { indentAmount = n } | |
136 | ||
137 | -- | Dictate how to indent subsequent lines based on the leading | |
138 | -- subexpression in an s-expression. For details on how this works, | |
139 | -- consult the documentation of the 'Indent' type. | |
140 | -- | |
141 | -- >>> let indent (A "def") = SwingAfter 1; indent _ = Swing | |
142 | -- >>> let printer = setIndentStrategy indent (setMaxWidth 8 (basicPrint id)) | |
143 | -- >>> encodeOne printer (L [ A "def", L [ A "func", A "arg" ], A "body" ]) | |
144 | -- "(def (func arg)\n body)" | |
145 | -- >>> encodeOne printer (L [ A "elephant", A "among", A "pachyderms" ]) | |
146 | -- "(elephant \n among\n pachyderms)" | |
147 | setIndentStrategy :: (SExpr atom -> Indent) -> SExprPrinter atom carrier -> SExprPrinter atom carrier | |
148 | setIndentStrategy st pr = pr { swingIndent = st } | |
149 | ||
150 | -- Sort of like 'unlines' but without the trailing newline | |
151 | joinLines :: [Text] -> Text | |
152 | joinLines = T.intercalate "\n" | |
153 | ||
154 | -- Indents a line by n spaces | |
155 | indent :: Int -> Text -> Text | |
156 | indent n ts = T.replicate n " " <> ts | |
157 | ||
158 | -- Indents every line n spaces, and adds a newline to the beginning | |
159 | -- used in swung indents | |
160 | indentAll :: Int -> [Text] -> Text | |
161 | indentAll n = ("\n" <>) . joinLines . map (indent n) | |
162 | ||
163 | -- Indents every line but the first by some amount | |
164 | -- used in aligned indents | |
165 | indentSubsequent :: Int -> [Text] -> Text | |
166 | indentSubsequent _ [] = "" | |
167 | indentSubsequent _ [t] = t | |
168 | indentSubsequent n (t:ts) = joinLines (t : go ts) | |
169 | where go = map (indent n) | |
170 | ||
171 | -- oh god this code is so disgusting | |
172 | -- i'm sorry to everyone i let down by writing this | |
173 | -- i swear i'll do better in the future i promise i have to | |
174 | -- for my sake and for everyone's | |
175 | ||
176 | -- | Pretty-print a 'SExpr' according to the options in a | |
177 | -- 'LayoutOptions' value. | |
178 | prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text | |
179 | prettyPrintSExpr SExprPrinter { .. } = pHead 0 | |
180 | where pHead _ SNil = "()" | |
181 | pHead _ (SAtom a) = atomPrinter a | |
182 | pHead ind (SCons x xs) = gather ind x xs id | |
183 | gather _ _ (SAtom _) _ = error "no dotted pretty printing yet!" | |
184 | gather ind h (SCons x xs) k = gather ind h xs (k . (x:)) | |
185 | gather ind h SNil k = "(" <> hd <> body <> ")" | |
186 | where hd = indentSubsequent ind [pHead (ind+1) h] | |
187 | lst = k [] | |
188 | flat = T.unwords (map (pHead (ind+1)) lst) | |
189 | headWidth = T.length hd + 1 | |
190 | indented = | |
191 | case swingIndent h of | |
192 | SwingAfter n -> | |
193 | let (l, ls) = splitAt n lst | |
194 | t = T.unwords (map (pHead (ind+1)) l) | |
195 | ts = indentAll (ind + indentAmount) | |
196 | (map (pHead (ind + indentAmount)) ls) | |
197 | in t <> ts | |
198 | Swing -> | |
199 | indentAll (ind + indentAmount) | |
200 | (map (pHead (ind + indentAmount)) lst) | |
201 | Align -> | |
202 | indentSubsequent (ind + headWidth + 1) | |
203 | (map (pHead (ind + headWidth + 1)) lst) | |
204 | body | |
205 | | length lst == 0 = "" | |
206 | | Just maxAmt <- maxWidth | |
207 | , T.length flat + ind > maxAmt = " " <> indented | |
208 | | otherwise = " " <> flat | |
209 | ||
210 | -- | Turn a single s-expression into a string according to a given | |
211 | -- 'SExprPrinter'. | |
212 | encodeOne :: SExprPrinter atom carrier -> carrier -> Text | |
213 | encodeOne s@(SExprPrinter { .. }) = | |
214 | prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier | |
215 | ||
216 | -- | Turn a list of s-expressions into a single string according to | |
217 | -- a given 'SExprPrinter'. | |
218 | encode :: SExprPrinter atom carrier -> [carrier] -> Text | |
219 | encode spec = T.intercalate "\n\n" . map (encodeOne spec) |
5 | 5 | {-# LANGUAGE TypeFamilies #-} |
6 | 6 | |
7 | 7 | module Data.SCargot.Repr |
8 |
( -- |
|
8 | ( -- $reprs | |
9 | -- * Elementary SExpr representation | |
9 | 10 | SExpr(..) |
10 | 11 | -- * Rich SExpr representation |
11 | 12 | , RichSExpr(..) |
51 | 52 | -- represent a well-formed cons list, and 'RSDotted' |
52 | 53 | -- to represent an improper list of the form |
53 | 54 | -- @(a b c . d)@. This representation is based on |
54 | -- the shape of the parsed S-Expression, and not on | |
55 | -- how it was represented, so @(a . (b))@ is going to | |
55 | -- the structure of the parsed S-Expression, and not on | |
56 | -- how it was originally represented: thus, @(a . (b))@ is going to | |
56 | 57 | -- be represented as @RSList[RSAtom a, RSAtom b]@ |
57 | 58 | -- despite having been originally represented as a |
58 | 59 | -- dotted list. |
138 | 139 | fromWellFormed (WFSAtom a) = SAtom a |
139 | 140 | fromWellFormed (WFSList xs) = |
140 | 141 | foldr SCons SNil (map fromWellFormed xs) |
142 | ||
143 | {- $reprs | |
144 | ||
145 | This module contains several different representations for | |
146 | s-expressions. The s-cargot library underlying uses the | |
147 | 'SExpr' type as its representation type, which is a binary | |
148 | tree representation with an arbitrary type for its leaves. | |
149 | ||
150 | This type is not always convenient to manipulate in Haskell | |
151 | code, this module defines two alternate representations | |
152 | which turn a sequence of nested right-branching cons pairs | |
153 | into Haskell lists: that is to say, they transform between | |
154 | ||
155 | @ | |
156 | SCons a (SCons b (SCons c SNil)) \<=\> RSList [a, b, c] | |
157 | @ | |
158 | ||
159 | These two types differ in how they handle non-well-formed | |
160 | lists, i.e. lists that end with an atom. The 'RichSExpr' | |
161 | format handles this with a special constructor for lists | |
162 | that end in an atom: | |
163 | ||
164 | @ | |
165 | SCons a (SCons b (SAtom c)) \<=\> RSDotted [a, b] c | |
166 | @ | |
167 | ||
168 | On the other hand, the 'WellFormedSExpr' type elects | |
169 | not to handle this case. This is unusual for Lisp source code, | |
170 | but is a reasonable choice for configuration or data | |
171 | storage formats that use s-expressions, where | |
172 | non-well-formed lists would be an unnecessary | |
173 | complication. | |
174 | ||
175 | To make working with these types less verbose, there are other | |
176 | modules that export pattern aliases and helper functions: these | |
177 | can be found at "Data.SCargot.Repr.Basic", | |
178 | "Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed". | |
179 | -} |
1 | 1 | module Data.SCargot |
2 | ( module Data.SCargot.General | |
3 | ) where | |
2 | ( -- * SCargot Basics | |
4 | 3 | |
5 | import Data.SCargot.General | |
4 | -- $intro | |
5 | ||
6 | -- * Parsing and Printing | |
7 | decode | |
8 | , decodeOne | |
9 | , encode | |
10 | , encodeOne | |
11 | -- * Parser Construction | |
12 | -- ** Specifying a Parser | |
13 | , SExprParser | |
14 | , Reader | |
15 | , Comment | |
16 | , mkParser | |
17 | , setCarrier | |
18 | , addReader | |
19 | , setComment | |
20 | , asRich | |
21 | , asWellFormed | |
22 | , withQuote | |
23 | -- * Printer Construction | |
24 | -- * Specifying a Pretty-Printer | |
25 | , SExprPrinter | |
26 | , Indent(..) | |
27 | , basicPrint | |
28 | , flatPrint | |
29 | , setFromCarrier | |
30 | , setMaxWidth | |
31 | , removeMaxWidth | |
32 | , setIndentAmount | |
33 | , setIndentStrategy | |
34 | ) where | |
35 | ||
36 | import Data.SCargot.Parse | |
37 | import Data.SCargot.Print | |
38 | ||
39 | {- $intro | |
40 | ||
41 | The S-Cargot library is a library for parsing and emitting | |
42 | <https://en.wikipedia.org/wiki/S-expression s-expressions>, designed | |
43 | to be as flexible as possible. Despite some efforts at | |
44 | <http://people.csail.mit.edu/rivest/Sexp.txt standardization>, | |
45 | s-expressions are a general approach to describing a data format | |
46 | that can very often differ in subtle, incompatible ways: the | |
47 | s-expressions understood by Common Lisp are different from the | |
48 | s-expressions understood by Scheme, and even the different | |
49 | revisions of the Scheme language understand s-expressions in a | |
50 | slightly different way. To accomodate this, the S-Cargot library | |
51 | provides a toolbox for defining variations on s-expressions, | |
52 | complete with the ability to select various comment syntaxes, reader | |
53 | macros, and atom types. | |
54 | ||
55 | If all you want is to read some s-expressions and don't care about | |
56 | the edge cases of the format, or all you want is a new configuration | |
57 | format, try the "Data.SCargot.Language.Basic" or "Data.SCargot.Language.HaskLike" | |
58 | modules, which define an s-expression language whose atoms are | |
59 | plain strings and Haskell literals, respectively. | |
60 | ||
61 | The S-Cargot library works by specifying values which contain all | |
62 | the information needed to either parse or print an s-expression. | |
63 | The actual s-expression structure is parsed as a structure of | |
64 | <https://en.wikipedia.org/wiki/Cons cons cells> as represented | |
65 | by the 'SExpr' type, but can alternately be exposed as the | |
66 | isomorphic 'RichSExpr' type or the less expressive but | |
67 | easier-to-work-with 'WellFormedSExpr' type. Modules devoted | |
68 | to each representation type (in "Data.SCargot.Repr.Basic", | |
69 | "Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed") | |
70 | provide helper functions, lenses, and pattern synonyms to make | |
71 | creating and processing these values easier. | |
72 | ||
73 | The details of how to parse a given structure are represented | |
74 | by building up a 'SExprParser' value, which is defined in | |
75 | "Data.SCargot.Parse" and re-exported here. A minimal | |
76 | 'SExprParser' defines only how to parse the atoms of the | |
77 | language; helper functions can define comment syntaxes, | |
78 | reader macros, and transformations over the parsed structure. | |
79 | ||
80 | The details of how to print a given structure are represented | |
81 | by building up a 'SExprPrinter' value, which is defined in | |
82 | "Data.SCargot.Print" and re-exported here. A minimal | |
83 | 'SExprPrinter' defines only how to print the atoms of the | |
84 | language; helper functions help with the layout of the | |
85 | pretty-printed s-expression in terms of how to indent the | |
86 | surrounding expression. | |
87 | ||
88 | Other helper modules define useful primitives for building up | |
89 | s-expression languages: the "Data.SCargot.Common" module provides | |
90 | parsers for common literals, while the "Data.SCargot.Comments" | |
91 | module provides parsers for comment syntaxes borrowed from | |
92 | various other languages. | |
93 | ||
94 | -} |
1 | 1 | name: s-cargot |
2 | 2 | version: 0.1.0.0 |
3 | 3 | synopsis: A flexible, extensible s-expression library. |
4 | homepage: https://github.com/aisamanra/s-cargot | |
4 | 5 | description: S-Cargot is a library for working with s-expressions in |
5 | 6 | a modular and extensible way, opting for genericity and |
6 | 7 | flexibility instead of speed. Instead of understanding |
18 | 19 | build-type: Simple |
19 | 20 | cabal-version: >=1.10 |
20 | 21 | |
22 | source-repository head | |
23 | type: git | |
24 | location: git://github.com/aisamanra/s-cargot.git | |
25 | ||
21 | 26 | library |
22 | 27 | exposed-modules: Data.SCargot, |
23 | 28 | Data.SCargot.Repr, |
24 | 29 | Data.SCargot.Repr.Basic, |
25 | 30 | Data.SCargot.Repr.Rich, |
26 | 31 | Data.SCargot.Repr.WellFormed, |
27 | Data.SCargot.General, | |
28 | Data.SCargot.Pretty, | |
29 |
Data.SCargot. |
|
32 | Data.SCargot.Parse, | |
33 | Data.SCargot.Print, | |
30 | 34 | Data.SCargot.Comments, |
31 | 35 | Data.SCargot.Common, |
32 |
Data.SCargot. |
|
36 | Data.SCargot.Language.Basic, | |
37 | Data.SCargot.Language.HaskLike | |
33 | 38 | build-depends: base >=4.7 && <5, |
34 | 39 | parsec, |
35 | 40 | text, |