gdritter repos s-cargot / ed1b3db
Big final bunch of refactors/documentation bits in preparation for Hackage release Getty Ritter 8 years ago
13 changed file(s) with 887 addition(s) and 650 deletion(s). Collapse all Expand all
+0
-41
Data/SCargot/Basic.hs less more
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Data.SCargot.Basic
4 ( -- * Spec
5 -- $descr
6 basicSpec
7 ) where
8
9 import Control.Applicative ((<$>))
10 import Data.Char (isAlphaNum)
11 import Text.Parsec (many1, satisfy)
12 import Data.Text (Text, pack)
13
14 import Data.SCargot.Repr.Basic (SExpr)
15 import Data.SCargot.General ( SExprSpec
16 , mkSpec
17 )
18 import Data.SCargot.Comments (withLispComments)
19
20 isAtomChar :: Char -> Bool
21 isAtomChar c = isAlphaNum c
22 || c == '-' || c == '*' || c == '/'
23 || c == '+' || c == '<' || c == '>'
24 || c == '=' || c == '!' || c == '?'
25
26 -- $descr
27 -- The 'basicSpec' describes S-expressions whose atoms are simply
28 -- text strings that contain alphanumeric characters and a small
29 -- set of punctuation. It does no parsing of numbers or other data
30 -- types, and will accept tokens that typical Lisp implementations
31 -- would find nonsensical (like @77foo@).
32 --
33 -- Atoms recognized by the 'basicSpec' are any string matching the
34 -- regular expression @[A-Za-z0-9+*<>/=!?-]+@.
35
36 -- | A 'SExprSpec' that understands atoms to be sequences of
37 -- alphanumeric characters as well as the punctuation
38 -- characters @[-*/+<>=!?]@, and does no processing of them.
39 basicSpec :: SExprSpec Text (SExpr Text)
40 basicSpec = mkSpec pToken id
41 where pToken = pack <$> many1 (satisfy isAtomChar)
44 ( -- $intro
55
66 -- * Lisp-Style Syntax
7
78 -- $lisp
89 withLispComments
910 -- * Other Existing Comment Syntaxes
3536 , string
3637 )
3738
38 import Data.SCargot.General ( Comment
39 , SExprSpec
40 , setComment
41 )
39 import Data.SCargot.Parse ( Comment
40 , SExprParser
41 , setComment
42 )
4243
4344 -- | Given a string, produce a comment parser that matches that
4445 -- initial string and ignores everything until the end of the
7374 -- | Lisp-style line-oriented comments start with @;@ and last
7475 -- until the end of the line. This is usually the comment
7576 -- syntax you want.
76 withLispComments :: SExprSpec t a -> SExprSpec t a
77 withLispComments :: SExprParser t a -> SExprParser t a
7778 withLispComments = setComment (lineComment ";")
7879
7980 -- | C++-like line-oriented comment start with @//@ and last
8081 -- until the end of the line.
81 withCLikeLineComments :: SExprSpec t a -> SExprSpec t a
82 withCLikeLineComments :: SExprParser t a -> SExprParser t a
8283 withCLikeLineComments = setComment (lineComment "//")
8384
8485 -- | C-like block comments start with @/*@ and end with @*/@.
8586 -- They do not nest.
86 withCLikeBlockComments :: SExprSpec t a -> SExprSpec t a
87 withCLikeBlockComments :: SExprParser t a -> SExprParser t a
8788 withCLikeBlockComments = setComment (simpleBlockComment "/*" "*/")
8889
8990 -- | C-like comments include both line- and block-comments, the
9091 -- former starting with @//@ and the latter contained within
9192 -- @//* ... *//@.
92 withCLikeComments :: SExprSpec t a -> SExprSpec t a
93 withCLikeComments :: SExprParser t a -> SExprParser t a
9394 withCLikeComments = setComment (lineComment "//" <|>
9495 simpleBlockComment "/*" "*/")
9596
9697 -- | Haskell line-oriented comments start with @--@ and last
9798 -- until the end of the line.
98 withHaskellLineComments :: SExprSpec t a -> SExprSpec t a
99 withHaskellLineComments :: SExprParser t a -> SExprParser t a
99100 withHaskellLineComments = setComment (lineComment "--")
100101
101102 -- | Haskell block comments start with @{-@ and end with @-}@.
102103 -- They do not nest.
103 withHaskellBlockComments :: SExprSpec t a -> SExprSpec t a
104 withHaskellBlockComments :: SExprParser t a -> SExprParser t a
104105 withHaskellBlockComments = setComment (simpleBlockComment "{-" "-}")
105106
106107 -- | Haskell comments include both the line-oriented @--@ comments
107108 -- and the block-oriented @{- ... -}@ comments
108 withHaskellComments :: SExprSpec t a -> SExprSpec t a
109 withHaskellComments :: SExprParser t a -> SExprParser t a
109110 withHaskellComments = setComment (lineComment "--" <|>
110111 simpleBlockComment "{-" "-}")
111112
112113 -- | Many scripting and shell languages use these, which begin with
113114 -- @#@ and last until the end of the line.
114 withOctothorpeComments :: SExprSpec t a -> SExprSpec t a
115 withOctothorpeComments :: SExprParser t a -> SExprParser t a
115116 withOctothorpeComments = setComment (lineComment "#")
116117
117118
118119 {- $intro
119120
120 By default a 'SExprSpec' will not understand any kind of comment
121 By default a 'SExprParser' will not understand any kind of comment
121122 syntax. Most varieties of s-expression will, however, want some kind
122123 of commenting capability, so the below functions will produce a new
123 'SExprSpec' which understands various kinds of comment syntaxes.
124 'SExprParser' which understands various kinds of comment syntaxes.
124125
125126 For example:
126127
127 > mySpec :: SExprSpec Text (SExpr Text)
128 > mySpec = asWellFormed $ mkSpec (pack <$> many1 alphaNum) id
128 > mySpec :: SExprParser Text (SExpr Text)
129 > mySpec = asWellFormed $ mkParser (pack <$> many1 alphaNum)
129130 >
130 > myLispySpec :: SExprSpec Text (SExpr Text)
131 > myLispySpec :: SExprParser Text (SExpr Text)
131132 > myLispySpec = withLispComments mySpec
132133 >
133 > myCLikeSpec :: SExprSpec Text (SExpr Text)
134 > myCLikeSpec :: SExprParser Text (SExpr Text)
134135 > myCLikeSpec = withCLikeComment mySpec
135136
136137 We can then use these to parse s-expressions with different kinds of
137138 comment syntaxes:
138139
139 > > decode mySpec "(foo ; a lisp comment\n bar)\n"
140 > Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom"
141 > > decode myLispySpec "(foo ; a lisp comment\n bar)\n"
142 > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
143 > > decode mySpec "(foo /* a c-like\n comment */ bar)\n"
144 > Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom"
145 > > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n"
146 > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
140 >>> decode mySpec "(foo ; a lisp comment\n bar)\n"
141 Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom"
142 >>> decode myLispySpec "(foo ; a lisp comment\n bar)\n"
143 Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
144 >>> decode mySpec "(foo /* a c-like\n comment */ bar)\n"
145 Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom"
146 >>> decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n"
147 Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
147148
148149 -}
149150
44 , parseR6RSIdent
55 , parseR7RSIdent
66 -- * Numeric Literal Parsers
7 , signed
8 , prefixedNumber
9 , signedPrefixedNumber
710 , binNumber
811 , signedBinNumber
912 , octNumber
1417 , signedDozNumber
1518 , hexNumber
1619 , signedHexNumber
17 , signed
1820 ) where
1921
2022 import Data.Char
4244
4345 -- | Parse an identifier according to the R6RS Scheme standard. An
4446 -- R6RS identifier may include inline hexadecimal escape sequences
45 -- so that, for example, @foo@ is equivalent to @f\x6f;o@, and is
47 -- so that, for example, @foo@ is equivalent to @f\\x6f;o@, and is
4648 -- more liberal than R5RS as to which Unicode characters it may
4749 -- accept.
4850 parseR6RSIdent :: Parser Text
141143 signed :: Num a => Parser a -> Parser a
142144 signed p = ($) <$> sign <*> p
143145
146 -- | Parses a number in the same way as 'prefixedNumber', with an optional
147 -- leading @+@ or @-@.
148 signedPrefixedNumber :: Parser Integer
149 signedPrefixedNumber = signed prefixedNumber
150
151 -- | Parses a number, determining which numeric base to use by examining
152 -- the literal's prefix: @0x@ for a hexadecimal number, @0z@ for a
153 -- dozenal number, @0o@ for an octal number, and @0b@ for a binary
154 -- number (as well as the upper-case versions of the same.) If the
155 -- base is omitted entirely, then it is treated as a decimal number.
156 prefixedNumber :: Parser Integer
157 prefixedNumber = (string "0x" <|> string "0X") *> hexNumber
158 <|> (string "0o" <|> string "0O") *> octNumber
159 <|> (string "0z" <|> string "0Z") *> dozNumber
160 <|> (string "0b" <|> string "0B") *> binNumber
161 <|> decNumber
162
144163 -- | A parser for non-signed binary numbers
145164 binNumber :: Parser Integer
146165 binNumber = number 2 (char '0' <|> char '1')
147166
148167 -- | A parser for signed binary numbers, with an optional leading @+@ or @-@.
149168 signedBinNumber :: Parser Integer
150 signedBinNumber = ($) <$> sign <*> binNumber
169 signedBinNumber = signed binNumber
151170
152171 -- | A parser for non-signed octal numbers
153172 octNumber :: Parser Integer
+0
-286
Data/SCargot/General.hs less more
1 {-# LANGUAGE ViewPatterns #-}
2 {-# LANGUAGE OverloadedStrings #-}
3
4 module Data.SCargot.General
5 ( -- * SExprSpec
6 SExprSpec
7 , mkSpec
8 , convertSpec
9 , addReader
10 , setComment
11 -- * Specific SExprSpec Conversions
12 , asRich
13 , asWellFormed
14 , withQuote
15 -- * Using a SExprSpec
16 , decode
17 , decodeOne
18 , encode
19 , encodeOne
20 -- * Useful Type Aliases
21 , Reader
22 , Comment
23 , Serializer
24 ) where
25
26 import Control.Applicative ((<*), (*>), (<*>), (<$>), pure)
27 import Control.Monad ((>=>))
28 import Data.Char (isAlpha, isDigit, isAlphaNum)
29 import Data.Map.Strict (Map)
30 import qualified Data.Map.Strict as M
31 import Data.Maybe (fromJust)
32 import Data.Monoid ((<>))
33 import Data.String (IsString)
34 import Data.Text (Text, pack, unpack)
35 import qualified Data.Text as T
36 import Text.Parsec ( (<|>)
37 , (<?>)
38 , char
39 , eof
40 , lookAhead
41 , many1
42 , runParser
43 , skipMany
44 )
45 import Text.Parsec.Char (anyChar, space)
46 import Text.Parsec.Text (Parser)
47
48 import Data.SCargot.Repr ( SExpr(..)
49 , RichSExpr
50 , WellFormedSExpr
51 , fromRich
52 , toRich
53 , fromWellFormed
54 , toWellFormed
55 )
56
57 type ReaderMacroMap atom = Map Char (Reader atom)
58
59 -- | A 'Reader' represents a reader macro: it takes a parser for
60 -- the S-Expression type and performs as much or as little
61 -- parsing as it would like, and then returns an S-expression.
62 type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
63
64 -- | A 'Comment' represents any kind of skippable comment. This
65 -- parser __must__ be able to fail if a comment is not being
66 -- recognized, and it __must__ not consume any input.
67 type Comment = Parser ()
68
69 -- | A 'Serializer' is any function which can serialize an Atom
70 -- to 'Text'.
71 type Serializer atom = atom -> Text
72
73 -- | A 'SExprSpec' describes a parser and emitter for a particular
74 -- variant of S-Expressions. The @atom@ type corresponds to a
75 -- Haskell type used to represent the atoms, and the @carrier@
76 -- type corresponds to the parsed S-Expression structure. The
77 -- 'SExprSpec' type is deliberately opaque so that it must be
78 -- constructed and modified with other helper functions.
79 data SExprSpec atom carrier = SExprSpec
80 { sesPAtom :: Parser atom
81 , sesSAtom :: Serializer atom
82 , readerMap :: ReaderMacroMap atom
83 , comment :: Maybe Comment
84 , postparse :: SExpr atom -> Either String carrier
85 , preserial :: carrier -> SExpr atom
86 }
87
88 -- | Create a basic 'SExprSpec' when given a parser and serializer
89 -- for an atom type. A small minimal 'SExprSpec' that recognizes
90 -- any alphanumeric sequence as a valid atom looks like:
91 --
92 -- > simpleSpec :: SExprSpec Text (SExpr Text)
93 -- > simpleSpec = mkSpec (pack <$> many1 isAlphaNum) id
94 mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
95 mkSpec p s = SExprSpec
96 { sesPAtom = p <?> "atom"
97 , sesSAtom = s
98 , readerMap = M.empty
99 , comment = Nothing
100 , postparse = return
101 , preserial = id
102 }
103
104 -- | Modify the carrier type for a 'SExprSpec'. This is
105 -- used internally to convert between various 'SExpr' representations,
106 -- but could also be used externally to add an extra conversion layer
107 -- onto a 'SExprSpec'.
108 --
109 -- The following defines an S-expression spec that recognizes the
110 -- language of binary addition trees. It does so by first transforming
111 -- the internal S-expression representation using 'asWellFormed', and
112 -- then providing a conversion between the 'WellFormedSExpr' type and
113 -- an @Expr@ AST. Notice that the below parser uses 'String' as its
114 -- underlying atom type, instead of some token type.
115 --
116 -- > data Expr = Add Expr Expr | Num Int deriving (Eq, Show)
117 -- >
118 -- > toExpr :: WellFormedSExpr String -> Either String Expr
119 -- > toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r
120 -- > toExpr (A c) | all isDigit c = pure (Num (read c))
121 -- > toExpr c = Left ("Invalid expr: " ++ show c)
122 -- >
123 -- > fromExpr :: Expr -> WellFormedSExpr String
124 -- > fromExpr (Add l r) = L [A "+", fromExpr l, fromExpr r]
125 -- > fromExpr (Num n) = A (show n)
126 -- >
127 -- > mySpec :: SExprSpec String Expr
128 -- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack
129 -- > where parser = many1 (satisfy isValidChar)
130 -- > isValidChar c = isDigit c || c == '+'
131 convertSpec :: (b -> Either String c) -> (c -> b)
132 -> SExprSpec a b -> SExprSpec a c
133 convertSpec f g spec = spec
134 { postparse = postparse spec >=> f
135 , preserial = preserial spec . g
136 }
137
138 -- | Convert the final output representation from the 'SExpr' type
139 -- to the 'RichSExpr' type.
140 asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b)
141 asRich = convertSpec (return . toRich) fromRich
142
143 -- | Convert the final output representation from the 'SExpr' type
144 -- to the 'WellFormedSExpr' type.
145 asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b)
146 asWellFormed = convertSpec toWellFormed fromWellFormed
147
148 -- | Add the ability to execute some particular reader macro, as
149 -- defined by its initial character and the 'Parser' which returns
150 -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which
151 -- can be recursively called to parse more S-Expressions, and begins
152 -- parsing after the reader character has been removed from the
153 -- stream.
154 --
155 -- The following defines an S-expression variant that treats
156 -- @'expr@ as being sugar for @(quote expr)@. Note that this is done
157 -- already in a more general way by the 'withQuote' function, but
158 -- it is a good illustration of using reader macros in practice:
159 --
160 -- > mySpec :: SExprSpec String (SExpr Text)
161 -- > mySpec = addReader '\'' reader $ mkSpec (many1 alphaNum) pack
162 -- > where reader p = quote <$> p
163 -- > quote e = SCons (SAtom "quote") (SCons e SNil)
164 addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
165 addReader c reader spec = spec
166 { readerMap = M.insert c reader (readerMap spec) }
167
168 -- | Add the ability to ignore some kind of comment. This gets
169 -- factored into whitespace parsing, and it's very important that
170 -- the parser supplied __be able to fail__ (as otherwise it will
171 -- cause an infinite loop), and also that it __not consume any input__
172 -- (which may require it to be wrapped in 'try'.)
173 --
174 -- The following code defines an S-expression variant that skips
175 -- C++-style comments, i.e. those which begin with @//@ and last
176 -- until the end of a line:
177 --
178 -- > t :: SExprSpec String (SExpr Text)
179 -- > t = setComment comm $ mkSpec (many1 alphaNum) pack
180 -- > where comm = try (string "//" *> manyTill newline *> pure ())
181
182 setComment :: Comment -> SExprSpec a c -> SExprSpec a c
183 setComment c spec = spec { comment = Just (c <?> "comment") }
184
185 -- | Add the ability to understand a quoted S-Expression. In general,
186 -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is
187 -- a convenience function which allows you to easily add quoted
188 -- expressions to a 'SExprSpec', provided that you supply which
189 -- atom you want substituted in for the symbol @quote@.
190 withQuote :: IsString t => SExprSpec t (SExpr t) -> SExprSpec t (SExpr t)
191 withQuote = addReader '\'' (fmap go)
192 where go s = SCons "quote" (SCons s SNil)
193
194 peekChar :: Parser (Maybe Char)
195 peekChar = Just <$> lookAhead anyChar <|> pure Nothing
196
197 parseGenericSExpr ::
198 Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
199 parseGenericSExpr atom reader skip = do
200 let sExpr = parseGenericSExpr atom reader skip <?> "s-expr"
201 skip
202 c <- peekChar
203 r <- case c of
204 Nothing -> fail "Unexpected end of input"
205 Just '(' -> char '(' >> skip >> parseList sExpr skip
206 Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
207 _ -> SAtom `fmap` atom
208 skip
209 return r
210
211 parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
212 parseList sExpr skip = do
213 i <- peekChar
214 case i of
215 Nothing -> fail "Unexpected end of input"
216 Just ')' -> char ')' >> return SNil
217 _ -> do
218 car <- sExpr
219 skip
220 c <- peekChar
221 case c of
222 Just '.' -> do
223 char '.'
224 cdr <- sExpr
225 skip
226 char ')'
227 skip
228 return (SCons car cdr)
229 Just ')' -> do
230 char ')'
231 skip
232 return (SCons car SNil)
233 _ -> do
234 cdr <- parseList sExpr skip
235 return (SCons car cdr)
236
237 -- | Given a CommentMap, create the corresponding parser to
238 -- skip those comments (if they exist).
239 buildSkip :: Maybe (Parser ()) -> Parser ()
240 buildSkip Nothing = skipMany space
241 buildSkip (Just c) = alternate
242 where alternate = skipMany space >> ((c >> alternate) <|> return ())
243
244 doParse :: Parser a -> Text -> Either String a
245 doParse p t = case runParser p () "" t of
246 Left err -> Left (show err)
247 Right x -> Right x
248
249 -- | Decode a single S-expression. If any trailing input is left after
250 -- the S-expression (ignoring comments or whitespace) then this
251 -- will fail: for those cases, use 'decode', which returns a list of
252 -- all the S-expressions found at the top level.
253 decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
254 decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
255 where parser = parseGenericSExpr
256 (sesPAtom spec)
257 (readerMap spec)
258 (buildSkip (comment spec))
259
260 -- | Decode several S-expressions according to a given 'SExprSpec'. This
261 -- will return a list of every S-expression that appears at the top-level
262 -- of the document.
263 decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
264 decode spec =
265 doParse (many1 parser <* eof) >=> mapM (postparse spec)
266 where parser = parseGenericSExpr
267 (sesPAtom spec)
268 (readerMap spec)
269 (buildSkip (comment spec))
270
271 -- | Encode (without newlines) a single S-expression.
272 encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
273 encodeSExpr SNil _ = "()"
274 encodeSExpr (SAtom s) t = t s
275 encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
276 where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
277 go SNil rs = "(" <> rs <> ")"
278 go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)
279
280 -- | Emit an S-Expression in a machine-readable way. This does no
281 -- pretty-printing or indentation, and produces no comments.
282 encodeOne :: SExprSpec atom carrier -> carrier -> Text
283 encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
284
285 encode :: SExprSpec atom carrier -> [carrier] -> Text
286 encode spec cs = T.concat (map (encodeOne spec) cs)
+0
-140
Data/SCargot/HaskLike.hs less more
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Data.SCargot.HaskLike ( -- $info
4 haskLikeSpec
5 , HaskLikeAtom(..)
6 ) where
7
8 import Control.Applicative ((<$>), (<*>), (<$))
9 import Data.Maybe (catMaybes)
10 import Data.String (IsString(..))
11 import Data.Text (Text, pack)
12 import Text.Parsec
13 import Text.Parsec.Text (Parser)
14
15 import Prelude hiding (concatMap)
16
17 import Data.SCargot.Common
18 import Data.SCargot.Repr.Basic (SExpr)
19 import Data.SCargot.General (SExprSpec, mkSpec)
20
21 {- $info
22
23 This module is intended for simple, ad-hoc configuration or data formats
24 that might not need their on rich structure but might benefit from a few
25 various kinds of literals. The 'haskLikeSpec' understands identifiers as
26 defined by R5RS, as well as string, integer, and floating-point literals
27 as defined by the Haskell spec. It does _not_ natively understand other
28 data types, such as booleans, vectors, bitstrings, or the like.
29
30 -}
31
32
33 -- | An atom type that understands Haskell-like values as well as
34 -- Scheme-like identifiers.
35 data HaskLikeAtom
36 = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme
37 -- standard
38 | HSString Text -- ^ A string, parsed according to the syntax for string
39 -- literals in the Haskell report
40 | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to
41 -- the syntax for integer literals in the Haskell report
42 | HSFloat Double -- ^ A double-precision floating-point value, parsed
43 -- according to the syntax for floats in the Haskell
44 -- report
45 deriving (Eq, Show)
46
47 instance IsString HaskLikeAtom where
48 fromString = HSIdent . fromString
49
50 pString :: Parser Text
51 pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
52 where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
53 esc = do char '\\'
54 Nothing <$ (gap <|> char '&') <|>
55 Just <$> code
56 gap = many1 space >> char '\\'
57 code = eEsc <|> eNum <|> eCtrl <|> eAscii
58 eCtrl = char '^' >> unCtrl <$> upper
59 eNum = (toEnum . fromInteger) <$>
60 (decNumber <|> (char 'o' >> octNumber)
61 <|> (char 'x' >> hexNumber))
62 eEsc = choice [ char a >> return b | (a, b) <- escMap ]
63 eAscii = choice [ try (string a >> return b)
64 | (a, b) <- asciiMap ]
65 unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1)
66
67 escMap :: [(Char, Char)]
68 escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
69
70 asciiMap :: [(String, Char)]
71 asciiMap = zip
72 ["BS","HT","LF","VT","FF","CR","SO","SI","EM"
73 ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX"
74 ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3"
75 ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"]
76 ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++
77 "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
78 "\SYN\ETB\CAN\SUB\ESC\DEL")
79
80 pFloat :: Parser Double
81 pFloat = do
82 n <- decNumber
83 withDot n <|> noDot n
84 where withDot n = do
85 char '.'
86 m <- decNumber
87 e <- option 1.0 exponent
88 return ((fromIntegral n + asDec m 0) * e)
89 noDot n = do
90 e <- exponent
91 return (fromIntegral n * e)
92 exponent = do
93 oneOf "eE"
94 s <- power
95 x <- decNumber
96 return (10 ** s (fromIntegral x))
97 asDec 0 k = k
98 asDec n k =
99 asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1)
100
101 power :: Num a => Parser (a -> a)
102 power = negate <$ char '-' <|> id <$ char '+' <|> return id
103
104 pInt :: Parser Integer
105 pInt = do
106 s <- power
107 n <- pZeroNum <|> decNumber
108 return (fromIntegral (s n))
109
110 pZeroNum :: Parser Integer
111 pZeroNum = char '0' >>
112 ( (oneOf "xX" >> hexNumber)
113 <|> (oneOf "oO" >> octNumber)
114 <|> decNumber
115 <|> return 0
116 )
117
118 pHaskLikeAtom :: Parser HaskLikeAtom
119 pHaskLikeAtom
120 = HSFloat <$> (try pFloat <?> "float")
121 <|> HSInt <$> (try pInt <?> "integer")
122 <|> HSString <$> (pString <?> "string literal")
123 <|> HSIdent <$> (parseR5RSIdent <?> "token")
124
125 sHaskLikeAtom :: HaskLikeAtom -> Text
126 sHaskLikeAtom (HSIdent t) = t
127 sHaskLikeAtom (HSString s) = pack (show s)
128 sHaskLikeAtom (HSInt i) = pack (show i)
129 sHaskLikeAtom (HSFloat f) = pack (show f)
130
131 -- | This `SExprSpec` understands s-expressions that contain
132 -- Scheme-like tokens, as well as string literals, integer
133 -- literals, and floating-point literals. Each of these values
134 -- is parsed according to the lexical rules in the Haskell
135 -- report, so the same set of string escapes, numeric bases,
136 -- and floating-point options are available. This spec does
137 -- not parse comments and does not understand any reader
138 -- macros.
139 haskLikeSpec :: SExprSpec HaskLikeAtom (SExpr HaskLikeAtom)
140 haskLikeSpec = mkSpec pHaskLikeAtom sHaskLikeAtom
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Data.SCargot.Language.Basic
4 ( -- * Spec
5 -- $descr
6 basicParser
7 , basicPrinter
8 ) where
9
10 import Control.Applicative ((<$>))
11 import Data.Char (isAlphaNum)
12 import Text.Parsec (many1, satisfy)
13 import Data.Text (Text, pack)
14
15 import Data.SCargot.Repr.Basic (SExpr)
16 import Data.SCargot ( SExprParser
17 , SExprPrinter
18 , mkParser
19 , flatPrint
20 )
21 import Data.SCargot.Comments (withLispComments)
22
23 isAtomChar :: Char -> Bool
24 isAtomChar c = isAlphaNum c
25 || c == '-' || c == '*' || c == '/'
26 || c == '+' || c == '<' || c == '>'
27 || c == '=' || c == '!' || c == '?'
28
29 -- $descr
30 -- The 'basicSpec' describes S-expressions whose atoms are simply
31 -- text strings that contain alphanumeric characters and a small
32 -- set of punctuation. It does no parsing of numbers or other data
33 -- types, and will accept tokens that typical Lisp implementations
34 -- would find nonsensical (like @77foo@).
35 --
36 -- Atoms recognized by the 'basicSpec' are any string matching the
37 -- regular expression @[A-Za-z0-9+*<>/=!?-]+@.
38
39 -- | A 'SExprParser' that understands atoms to be sequences of
40 -- alphanumeric characters as well as the punctuation
41 -- characters @[-*/+<>=!?]@, and does no processing of them.
42 --
43 -- >>> decode basicParser "(1 elephant)"
44 -- Right [SCons (SAtom "1") (SCons (SAtom "elephant") SNil)]
45 basicParser :: SExprParser Text (SExpr Text)
46 basicParser = mkParser pToken
47 where pToken = pack <$> many1 (satisfy isAtomChar)
48
49 -- | A 'SExprPrinter' that prints textual atoms directly (without quoting
50 -- or any other processing) onto a single line.
51 --
52 -- >>> encode basicPrinter [L [A "1", A "elephant"]]
53 -- "(1 elephant)"
54 basicPrinter :: SExprPrinter Text (SExpr Text)
55 basicPrinter = flatPrint id
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Data.SCargot.Language.HaskLike
4 ( -- $info
5 HaskLikeAtom(..)
6 , haskLikeParser
7 , haskLikePrinter
8 ) where
9
10 import Control.Applicative ((<$>), (<*>), (<$))
11 import Data.Maybe (catMaybes)
12 import Data.String (IsString(..))
13 import Data.Text (Text, pack)
14 import Text.Parsec
15 import Text.Parsec.Text (Parser)
16
17 import Prelude hiding (concatMap)
18
19 import Data.SCargot.Common
20 import Data.SCargot.Repr.Basic (SExpr)
21 import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint)
22
23 {- $info
24
25 This module is intended for simple, ad-hoc configuration or data formats
26 that might not need their on rich structure but might benefit from a few
27 various kinds of literals. The 'haskLikeParser' understands identifiers as
28 defined by R5RS, as well as string, integer, and floating-point literals
29 as defined by the Haskell spec. It does __not__ natively understand other
30 data types, such as booleans, vectors, bitstrings.
31
32 -}
33
34
35 -- | An atom type that understands Haskell-like values as well as
36 -- Scheme-like identifiers.
37 data HaskLikeAtom
38 = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme
39 -- standard
40 | HSString Text -- ^ A string, parsed according to the syntax for string
41 -- literals in the Haskell report
42 | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to
43 -- the syntax for integer literals in the Haskell report
44 | HSFloat Double -- ^ A double-precision floating-point value, parsed
45 -- according to the syntax for floats in the Haskell
46 -- report
47 deriving (Eq, Show)
48
49 instance IsString HaskLikeAtom where
50 fromString = HSIdent . fromString
51
52 pString :: Parser Text
53 pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
54 where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
55 esc = do char '\\'
56 Nothing <$ (gap <|> char '&') <|>
57 Just <$> code
58 gap = many1 space >> char '\\'
59 code = eEsc <|> eNum <|> eCtrl <|> eAscii
60 eCtrl = char '^' >> unCtrl <$> upper
61 eNum = (toEnum . fromInteger) <$>
62 (decNumber <|> (char 'o' >> octNumber)
63 <|> (char 'x' >> hexNumber))
64 eEsc = choice [ char a >> return b | (a, b) <- escMap ]
65 eAscii = choice [ try (string a >> return b)
66 | (a, b) <- asciiMap ]
67 unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1)
68
69 escMap :: [(Char, Char)]
70 escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
71
72 asciiMap :: [(String, Char)]
73 asciiMap = zip
74 ["BS","HT","LF","VT","FF","CR","SO","SI","EM"
75 ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX"
76 ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3"
77 ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"]
78 ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++
79 "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
80 "\SYN\ETB\CAN\SUB\ESC\DEL")
81
82 pFloat :: Parser Double
83 pFloat = do
84 n <- decNumber
85 withDot n <|> noDot n
86 where withDot n = do
87 char '.'
88 m <- decNumber
89 e <- option 1.0 exponent
90 return ((fromIntegral n + asDec m 0) * e)
91 noDot n = do
92 e <- exponent
93 return (fromIntegral n * e)
94 exponent = do
95 oneOf "eE"
96 s <- power
97 x <- decNumber
98 return (10 ** s (fromIntegral x))
99 asDec 0 k = k
100 asDec n k =
101 asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1)
102
103 power :: Num a => Parser (a -> a)
104 power = negate <$ char '-' <|> id <$ char '+' <|> return id
105
106 pInt :: Parser Integer
107 pInt = do
108 s <- power
109 n <- pZeroNum <|> decNumber
110 return (fromIntegral (s n))
111
112 pZeroNum :: Parser Integer
113 pZeroNum = char '0' >>
114 ( (oneOf "xX" >> hexNumber)
115 <|> (oneOf "oO" >> octNumber)
116 <|> decNumber
117 <|> return 0
118 )
119
120 pHaskLikeAtom :: Parser HaskLikeAtom
121 pHaskLikeAtom
122 = HSFloat <$> (try pFloat <?> "float")
123 <|> HSInt <$> (try pInt <?> "integer")
124 <|> HSString <$> (pString <?> "string literal")
125 <|> HSIdent <$> (parseR5RSIdent <?> "token")
126
127 sHaskLikeAtom :: HaskLikeAtom -> Text
128 sHaskLikeAtom (HSIdent t) = t
129 sHaskLikeAtom (HSString s) = pack (show s)
130 sHaskLikeAtom (HSInt i) = pack (show i)
131 sHaskLikeAtom (HSFloat f) = pack (show f)
132
133 -- | This `SExprParser` understands s-expressions that contain
134 -- Scheme-like tokens, as well as string literals, integer
135 -- literals, and floating-point literals. Each of these values
136 -- is parsed according to the lexical rules in the Haskell
137 -- report, so the same set of string escapes, numeric bases,
138 -- and floating-point options are available. This spec does
139 -- not parse comments and does not understand any reader
140 -- macros.
141 --
142 -- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")"
143 -- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)]
144 haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom)
145 haskLikeParser = mkParser pHaskLikeAtom
146
147 -- | This 'SExprPrinter' emits s-expressions that contain Scheme-like
148 -- tokens as well as string literals, integer literals, and floating-point
149 -- literals, which will be emitted as the literals produced by Haskell's
150 -- 'show' function. This printer will produce a flat s-expression with
151 -- no indentation of any kind.
152 --
153 -- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]]
154 -- "(1 \"elephant\")"
155 haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom)
156 haskLikePrinter = flatPrint sHaskLikeAtom
1 {-# LANGUAGE ViewPatterns #-}
2 {-# LANGUAGE OverloadedStrings #-}
3
4 module Data.SCargot.Parse
5 ( -- * Parsing
6 decode
7 , decodeOne
8 -- * Parsing Control
9 , SExprParser
10 , Reader
11 , Comment
12 , mkParser
13 , setCarrier
14 , addReader
15 , setComment
16 -- * Specific SExprParser Conversions
17 , asRich
18 , asWellFormed
19 , withQuote
20 ) where
21
22 import Control.Applicative ((<*), (*>), (<*>), (<$>), pure)
23 import Control.Monad ((>=>))
24 import Data.Char (isAlpha, isDigit, isAlphaNum)
25 import Data.Map.Strict (Map)
26 import qualified Data.Map.Strict as M
27 import Data.Maybe (fromJust)
28 import Data.Monoid ((<>))
29 import Data.String (IsString)
30 import Data.Text (Text, pack, unpack)
31 import qualified Data.Text as T
32 import Text.Parsec ( (<|>)
33 , (<?>)
34 , char
35 , eof
36 , lookAhead
37 , many1
38 , runParser
39 , skipMany
40 )
41 import Text.Parsec.Char (anyChar, space)
42 import Text.Parsec.Text (Parser)
43
44 import Data.SCargot.Repr ( SExpr(..)
45 , RichSExpr
46 , WellFormedSExpr
47 , fromRich
48 , toRich
49 , fromWellFormed
50 , toWellFormed
51 )
52
53 type ReaderMacroMap atom = Map Char (Reader atom)
54
55 -- | A 'Reader' represents a reader macro: it takes a parser for
56 -- the S-Expression type and performs as much or as little
57 -- parsing as it would like, and then returns an S-expression.
58 type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
59
60 -- | A 'Comment' represents any kind of skippable comment. This
61 -- parser __must__ be able to fail if a comment is not being
62 -- recognized, and it __must__ not consume any input in case
63 -- of failure.
64 type Comment = Parser ()
65
66 -- | A 'SExprParser' describes a parser for a particular value
67 -- that has been serialized as an s-expression. The @atom@ parameter
68 -- corresponds to a Haskell type used to represent the atoms,
69 -- and the @carrier@ parameter corresponds to the parsed S-Expression
70 -- structure.
71 data SExprParser atom carrier = SExprParser
72 { sesPAtom :: Parser atom
73 , readerMap :: ReaderMacroMap atom
74 , comment :: Maybe Comment
75 , postparse :: SExpr atom -> Either String carrier
76 }
77
78 -- | Create a basic 'SExprParser' when given a parser
79 -- for an atom type.
80 --
81 -- >>> import Text.Parsec (alphaNum, many1)
82 -- >>> let parser = mkParser (many1 alphaNum)
83 -- >>> decode parser "(ele phant)"
84 -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
85 mkParser :: Parser atom -> SExprParser atom (SExpr atom)
86 mkParser parser = SExprParser
87 { sesPAtom = parser
88 , readerMap = M.empty
89 , comment = Nothing
90 , postparse = return
91 }
92
93 -- | Modify the carrier type for a 'SExprParser'. This is
94 -- used internally to convert between various 'SExpr' representations,
95 -- but could also be used externally to add an extra conversion layer
96 -- onto a 'SExprParser'.
97 --
98 -- >>> import Text.Parsec (alphaNum, many1)
99 -- >>> import Data.SCargot.Repr (toRich)
100 -- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
101 -- >>> decode parser "(ele phant)"
102 -- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
103 setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
104 setCarrier f spec = spec { postparse = postparse spec >=> f }
105
106 -- | Convert the final output representation from the 'SExpr' type
107 -- to the 'RichSExpr' type.
108 --
109 -- >>> import Text.Parsec (alphaNum, many1)
110 -- >>> let parser = asRich (mkParser (many1 alphaNum))
111 -- >>> decode parser "(ele phant)"
112 -- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
113 asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
114 asRich = setCarrier (return . toRich)
115
116 -- | Convert the final output representation from the 'SExpr' type
117 -- to the 'WellFormedSExpr' type.
118 --
119 -- >>> import Text.Parsec (alphaNum, many1)
120 -- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
121 -- >>> decode parser "(ele phant)"
122 -- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
123 asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
124 asWellFormed = setCarrier toWellFormed
125
126 -- | Add the ability to execute some particular reader macro, as
127 -- defined by its initial character and the 'Parser' which returns
128 -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which
129 -- can be recursively called to parse more S-Expressions, and begins
130 -- parsing after the reader character has been removed from the
131 -- stream.
132 --
133 -- >>> import Text.Parsec (alphaNum, char, many1)
134 -- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
135 -- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
136 -- >>> decode parser "(an [ele phant])"
137 -- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]
138
139 addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
140 addReader c reader spec = spec
141 { readerMap = M.insert c reader (readerMap spec) }
142
143 -- | Add the ability to ignore some kind of comment. This gets
144 -- factored into whitespace parsing, and it's very important that
145 -- the parser supplied __be able to fail__ (as otherwise it will
146 -- cause an infinite loop), and also that it __not consume any input__
147 -- (which may require it to be wrapped in 'try'.)
148 --
149 -- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string)
150 -- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
151 -- >>> let parser = setComment comment (mkParser (many1 alphaNum))
152 -- >>> decode parser "(ele //a comment\n phant)"
153 -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
154
155 setComment :: Comment -> SExprParser a c -> SExprParser a c
156 setComment c spec = spec { comment = Just (c <?> "comment") }
157
158 -- | Add the ability to understand a quoted S-Expression.
159 -- Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
160 -- assumes that the underlying atom type implements the "IsString"
161 -- class, and will create the @quote@ atom using @fromString "quote"@.
162 --
163 -- >>> import Text.Parsec (alphaNum, many1)
164 -- >>> let parser = withQuote (mkParser (many1 alphaNum))
165 -- >>> decode parser "'elephant"
166 -- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
167 withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
168 withQuote = addReader '\'' (fmap go)
169 where go s = SCons "quote" (SCons s SNil)
170
171 peekChar :: Parser (Maybe Char)
172 peekChar = Just <$> lookAhead anyChar <|> pure Nothing
173
174 parseGenericSExpr ::
175 Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
176 parseGenericSExpr atom reader skip = do
177 let sExpr = parseGenericSExpr atom reader skip <?> "s-expr"
178 skip
179 c <- peekChar
180 r <- case c of
181 Nothing -> fail "Unexpected end of input"
182 Just '(' -> char '(' >> skip >> parseList sExpr skip
183 Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
184 _ -> SAtom `fmap` atom
185 skip
186 return r
187
188 parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
189 parseList sExpr skip = do
190 i <- peekChar
191 case i of
192 Nothing -> fail "Unexpected end of input"
193 Just ')' -> char ')' >> return SNil
194 _ -> do
195 car <- sExpr
196 skip
197 c <- peekChar
198 case c of
199 Just '.' -> do
200 char '.'
201 cdr <- sExpr
202 skip
203 char ')'
204 skip
205 return (SCons car cdr)
206 Just ')' -> do
207 char ')'
208 skip
209 return (SCons car SNil)
210 _ -> do
211 cdr <- parseList sExpr skip
212 return (SCons car cdr)
213
214 -- | Given a CommentMap, create the corresponding parser to
215 -- skip those comments (if they exist).
216 buildSkip :: Maybe (Parser ()) -> Parser ()
217 buildSkip Nothing = skipMany space
218 buildSkip (Just c) = alternate
219 where alternate = skipMany space >> ((c >> alternate) <|> return ())
220
221 doParse :: Parser a -> Text -> Either String a
222 doParse p t = case runParser p () "" t of
223 Left err -> Left (show err)
224 Right x -> Right x
225
226 -- | Decode a single S-expression. If any trailing input is left after
227 -- the S-expression (ignoring comments or whitespace) then this
228 -- will fail: for those cases, use 'decode', which returns a list of
229 -- all the S-expressions found at the top level.
230 decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
231 decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
232 where parser = parseGenericSExpr
233 (sesPAtom spec)
234 (readerMap spec)
235 (buildSkip (comment spec))
236
237 -- | Decode several S-expressions according to a given 'SExprParser'. This
238 -- will return a list of every S-expression that appears at the top-level
239 -- of the document.
240 decode :: SExprParser atom carrier -> Text -> Either String [carrier]
241 decode spec =
242 doParse (many1 parser <* eof) >=> mapM (postparse spec)
243 where parser = parseGenericSExpr
244 (sesPAtom spec)
245 (readerMap spec)
246 (buildSkip (comment spec))
247
248 {-
249 -- | Encode (without newlines) a single S-expression.
250 encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
251 encodeSExpr SNil _ = "()"
252 encodeSExpr (SAtom s) t = t s
253 encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
254 where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
255 go SNil rs = "(" <> rs <> ")"
256 go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)
257
258 -- | Emit an S-Expression in a machine-readable way. This does no
259 -- pretty-printing or indentation, and produces no comments.
260 encodeOne :: SExprParser atom carrier -> carrier -> Text
261 encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
262
263 encode :: SExprParser atom carrier -> [carrier] -> Text
264 encode spec cs = T.concat (map (encodeOne spec) cs)
265 -}
+0
-144
Data/SCargot/Pretty.hs less more
1 {-# LANGUAGE RecordWildCards #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE ScopedTypeVariables #-}
4
5 module Data.SCargot.Pretty
6 ( -- * Pretty-Printing
7 prettyPrintSExpr
8 -- * Pretty-Printing Control
9 , LayoutOptions(..)
10 , Indent(..)
11 -- * Default Printing Strategies
12 , basicPrint
13 , flatPrint
14 ) where
15
16 import Data.Monoid ((<>))
17 import Data.Text (Text)
18 import qualified Data.Text as T
19
20 import Data.SCargot.Repr
21
22 -- | The 'Indent' type is used to determine how to indent subsequent
23 -- s-expressions in a list, after printing the head of the list.
24 data Indent
25 = Swing -- ^ A 'Swing' indent will indent subsequent exprs some fixed
26 -- amount more than the current line.
27 --
28 -- > (foo
29 -- > bar
30 -- > baz
31 -- > quux)
32 | SwingAfter Int -- ^ A 'SwingAfter' @n@ indent will try to print the
33 -- first @n@ expressions after the head on the same
34 -- line as the head, and all after will be swung.
35 -- 'SwingAfter' @0@ is equivalent to 'Swing'.
36 --
37 -- > (foo bar
38 -- > baz
39 -- > quux)
40 | Align -- ^ An 'Align' indent will print the first expression after
41 -- the head on the same line, and subsequent expressions will
42 -- be aligned with that one.
43 --
44 -- > (foo bar
45 -- > baz
46 -- > quux)
47 deriving (Eq, Show)
48
49 -- | A 'LayoutOptions' value describes the strategy taken in
50 -- pretty-printing a 'SExpr'.
51 data LayoutOptions a = LayoutOptions
52 { atomPrinter :: a -> Text
53 -- ^ How to serialize a given atom to 'Text'.
54 , swingIndent :: SExpr a -> Indent
55 -- ^ How to indent subsequent expressions, as determined by
56 -- the head of the list.
57 , indentAmount :: Int
58 -- ^ How much to indent after a swung indentation.
59 , maxWidth :: Maybe Int
60 -- ^ The maximum width (if any) If this is 'None' then
61 -- the resulting s-expression will always be printed
62 -- on a single line.
63 }
64
65 -- | A default 'LayoutOptions' struct that will always print a 'SExpr'
66 -- as a single line.
67 flatPrint :: (a -> Text) -> LayoutOptions a
68 flatPrint printer = LayoutOptions
69 { atomPrinter = printer
70 , swingIndent = const Swing
71 , indentAmount = 2
72 , maxWidth = Nothing
73 }
74
75 -- | A default 'LayoutOptions' struct that will always swing subsequent
76 -- expressions onto later lines if they're too long, indenting them
77 -- by two spaces.
78 basicPrint :: (a -> Text) -> LayoutOptions a
79 basicPrint printer = LayoutOptions
80 { atomPrinter = printer
81 , swingIndent = const Swing
82 , indentAmount = 2
83 , maxWidth = Just 80
84 }
85
86 -- Sort of like 'unlines' but without the trailing newline
87 joinLines :: [Text] -> Text
88 joinLines = T.intercalate "\n"
89
90 -- Indents a line by n spaces
91 indent :: Int -> Text -> Text
92 indent n ts = T.replicate n " " <> ts
93
94 -- Indents every line n spaces, and adds a newline to the beginning
95 -- used in swung indents
96 indentAll :: Int -> [Text] -> Text
97 indentAll n = ("\n" <>) . joinLines . map (indent n)
98
99 -- Indents every line but the first by some amount
100 -- used in aligned indents
101 indentSubsequent :: Int -> [Text] -> Text
102 indentSubsequent _ [] = ""
103 indentSubsequent _ [t] = t
104 indentSubsequent n (t:ts) = joinLines (t : go ts)
105 where go = map (indent n)
106
107 -- oh god this code is so disgusting
108 -- i'm sorry to everyone i let down by writing this
109 -- i swear i'll do better in the future i promise i have to
110 -- for my sake and for everyone's
111
112 -- | Pretty-print a 'SExpr' according to the options in a
113 -- 'LayoutOptions' value.
114 prettyPrintSExpr :: LayoutOptions a -> SExpr a -> Text
115 prettyPrintSExpr LayoutOptions { .. } = pHead 0
116 where pHead _ SNil = "()"
117 pHead _ (SAtom a) = atomPrinter a
118 pHead ind (SCons x xs) = gather ind x xs id
119 gather _ _ (SAtom _) _ = error "no dotted pretty printing yet!"
120 gather ind h (SCons x xs) k = gather ind h xs (k . (x:))
121 gather ind h SNil k = "(" <> hd <> body <> ")"
122 where hd = indentSubsequent ind [pHead (ind+1) h]
123 lst = k []
124 flat = T.unwords (map (pHead (ind+1)) lst)
125 headWidth = T.length hd + 1
126 indented =
127 case swingIndent h of
128 SwingAfter n ->
129 let (l, ls) = splitAt n lst
130 t = T.unwords (map (pHead (ind+1)) l)
131 ts = indentAll (ind + indentAmount)
132 (map (pHead (ind + indentAmount)) ls)
133 in t <> ts
134 Swing ->
135 indentAll (ind + indentAmount)
136 (map (pHead (ind + indentAmount)) lst)
137 Align ->
138 indentSubsequent (ind + headWidth + 1)
139 (map (pHead (ind + headWidth + 1)) lst)
140 body
141 | length lst == 0 = ""
142 | Just maxAmt <- maxWidth
143 , T.length flat + ind > maxAmt = " " <> indented
144 | otherwise = " " <> flat
1 {-# LANGUAGE RecordWildCards #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE ScopedTypeVariables #-}
4
5 module Data.SCargot.Print
6 ( -- * Pretty-Printing
7 encodeOne
8 , encode
9 -- * Pretty-Printing Control
10 , SExprPrinter
11 , Indent(..)
12 , setFromCarrier
13 , setMaxWidth
14 , removeMaxWidth
15 , setIndentAmount
16 , setIndentStrategy
17 -- * Default Printing Strategies
18 , basicPrint
19 , flatPrint
20 ) where
21
22 import Data.Monoid ((<>))
23 import Data.Text (Text)
24 import qualified Data.Text as T
25
26 import Data.SCargot.Repr
27
28 -- | The 'Indent' type is used to determine how to indent subsequent
29 -- s-expressions in a list, after printing the head of the list.
30 data Indent
31 = Swing -- ^ A 'Swing' indent will indent subsequent exprs some fixed
32 -- amount more than the current line.
33 --
34 -- > (foo
35 -- > bar
36 -- > baz
37 -- > quux)
38 | SwingAfter Int -- ^ A 'SwingAfter' @n@ indent will try to print the
39 -- first @n@ expressions after the head on the same
40 -- line as the head, and all after will be swung.
41 -- 'SwingAfter' @0@ is equivalent to 'Swing'.
42 --
43 -- > (foo bar
44 -- > baz
45 -- > quux)
46 | Align -- ^ An 'Align' indent will print the first expression after
47 -- the head on the same line, and subsequent expressions will
48 -- be aligned with that one.
49 --
50 -- > (foo bar
51 -- > baz
52 -- > quux)
53 deriving (Eq, Show)
54
55 -- | A 'SExprPrinter' value describes how to print a given value as an
56 -- s-expression. The @carrier@ type parameter indicates the value
57 -- that will be printed, and the @atom@ parameter indicates the type
58 -- that will represent tokens in an s-expression structure.
59 data SExprPrinter atom carrier = SExprPrinter
60 { atomPrinter :: atom -> Text
61 -- ^ How to serialize a given atom to 'Text'.
62 , fromCarrier :: carrier -> SExpr atom
63 -- ^ How to turn a carrier type back into a 'Sexpr'.
64 , swingIndent :: SExpr atom -> Indent
65 -- ^ How to indent subsequent expressions, as determined by
66 -- the head of the list.
67 , indentAmount :: Int
68 -- ^ How much to indent after a swung indentation.
69 , maxWidth :: Maybe Int
70 -- ^ The maximum width (if any) If this is 'None' then
71 -- the resulting s-expression will always be printed
72 -- on a single line.
73 }
74
75 -- | A default 'LayoutOptions' struct that will always print a 'SExpr'
76 -- as a single line.
77 flatPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
78 flatPrint printer = SExprPrinter
79 { atomPrinter = printer
80 , fromCarrier = id
81 , swingIndent = const Swing
82 , indentAmount = 2
83 , maxWidth = Nothing
84 }
85
86 -- | A default 'LayoutOptions' struct that will always swing subsequent
87 -- expressions onto later lines if they're too long, indenting them
88 -- by two spaces.
89 basicPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
90 basicPrint printer = SExprPrinter
91 { atomPrinter = printer
92 , fromCarrier = id
93 , swingIndent = const Swing
94 , indentAmount = 2
95 , maxWidth = Just 80
96 }
97
98 -- | Modify the carrier type of a 'SExprPrinter' by describing how
99 -- to convert the new type back to the previous type. For example,
100 -- to pretty-print a well-formed s-expression, we can modify the
101 -- 'SExprPrinter' value as follows:
102 --
103 -- >>> let printer = setFromCarrier fromWellFormed (basicPrint id)
104 -- >>> encodeOne printer (WFSList [WFSAtom "ele", WFSAtom "phant"])
105 -- "(ele phant)"
106 setFromCarrier :: (c -> b) -> SExprPrinter a b -> SExprPrinter a c
107 setFromCarrier fc pr = pr { fromCarrier = fromCarrier pr . fc }
108
109 -- | Dictate a maximum width for pretty-printed s-expressions.
110 --
111 -- >>> let printer = setMaxWidth 8 (basicPrint id)
112 -- >>> encodeOne printer (L [A "one", A "two", A "three"])
113 -- "(one \n two\n three)"
114 setMaxWidth :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
115 setMaxWidth n pr = pr { maxWidth = Just n }
116
117 -- | Allow the serialized s-expression to be arbitrarily wide. This
118 -- makes all pretty-printing happen on a single line.
119 --
120 -- >>> let printer = removeMaxWidth (basicPrint id)
121 -- >>> encodeOne printer (L [A "one", A "two", A "three"])
122 -- "(one two three)"
123 removeMaxWidth :: SExprPrinter atom carrier -> SExprPrinter atom carrier
124 removeMaxWidth pr = pr { maxWidth = Nothing }
125
126 -- | Set the number of spaces that a subsequent line will be indented
127 -- after a swing indentation.
128 --
129 -- >>> let printer = setMaxWidth 12 (basicPrint id)
130 -- >>> encodeOne printer (L [A "elephant", A "pachyderm"])
131 -- "(elephant \n pachyderm)"
132 -- >>> encodeOne (setIndentAmount 4) (L [A "elephant", A "pachyderm"])
133 -- "(elephant \n pachyderm)"
134 setIndentAmount :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
135 setIndentAmount n pr = pr { indentAmount = n }
136
137 -- | Dictate how to indent subsequent lines based on the leading
138 -- subexpression in an s-expression. For details on how this works,
139 -- consult the documentation of the 'Indent' type.
140 --
141 -- >>> let indent (A "def") = SwingAfter 1; indent _ = Swing
142 -- >>> let printer = setIndentStrategy indent (setMaxWidth 8 (basicPrint id))
143 -- >>> encodeOne printer (L [ A "def", L [ A "func", A "arg" ], A "body" ])
144 -- "(def (func arg)\n body)"
145 -- >>> encodeOne printer (L [ A "elephant", A "among", A "pachyderms" ])
146 -- "(elephant \n among\n pachyderms)"
147 setIndentStrategy :: (SExpr atom -> Indent) -> SExprPrinter atom carrier -> SExprPrinter atom carrier
148 setIndentStrategy st pr = pr { swingIndent = st }
149
150 -- Sort of like 'unlines' but without the trailing newline
151 joinLines :: [Text] -> Text
152 joinLines = T.intercalate "\n"
153
154 -- Indents a line by n spaces
155 indent :: Int -> Text -> Text
156 indent n ts = T.replicate n " " <> ts
157
158 -- Indents every line n spaces, and adds a newline to the beginning
159 -- used in swung indents
160 indentAll :: Int -> [Text] -> Text
161 indentAll n = ("\n" <>) . joinLines . map (indent n)
162
163 -- Indents every line but the first by some amount
164 -- used in aligned indents
165 indentSubsequent :: Int -> [Text] -> Text
166 indentSubsequent _ [] = ""
167 indentSubsequent _ [t] = t
168 indentSubsequent n (t:ts) = joinLines (t : go ts)
169 where go = map (indent n)
170
171 -- oh god this code is so disgusting
172 -- i'm sorry to everyone i let down by writing this
173 -- i swear i'll do better in the future i promise i have to
174 -- for my sake and for everyone's
175
176 -- | Pretty-print a 'SExpr' according to the options in a
177 -- 'LayoutOptions' value.
178 prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text
179 prettyPrintSExpr SExprPrinter { .. } = pHead 0
180 where pHead _ SNil = "()"
181 pHead _ (SAtom a) = atomPrinter a
182 pHead ind (SCons x xs) = gather ind x xs id
183 gather _ _ (SAtom _) _ = error "no dotted pretty printing yet!"
184 gather ind h (SCons x xs) k = gather ind h xs (k . (x:))
185 gather ind h SNil k = "(" <> hd <> body <> ")"
186 where hd = indentSubsequent ind [pHead (ind+1) h]
187 lst = k []
188 flat = T.unwords (map (pHead (ind+1)) lst)
189 headWidth = T.length hd + 1
190 indented =
191 case swingIndent h of
192 SwingAfter n ->
193 let (l, ls) = splitAt n lst
194 t = T.unwords (map (pHead (ind+1)) l)
195 ts = indentAll (ind + indentAmount)
196 (map (pHead (ind + indentAmount)) ls)
197 in t <> ts
198 Swing ->
199 indentAll (ind + indentAmount)
200 (map (pHead (ind + indentAmount)) lst)
201 Align ->
202 indentSubsequent (ind + headWidth + 1)
203 (map (pHead (ind + headWidth + 1)) lst)
204 body
205 | length lst == 0 = ""
206 | Just maxAmt <- maxWidth
207 , T.length flat + ind > maxAmt = " " <> indented
208 | otherwise = " " <> flat
209
210 -- | Turn a single s-expression into a string according to a given
211 -- 'SExprPrinter'.
212 encodeOne :: SExprPrinter atom carrier -> carrier -> Text
213 encodeOne s@(SExprPrinter { .. }) =
214 prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier
215
216 -- | Turn a list of s-expressions into a single string according to
217 -- a given 'SExprPrinter'.
218 encode :: SExprPrinter atom carrier -> [carrier] -> Text
219 encode spec = T.intercalate "\n\n" . map (encodeOne spec)
55 {-# LANGUAGE TypeFamilies #-}
66
77 module Data.SCargot.Repr
8 ( -- * Elementary SExpr representation
8 ( -- $reprs
9 -- * Elementary SExpr representation
910 SExpr(..)
1011 -- * Rich SExpr representation
1112 , RichSExpr(..)
5152 -- represent a well-formed cons list, and 'RSDotted'
5253 -- to represent an improper list of the form
5354 -- @(a b c . d)@. This representation is based on
54 -- the shape of the parsed S-Expression, and not on
55 -- how it was represented, so @(a . (b))@ is going to
55 -- the structure of the parsed S-Expression, and not on
56 -- how it was originally represented: thus, @(a . (b))@ is going to
5657 -- be represented as @RSList[RSAtom a, RSAtom b]@
5758 -- despite having been originally represented as a
5859 -- dotted list.
138139 fromWellFormed (WFSAtom a) = SAtom a
139140 fromWellFormed (WFSList xs) =
140141 foldr SCons SNil (map fromWellFormed xs)
142
143 {- $reprs
144
145 This module contains several different representations for
146 s-expressions. The s-cargot library underlying uses the
147 'SExpr' type as its representation type, which is a binary
148 tree representation with an arbitrary type for its leaves.
149
150 This type is not always convenient to manipulate in Haskell
151 code, this module defines two alternate representations
152 which turn a sequence of nested right-branching cons pairs
153 into Haskell lists: that is to say, they transform between
154
155 @
156 SCons a (SCons b (SCons c SNil)) \<=\> RSList [a, b, c]
157 @
158
159 These two types differ in how they handle non-well-formed
160 lists, i.e. lists that end with an atom. The 'RichSExpr'
161 format handles this with a special constructor for lists
162 that end in an atom:
163
164 @
165 SCons a (SCons b (SAtom c)) \<=\> RSDotted [a, b] c
166 @
167
168 On the other hand, the 'WellFormedSExpr' type elects
169 not to handle this case. This is unusual for Lisp source code,
170 but is a reasonable choice for configuration or data
171 storage formats that use s-expressions, where
172 non-well-formed lists would be an unnecessary
173 complication.
174
175 To make working with these types less verbose, there are other
176 modules that export pattern aliases and helper functions: these
177 can be found at "Data.SCargot.Repr.Basic",
178 "Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed".
179 -}
11 module Data.SCargot
2 ( module Data.SCargot.General
3 ) where
2 ( -- * SCargot Basics
43
5 import Data.SCargot.General
4 -- $intro
5
6 -- * Parsing and Printing
7 decode
8 , decodeOne
9 , encode
10 , encodeOne
11 -- * Parser Construction
12 -- ** Specifying a Parser
13 , SExprParser
14 , Reader
15 , Comment
16 , mkParser
17 , setCarrier
18 , addReader
19 , setComment
20 , asRich
21 , asWellFormed
22 , withQuote
23 -- * Printer Construction
24 -- * Specifying a Pretty-Printer
25 , SExprPrinter
26 , Indent(..)
27 , basicPrint
28 , flatPrint
29 , setFromCarrier
30 , setMaxWidth
31 , removeMaxWidth
32 , setIndentAmount
33 , setIndentStrategy
34 ) where
35
36 import Data.SCargot.Parse
37 import Data.SCargot.Print
38
39 {- $intro
40
41 The S-Cargot library is a library for parsing and emitting
42 <https://en.wikipedia.org/wiki/S-expression s-expressions>, designed
43 to be as flexible as possible. Despite some efforts at
44 <http://people.csail.mit.edu/rivest/Sexp.txt standardization>,
45 s-expressions are a general approach to describing a data format
46 that can very often differ in subtle, incompatible ways: the
47 s-expressions understood by Common Lisp are different from the
48 s-expressions understood by Scheme, and even the different
49 revisions of the Scheme language understand s-expressions in a
50 slightly different way. To accomodate this, the S-Cargot library
51 provides a toolbox for defining variations on s-expressions,
52 complete with the ability to select various comment syntaxes, reader
53 macros, and atom types.
54
55 If all you want is to read some s-expressions and don't care about
56 the edge cases of the format, or all you want is a new configuration
57 format, try the "Data.SCargot.Language.Basic" or "Data.SCargot.Language.HaskLike"
58 modules, which define an s-expression language whose atoms are
59 plain strings and Haskell literals, respectively.
60
61 The S-Cargot library works by specifying values which contain all
62 the information needed to either parse or print an s-expression.
63 The actual s-expression structure is parsed as a structure of
64 <https://en.wikipedia.org/wiki/Cons cons cells> as represented
65 by the 'SExpr' type, but can alternately be exposed as the
66 isomorphic 'RichSExpr' type or the less expressive but
67 easier-to-work-with 'WellFormedSExpr' type. Modules devoted
68 to each representation type (in "Data.SCargot.Repr.Basic",
69 "Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed")
70 provide helper functions, lenses, and pattern synonyms to make
71 creating and processing these values easier.
72
73 The details of how to parse a given structure are represented
74 by building up a 'SExprParser' value, which is defined in
75 "Data.SCargot.Parse" and re-exported here. A minimal
76 'SExprParser' defines only how to parse the atoms of the
77 language; helper functions can define comment syntaxes,
78 reader macros, and transformations over the parsed structure.
79
80 The details of how to print a given structure are represented
81 by building up a 'SExprPrinter' value, which is defined in
82 "Data.SCargot.Print" and re-exported here. A minimal
83 'SExprPrinter' defines only how to print the atoms of the
84 language; helper functions help with the layout of the
85 pretty-printed s-expression in terms of how to indent the
86 surrounding expression.
87
88 Other helper modules define useful primitives for building up
89 s-expression languages: the "Data.SCargot.Common" module provides
90 parsers for common literals, while the "Data.SCargot.Comments"
91 module provides parsers for comment syntaxes borrowed from
92 various other languages.
93
94 -}
11 name: s-cargot
22 version: 0.1.0.0
33 synopsis: A flexible, extensible s-expression library.
4 homepage: https://github.com/aisamanra/s-cargot
45 description: S-Cargot is a library for working with s-expressions in
56 a modular and extensible way, opting for genericity and
67 flexibility instead of speed. Instead of understanding
1819 build-type: Simple
1920 cabal-version: >=1.10
2021
22 source-repository head
23 type: git
24 location: git://github.com/aisamanra/s-cargot.git
25
2126 library
2227 exposed-modules: Data.SCargot,
2328 Data.SCargot.Repr,
2429 Data.SCargot.Repr.Basic,
2530 Data.SCargot.Repr.Rich,
2631 Data.SCargot.Repr.WellFormed,
27 Data.SCargot.General,
28 Data.SCargot.Pretty,
29 Data.SCargot.Basic,
32 Data.SCargot.Parse,
33 Data.SCargot.Print,
3034 Data.SCargot.Comments,
3135 Data.SCargot.Common,
32 Data.SCargot.HaskLike
36 Data.SCargot.Language.Basic,
37 Data.SCargot.Language.HaskLike
3338 build-depends: base >=4.7 && <5,
3439 parsec,
3540 text,