Several changes: new helper functions for decoding from various
reprs; new Comments module; new IsString instance for SExpr types...
Getty Ritter
9 years ago
6 | 6 | , asWellFormed |
7 | 7 | , addReader |
8 | 8 | , setComment |
9 |
, with |
|
9 | , withLispComments | |
10 | 10 | , withQuote |
11 | 11 | ) where |
12 | 12 | |
15 | 15 | import Data.Text (Text) |
16 | 16 | |
17 | 17 | import Data.SCargot.Repr.Basic |
18 |
import Data.SCargot.General |
|
18 | import Data.SCargot.General | |
19 | import Data.SCargot.Comments (withLispComments) | |
19 | 20 | |
20 | 21 | isAtomChar :: Char -> Bool |
21 | 22 | isAtomChar c = isAlphaNum c |
38 | 39 | -- storage or configuration formats. |
39 | 40 | basicSpec :: SExprSpec Text (SExpr Text) |
40 | 41 | basicSpec = mkSpec (takeWhile1 isAtomChar) id |
41 | ||
42 | -- | Add the ability to understand a quoted S-Expression. | |
43 | -- This means that @'sexpr@ becomes sugar for | |
44 | -- @(quote sexpr)@. This is a variation on the identically-named | |
45 | -- function in Data.SCargot.General that has been specialized | |
46 | -- for the Basic atom type. | |
47 | withQuote :: SExprSpec Text a -> SExprSpec Text a | |
48 | withQuote = addReader '\'' (fmap go) | |
49 | where go s = SCons (SAtom "quote") (SCons s SNil) |
1 | {-# LANGUAGE OverloadedStrings #-} | |
2 | ||
3 | module Data.SCargot.Comments | |
4 | ( -- * Comment Syntax | |
5 | -- $intro | |
6 | -- * Lisp Comments | |
7 | withLispComments | |
8 | -- * Other Existing Comment Syntaxes | |
9 | -- ** Scripting Language Syntax | |
10 | -- $script | |
11 | , withOctothorpeComments | |
12 | -- ** C-Like Syntax | |
13 | -- $clike | |
14 | , withCLikeLineComments | |
15 | , withCLikeBlockComments | |
16 | , withCLikeComments | |
17 | -- ** Haskell Syntax | |
18 | -- $haskell | |
19 | , withHaskellLineComments | |
20 | , withHaskellBlockComments | |
21 | , withHaskellComments | |
22 | -- * Comment Syntax Helper Functions | |
23 | , lineComment | |
24 | , simpleBlockComment | |
25 | ) where | |
26 | ||
27 | import Control.Applicative ((<|>)) | |
28 | import Control.Monad (void) | |
29 | import Data.Attoparsec.Text | |
30 | import Data.Text (Text) | |
31 | ||
32 | import Prelude hiding (takeWhile) | |
33 | ||
34 | import Data.SCargot.General | |
35 | ||
36 | -- | Given a string, produce a comment parser that matches that | |
37 | -- initial string and ignores everything until the end of the | |
38 | -- line. | |
39 | lineComment :: Text -> Comment | |
40 | lineComment s = string s >> takeWhile (/= '\n') >> return () | |
41 | ||
42 | -- | Given two strings, a begin and an end delimeter, produce a | |
43 | -- parser that matches the beginning delimeter and then ignores | |
44 | -- everything until it finds the end delimiter. This does not | |
45 | -- consider nesting, so, for example, a comment created with | |
46 | -- | |
47 | -- > curlyComment :: Comment | |
48 | -- > curlyComment = simpleBlockComment "{" "}" | |
49 | -- | |
50 | -- will consider | |
51 | -- | |
52 | -- > { this { comment } | |
53 | -- | |
54 | -- to be a complete comment, despite the improper nesting. This is | |
55 | -- analogous to standard C-style comments in which | |
56 | -- | |
57 | -- > /* this /* comment */ | |
58 | -- | |
59 | -- is a complete comment. | |
60 | simpleBlockComment :: Text -> Text -> Comment | |
61 | simpleBlockComment begin end = | |
62 | string begin >> | |
63 | manyTill anyChar (string end) >> | |
64 | return () | |
65 | ||
66 | -- | Lisp-style line-oriented comments start with @;@ and last | |
67 | -- until the end of the line. This is usually the comment | |
68 | -- syntax you want. | |
69 | withLispComments :: SExprSpec t a -> SExprSpec t a | |
70 | withLispComments = setComment (lineComment ";") | |
71 | ||
72 | -- | C++-like line-oriented comment start with @//@ and last | |
73 | -- until the end of the line. | |
74 | withCLikeLineComments :: SExprSpec t a -> SExprSpec t a | |
75 | withCLikeLineComments = setComment (lineComment "//") | |
76 | ||
77 | -- | C-like block comments start with @/*@ and end with @*/@. | |
78 | -- They do not nest. | |
79 | withCLikeBlockComments :: SExprSpec t a -> SExprSpec t a | |
80 | withCLikeBlockComments = setComment (simpleBlockComment "/*" "*/") | |
81 | ||
82 | -- | C-like comments include both line- and block-comments, the | |
83 | -- former starting with @//@ and the latter contained within | |
84 | -- @//* ... *//@. | |
85 | withCLikeComments :: SExprSpec t a -> SExprSpec t a | |
86 | withCLikeComments = setComment (lineComment "//" <|> | |
87 | simpleBlockComment "/*" "*/") | |
88 | ||
89 | -- | Haskell line-oriented comments start with @--@ and last | |
90 | -- until the end of the line. | |
91 | withHaskellLineComments :: SExprSpec t a -> SExprSpec t a | |
92 | withHaskellLineComments = setComment (lineComment "--") | |
93 | ||
94 | -- | Haskell block comments start with @{-@ and end with @-}@. | |
95 | -- They do not nest. | |
96 | withHaskellBlockComments :: SExprSpec t a -> SExprSpec t a | |
97 | withHaskellBlockComments = setComment (simpleBlockComment "{-" "-}") | |
98 | ||
99 | -- | Haskell comments include both the line-oriented @--@ comments | |
100 | -- and the block-oriented @{- ... -}@ comments | |
101 | withHaskellComments :: SExprSpec t a -> SExprSpec t a | |
102 | withHaskellComments = setComment (lineComment "--" <|> | |
103 | simpleBlockComment "{-" "-}") | |
104 | ||
105 | -- | Many scripting and shell languages use these, which begin with | |
106 | -- @#@ and last until the end of the line. | |
107 | withOctothorpeComments :: SExprSpec t a -> SExprSpec t a | |
108 | withOctothorpeComments = setComment (lineComment "#") | |
109 | ||
110 | ||
111 | {- $intro | |
112 | ||
113 | By default a 'SExprSpec' will not understand any kind of comment | |
114 | syntax. Most varieties of s-expression will, however, want some kind | |
115 | of commenting capability, so the below functions will produce a new | |
116 | 'SExprSpec' which understands various kinds of comment syntaxes. | |
117 | ||
118 | For example: | |
119 | ||
120 | > mySpec :: SExprSpec Text (SExpr Text) | |
121 | > mySpec = asWellFormed (mkSpec (takeWhile1 isAlphaNum) id) | |
122 | > | |
123 | > myLispySpec :: SExprSpec Text (SExpr Text) | |
124 | > myLispySpec = withLispComments mySpec | |
125 | > | |
126 | > myCLikeSpec :: SExprSpec Text (SExpr Text) | |
127 | > myCLikeSpec = withCLikeComment mySpec | |
128 | ||
129 | We can then use these to parse s-expressions with different kinds of | |
130 | comment syntaxes: | |
131 | ||
132 | > decode mySpec "(foo ; a lisp comment\n bar)\n" | |
133 | > Left "Failed reading: takeWhile1" | |
134 | > decode myLispySpec "(foo ; a lisp comment\n bar)\n" | |
135 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
136 | > decode mySpec "(foo /* a c-like\n comment */ bar)\n" | |
137 | > Left "Failed reading: takeWhile1" | |
138 | > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n" | |
139 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
140 | ||
141 | -} | |
142 | ||
143 | {- $script | |
144 | > (one # a comment | |
145 | > two # another one | |
146 | > three) | |
147 | -} | |
148 | ||
149 | {- $clike | |
150 | > (one // a comment | |
151 | > two /* another | |
152 | > one */ | |
153 | > three) | |
154 | -} | |
155 | ||
156 | -- $haskell | |
157 | -- > (one -- a comment | |
158 | -- > two {- another | |
159 | -- > one -} | |
160 | -- > three) |
11 | 11 | -- * Specific SExprSpec Conversions |
12 | 12 | , asRich |
13 | 13 | , asWellFormed |
14 | , withSemicolonComments | |
15 | 14 | , withQuote |
16 | 15 | -- * Using a SExprSpec |
17 | 16 | , decode |
30 | 29 | import Data.Map.Strict (Map) |
31 | 30 | import qualified Data.Map.Strict as M |
32 | 31 | import Data.Monoid ((<>)) |
32 | import Data.String (IsString) | |
33 | 33 | import Data.Text (Text, pack, unpack) |
34 | 34 | import qualified Data.Text as T |
35 | 35 | |
163 | 163 | setComment :: Comment -> SExprSpec a c -> SExprSpec a c |
164 | 164 | setComment c spec = spec { comment = Just c } |
165 | 165 | |
166 | -- | Add the ability to skip line comments beginning with a semicolon. | |
167 | withSemicolonComments :: SExprSpec a c -> SExprSpec a c | |
168 | withSemicolonComments = setComment (char ';' >> takeWhile (/='\n') >> return ()) | |
169 | ||
170 | 166 | -- | Add the ability to understand a quoted S-Expression. In general, |
171 | 167 | -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is |
172 | 168 | -- a convenience function which allows you to easily add quoted |
173 | 169 | -- expressions to a 'SExprSpec', provided that you supply which |
174 | 170 | -- atom you want substituted in for the symbol @quote@. |
175 | withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a) | |
176 | withQuote q = addReader '\'' (fmap go) | |
177 |
|
|
171 | withQuote :: IsString t => SExprSpec t (SExpr t) -> SExprSpec t (SExpr t) | |
172 | withQuote = addReader '\'' (fmap go) | |
173 | where go s = SCons "quote" (SCons s SNil) | |
178 | 174 | |
179 | 175 | parseGenericSExpr :: |
180 | 176 | Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom) |
7 | 7 | , pattern (:::) |
8 | 8 | , pattern A |
9 | 9 | , pattern Nil |
10 | -- * Useful processing functions | |
11 | , fromPair | |
12 | , fromList | |
10 | 13 | ) where |
11 | 14 | |
15 | import Control.Applicative ((<$>), (<*>), pure) | |
12 | 16 | import Data.SCargot.Repr as R |
13 | 17 | |
14 | 18 | -- | A shorter infix alias for `SCons` |
19 | 23 | |
20 | 24 | -- | A (slightly) shorter alias for `SNil` |
21 | 25 | pattern Nil = SNil |
26 | ||
27 | ||
28 | type S t = R.SExpr t | |
29 | type Parse t a = R.SExpr t -> Either String a | |
30 | ||
31 | -- | Utility function for parsing a pair of things. | |
32 | fromPair :: Parse t a -> Parse t b -> Parse t (a, b) | |
33 | fromPair pl pr (l ::: r ::: Nil) = (,) <$> pl l <*> pr r | |
34 | fromPair _ _ sx = fail ("Expected two-element list") | |
35 | ||
36 | -- | Utility function for parsing a list of things. | |
37 | fromList :: Parse t a -> Parse t [a] | |
38 | fromList p (s ::: ss) = (:) <$> p s <*> fromList p ss | |
39 | fromList p Nil = pure [] | |
40 | fromList _ sx = fail ("Expected list") | |
41 | ||
42 | gatherList :: S t -> Either String [S t] | |
43 | gatherList (x ::: xs) = (:) <$> pure x <*> gatherList xs | |
44 | gatherList Nil = pure [] | |
45 | gatherList sx = fail ("Expected list") | |
46 | ||
47 | asPair :: ((S t, S t) -> Either String a) -> S t -> Either String a | |
48 | asPair f (l ::: r ::: SNil) = f (l, r) | |
49 | asPair _ sx = fail ("Expected two-element list") | |
50 | ||
51 | asList :: ([S t] -> Either String a) -> S t -> Either String a | |
52 | asList f ls = gatherList ls >>= f | |
53 | ||
54 | asSymbol :: (t -> Either String a) -> S t -> Either String a | |
55 | asSymbol f (A s) = f s | |
56 | asSymbol _ sx = fail ("Expected symbol") | |
57 | ||
58 | asAssoc :: ([(S t, S t)] -> Either String a) -> S t -> Either String a | |
59 | asAssoc f ss = gatherList ss >>= mapM go >>= f | |
60 | where go (a ::: b ::: Nil) = return (a, b) | |
61 | go sx = fail ("Expected two-element list") |
11 | 11 | , pattern L |
12 | 12 | , pattern DL |
13 | 13 | , pattern Nil |
14 | -- * Useful processing functions | |
15 | , fromPair | |
16 | , fromList | |
14 | 17 | ) where |
15 | 18 | |
19 | import Control.Applicative ((<$>), (<*>), pure) | |
16 | 20 | import Data.SCargot.Repr as R |
17 | 21 | |
18 | 22 | -- | A shorter infix alias to grab the head |
30 | 34 | |
31 | 35 | -- | A shorter alias for `RSList []` |
32 | 36 | pattern Nil = R.RSList [] |
37 | ||
38 | type S t = R.RichSExpr t | |
39 | type Parse t a = S t -> Either String a | |
40 | ||
41 | -- | Utility function for parsing a pair of things. | |
42 | fromPair :: Parse t a -> Parse t b -> Parse t (a, b) | |
43 | fromPair pl pr = asPair $ \(l,r) -> (,) <$> pl l <*> pr r | |
44 | ||
45 | -- | Utility function for parsing a list of things. | |
46 | fromList :: Parse t a -> Parse t [a] | |
47 | fromList p = asList $ \ss -> mapM p ss | |
48 | ||
49 | asPair :: ((S t, S t) -> Either String a) -> S t -> Either String a | |
50 | asPair f (L [l, r]) = f (l, r) | |
51 | asPair _ sx = fail ("Expected two-element list") | |
52 | ||
53 | asList :: ([S t] -> Either String a) -> S t -> Either String a | |
54 | asList f (L ls) = f ls | |
55 | asList _ sx = fail ("Expected list") | |
56 | ||
57 | asSymbol :: (t -> Either String a) -> S t -> Either String a | |
58 | asSymbol f (A s) = f s | |
59 | asSymbol _ sx = fail ("Expected symbol") | |
60 | ||
61 | asAssoc :: ([(S t, S t)] -> Either String a) -> S t -> Either String a | |
62 | asAssoc f (L ss) = gatherPairs ss >>= f | |
63 | where gatherPairs (L [a, b] : ss) = (:) <$> pure (a, b) <*> gatherPairs ss | |
64 | gatherPairs [] = pure [] | |
65 | gatherPairs _ = fail "..." | |
66 | asAssoc _ sx = fail ("Expected assoc list") |
10 | 10 | , pattern L |
11 | 11 | , pattern A |
12 | 12 | , pattern Nil |
13 | -- * Useful processing functions | |
14 | , fromPair | |
15 | , fromList | |
13 | 16 | ) where |
14 | 17 | |
18 | import Control.Applicative ((<$>), (<*>), pure) | |
15 | 19 | import Data.SCargot.Repr as R |
16 | 20 | |
17 | 21 | -- | A shorter infix alias to grab the head |
26 | 30 | |
27 | 31 | -- | A shorter alias for `WFSList []` |
28 | 32 | pattern Nil = R.WFSList [] |
33 | ||
34 | type S t = R.WellFormedSExpr t | |
35 | type Parse t a = R.WellFormedSExpr t -> Either String a | |
36 | ||
37 | -- | Utility function for parsing a pair of things. | |
38 | fromPair :: Parse t a -> Parse t b -> Parse t (a, b) | |
39 | fromPair pl pr (L [l, r]) = (,) <$> pl l <*> pr r | |
40 | fromPair _ _ sx = fail ("Expected two-element list") | |
41 | ||
42 | -- | Utility function for parsing a list of things. | |
43 | fromList :: Parse t a -> Parse t [a] | |
44 | fromList p (L ss) = mapM p ss | |
45 | fromList _ sx = fail ("Expected list") | |
46 | ||
47 | ||
48 | asPair :: ((S t, S t) -> Either String a) -> S t -> Either String a | |
49 | asPair f (L [l, r]) = f (l, r) | |
50 | asPair _ sx = fail ("Expected two-element list") | |
51 | ||
52 | asList :: ([S t] -> Either String a) -> S t -> Either String a | |
53 | asList f (L ls) = f ls | |
54 | asList _ sx = fail ("Expected list") | |
55 | ||
56 | asSymbol :: (t -> Either String a) -> S t -> Either String a | |
57 | asSymbol f (A s) = f s | |
58 | asSymbol _ sx = fail ("Expected symbol") | |
59 | ||
60 | asAssoc :: ([(S t, S t)] -> Either String a) -> S t -> Either String a | |
61 | asAssoc f (L ss) = gatherPairs ss >>= f | |
62 | where gatherPairs (L [a, b] : ss) = (:) <$> pure (a, b) <*> gatherPairs ss | |
63 | gatherPairs [] = pure [] | |
64 | gatherPairs _ = fail "..." | |
65 | asAssoc _ sx = fail ("Expected assoc list") |
13 | 13 | , fromWellFormed |
14 | 14 | ) where |
15 | 15 | |
16 | import Data.String (IsString(..)) | |
17 | ||
16 | 18 | -- | All S-Expressions can be understood as a sequence |
17 | 19 | -- of @cons@ cells (represented here by 'SCons'), the |
18 | 20 | -- empty list @nil@ (represented by 'SNil') or an |
22 | 24 | | SAtom atom |
23 | 25 | | SNil |
24 | 26 | deriving (Eq, Show, Read, Functor) |
27 | ||
28 | instance IsString atom => IsString (SExpr atom) where | |
29 | fromString = SAtom . fromString | |
25 | 30 | |
26 | 31 | -- | Sometimes, the cons-based interface is too low |
27 | 32 | -- level, and we'd rather have the lists themselves |
39 | 44 | | RSDotted [RichSExpr atom] atom |
40 | 45 | | RSAtom atom |
41 | 46 | deriving (Eq, Show, Read, Functor) |
47 | ||
48 | instance IsString atom => IsString (RichSExpr atom) where | |
49 | fromString = RSAtom . fromString | |
42 | 50 | |
43 | 51 | -- | It should always be true that |
44 | 52 | -- |
70 | 78 | | WFSAtom atom |
71 | 79 | deriving (Eq, Show, Read, Functor) |
72 | 80 | |
81 | instance IsString atom => IsString (WellFormedSExpr atom) where | |
82 | fromString = WFSAtom . fromString | |
83 | ||
73 | 84 | -- | This will be @Nothing@ if the argument contains an |
74 | 85 | -- improper list. It should hold that |
75 | 86 | -- |
77 | 88 | -- |
78 | 89 | -- and also (more tediously) that |
79 | 90 | -- |
80 |
-- > case |
|
91 | -- > case toWellFormed x of | |
81 | 92 | -- > Left _ -> True |
82 |
-- > Right y -> x == |
|
93 | -- > Right y -> x == fromWellFormed y | |
83 | 94 | toWellFormed :: SExpr atom -> Either String (WellFormedSExpr atom) |
84 | 95 | toWellFormed SNil = return (WFSList []) |
85 | 96 | toWellFormed (SAtom a) = return (WFSAtom a) |
7 | 7 | import qualified Data.Text as T |
8 | 8 | |
9 | 9 | pToken :: Parser ByteString |
10 |
pToken = |
|
10 | pToken = do | |
11 | x <- char (isAlpha || isTokenPunct) | |
12 | xs <- takeWhile1 (isAlpha || isDigit || isTokenPunct) | |
13 | ||
14 | isTokenPunct :: Char -> Bool | |
15 | isTokenPunct c = c `elem` "-./_:*+=" | |
16 | ||
17 | pWithLength :: Parser ByteString | |
18 | pWithLength = do | |
19 | n <- takeWhile1 isDigit | |
20 | pFindType (Just (read (T.unpack n))) | |
21 | ||
22 | pFindType :: Maybe Int -> Parser ByteString | |
23 | pFindType len = do | |
24 | c <- peekChar | |
25 | case c of | |
26 | ':' -> case len of | |
27 | Just l -> pVerbatim l | |
28 | Nothing -> fail "Verbatim encoding without length given" | |
29 | '"' -> pQuoted len | |
30 | '#' -> pHex len | |
31 | '{' -> pBase64Verbatim len | |
32 | '|' -> pBase64 len | |
33 | _ -> case len of | |
34 | Just _ -> fail "Unexpected length field" | |
35 | Nothing -> pToken | |
11 | 36 | |
12 | 37 | pQuoted :: Maybe Int -> Parser ByteString |
13 | 38 | pQuoted = do |
17 | 42 | return ss |
18 | 43 | |
19 | 44 | pHex :: Parser ByteString |
20 |
pHex = |
|
45 | pHex = do | |
46 | ||
21 | 47 | |
22 | 48 | pVerbatim :: Int -> Parser ByteString |
23 | 49 | pVerbatim = do |
24 | 50 | char ':' |
25 | 51 | take n |
26 | 52 | |
27 | pBase64Verbatim :: Parser ByteString | |
28 | pBase64 :: Parser ByteString | |
53 | pBase64Verbatim :: Maybe Int -> Parser ByteString | |
54 | pBase64Verbatim = undefined | |
55 | ||
56 | pBase64 :: Maybe Int -> Parser ByteString | |
57 | pBase64 = undefined |