Several changes: new helper functions for decoding from various
reprs; new Comments module; new IsString instance for SExpr types...
Getty Ritter
10 years ago
| 6 | 6 | , asWellFormed |
| 7 | 7 | , addReader |
| 8 | 8 | , setComment |
| 9 |
, with |
|
| 9 | , withLispComments | |
| 10 | 10 | , withQuote |
| 11 | 11 | ) where |
| 12 | 12 | |
| 15 | 15 | import Data.Text (Text) |
| 16 | 16 | |
| 17 | 17 | import Data.SCargot.Repr.Basic |
| 18 |
import Data.SCargot.General |
|
| 18 | import Data.SCargot.General | |
| 19 | import Data.SCargot.Comments (withLispComments) | |
| 19 | 20 | |
| 20 | 21 | isAtomChar :: Char -> Bool |
| 21 | 22 | isAtomChar c = isAlphaNum c |
| 38 | 39 | -- storage or configuration formats. |
| 39 | 40 | basicSpec :: SExprSpec Text (SExpr Text) |
| 40 | 41 | basicSpec = mkSpec (takeWhile1 isAtomChar) id |
| 41 | ||
| 42 | -- | Add the ability to understand a quoted S-Expression. | |
| 43 | -- This means that @'sexpr@ becomes sugar for | |
| 44 | -- @(quote sexpr)@. This is a variation on the identically-named | |
| 45 | -- function in Data.SCargot.General that has been specialized | |
| 46 | -- for the Basic atom type. | |
| 47 | withQuote :: SExprSpec Text a -> SExprSpec Text a | |
| 48 | withQuote = addReader '\'' (fmap go) | |
| 49 | where go s = SCons (SAtom "quote") (SCons s SNil) | |
| 1 | {-# LANGUAGE OverloadedStrings #-} | |
| 2 | ||
| 3 | module Data.SCargot.Comments | |
| 4 | ( -- * Comment Syntax | |
| 5 | -- $intro | |
| 6 | -- * Lisp Comments | |
| 7 | withLispComments | |
| 8 | -- * Other Existing Comment Syntaxes | |
| 9 | -- ** Scripting Language Syntax | |
| 10 | -- $script | |
| 11 | , withOctothorpeComments | |
| 12 | -- ** C-Like Syntax | |
| 13 | -- $clike | |
| 14 | , withCLikeLineComments | |
| 15 | , withCLikeBlockComments | |
| 16 | , withCLikeComments | |
| 17 | -- ** Haskell Syntax | |
| 18 | -- $haskell | |
| 19 | , withHaskellLineComments | |
| 20 | , withHaskellBlockComments | |
| 21 | , withHaskellComments | |
| 22 | -- * Comment Syntax Helper Functions | |
| 23 | , lineComment | |
| 24 | , simpleBlockComment | |
| 25 | ) where | |
| 26 | ||
| 27 | import Control.Applicative ((<|>)) | |
| 28 | import Control.Monad (void) | |
| 29 | import Data.Attoparsec.Text | |
| 30 | import Data.Text (Text) | |
| 31 | ||
| 32 | import Prelude hiding (takeWhile) | |
| 33 | ||
| 34 | import Data.SCargot.General | |
| 35 | ||
| 36 | -- | Given a string, produce a comment parser that matches that | |
| 37 | -- initial string and ignores everything until the end of the | |
| 38 | -- line. | |
| 39 | lineComment :: Text -> Comment | |
| 40 | lineComment s = string s >> takeWhile (/= '\n') >> return () | |
| 41 | ||
| 42 | -- | Given two strings, a begin and an end delimeter, produce a | |
| 43 | -- parser that matches the beginning delimeter and then ignores | |
| 44 | -- everything until it finds the end delimiter. This does not | |
| 45 | -- consider nesting, so, for example, a comment created with | |
| 46 | -- | |
| 47 | -- > curlyComment :: Comment | |
| 48 | -- > curlyComment = simpleBlockComment "{" "}" | |
| 49 | -- | |
| 50 | -- will consider | |
| 51 | -- | |
| 52 | -- > { this { comment } | |
| 53 | -- | |
| 54 | -- to be a complete comment, despite the improper nesting. This is | |
| 55 | -- analogous to standard C-style comments in which | |
| 56 | -- | |
| 57 | -- > /* this /* comment */ | |
| 58 | -- | |
| 59 | -- is a complete comment. | |
| 60 | simpleBlockComment :: Text -> Text -> Comment | |
| 61 | simpleBlockComment begin end = | |
| 62 | string begin >> | |
| 63 | manyTill anyChar (string end) >> | |
| 64 | return () | |
| 65 | ||
| 66 | -- | Lisp-style line-oriented comments start with @;@ and last | |
| 67 | -- until the end of the line. This is usually the comment | |
| 68 | -- syntax you want. | |
| 69 | withLispComments :: SExprSpec t a -> SExprSpec t a | |
| 70 | withLispComments = setComment (lineComment ";") | |
| 71 | ||
| 72 | -- | C++-like line-oriented comment start with @//@ and last | |
| 73 | -- until the end of the line. | |
| 74 | withCLikeLineComments :: SExprSpec t a -> SExprSpec t a | |
| 75 | withCLikeLineComments = setComment (lineComment "//") | |
| 76 | ||
| 77 | -- | C-like block comments start with @/*@ and end with @*/@. | |
| 78 | -- They do not nest. | |
| 79 | withCLikeBlockComments :: SExprSpec t a -> SExprSpec t a | |
| 80 | withCLikeBlockComments = setComment (simpleBlockComment "/*" "*/") | |
| 81 | ||
| 82 | -- | C-like comments include both line- and block-comments, the | |
| 83 | -- former starting with @//@ and the latter contained within | |
| 84 | -- @//* ... *//@. | |
| 85 | withCLikeComments :: SExprSpec t a -> SExprSpec t a | |
| 86 | withCLikeComments = setComment (lineComment "//" <|> | |
| 87 | simpleBlockComment "/*" "*/") | |
| 88 | ||
| 89 | -- | Haskell line-oriented comments start with @--@ and last | |
| 90 | -- until the end of the line. | |
| 91 | withHaskellLineComments :: SExprSpec t a -> SExprSpec t a | |
| 92 | withHaskellLineComments = setComment (lineComment "--") | |
| 93 | ||
| 94 | -- | Haskell block comments start with @{-@ and end with @-}@. | |
| 95 | -- They do not nest. | |
| 96 | withHaskellBlockComments :: SExprSpec t a -> SExprSpec t a | |
| 97 | withHaskellBlockComments = setComment (simpleBlockComment "{-" "-}") | |
| 98 | ||
| 99 | -- | Haskell comments include both the line-oriented @--@ comments | |
| 100 | -- and the block-oriented @{- ... -}@ comments | |
| 101 | withHaskellComments :: SExprSpec t a -> SExprSpec t a | |
| 102 | withHaskellComments = setComment (lineComment "--" <|> | |
| 103 | simpleBlockComment "{-" "-}") | |
| 104 | ||
| 105 | -- | Many scripting and shell languages use these, which begin with | |
| 106 | -- @#@ and last until the end of the line. | |
| 107 | withOctothorpeComments :: SExprSpec t a -> SExprSpec t a | |
| 108 | withOctothorpeComments = setComment (lineComment "#") | |
| 109 | ||
| 110 | ||
| 111 | {- $intro | |
| 112 | ||
| 113 | By default a 'SExprSpec' will not understand any kind of comment | |
| 114 | syntax. Most varieties of s-expression will, however, want some kind | |
| 115 | of commenting capability, so the below functions will produce a new | |
| 116 | 'SExprSpec' which understands various kinds of comment syntaxes. | |
| 117 | ||
| 118 | For example: | |
| 119 | ||
| 120 | > mySpec :: SExprSpec Text (SExpr Text) | |
| 121 | > mySpec = asWellFormed (mkSpec (takeWhile1 isAlphaNum) id) | |
| 122 | > | |
| 123 | > myLispySpec :: SExprSpec Text (SExpr Text) | |
| 124 | > myLispySpec = withLispComments mySpec | |
| 125 | > | |
| 126 | > myCLikeSpec :: SExprSpec Text (SExpr Text) | |
| 127 | > myCLikeSpec = withCLikeComment mySpec | |
| 128 | ||
| 129 | We can then use these to parse s-expressions with different kinds of | |
| 130 | comment syntaxes: | |
| 131 | ||
| 132 | > decode mySpec "(foo ; a lisp comment\n bar)\n" | |
| 133 | > Left "Failed reading: takeWhile1" | |
| 134 | > decode myLispySpec "(foo ; a lisp comment\n bar)\n" | |
| 135 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
| 136 | > decode mySpec "(foo /* a c-like\n comment */ bar)\n" | |
| 137 | > Left "Failed reading: takeWhile1" | |
| 138 | > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n" | |
| 139 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
| 140 | ||
| 141 | -} | |
| 142 | ||
| 143 | {- $script | |
| 144 | > (one # a comment | |
| 145 | > two # another one | |
| 146 | > three) | |
| 147 | -} | |
| 148 | ||
| 149 | {- $clike | |
| 150 | > (one // a comment | |
| 151 | > two /* another | |
| 152 | > one */ | |
| 153 | > three) | |
| 154 | -} | |
| 155 | ||
| 156 | -- $haskell | |
| 157 | -- > (one -- a comment | |
| 158 | -- > two {- another | |
| 159 | -- > one -} | |
| 160 | -- > three) |
| 11 | 11 | -- * Specific SExprSpec Conversions |
| 12 | 12 | , asRich |
| 13 | 13 | , asWellFormed |
| 14 | , withSemicolonComments | |
| 15 | 14 | , withQuote |
| 16 | 15 | -- * Using a SExprSpec |
| 17 | 16 | , decode |
| 30 | 29 | import Data.Map.Strict (Map) |
| 31 | 30 | import qualified Data.Map.Strict as M |
| 32 | 31 | import Data.Monoid ((<>)) |
| 32 | import Data.String (IsString) | |
| 33 | 33 | import Data.Text (Text, pack, unpack) |
| 34 | 34 | import qualified Data.Text as T |
| 35 | 35 | |
| 163 | 163 | setComment :: Comment -> SExprSpec a c -> SExprSpec a c |
| 164 | 164 | setComment c spec = spec { comment = Just c } |
| 165 | 165 | |
| 166 | -- | Add the ability to skip line comments beginning with a semicolon. | |
| 167 | withSemicolonComments :: SExprSpec a c -> SExprSpec a c | |
| 168 | withSemicolonComments = setComment (char ';' >> takeWhile (/='\n') >> return ()) | |
| 169 | ||
| 170 | 166 | -- | Add the ability to understand a quoted S-Expression. In general, |
| 171 | 167 | -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is |
| 172 | 168 | -- a convenience function which allows you to easily add quoted |
| 173 | 169 | -- expressions to a 'SExprSpec', provided that you supply which |
| 174 | 170 | -- atom you want substituted in for the symbol @quote@. |
| 175 | withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a) | |
| 176 | withQuote q = addReader '\'' (fmap go) | |
| 177 |
|
|
| 171 | withQuote :: IsString t => SExprSpec t (SExpr t) -> SExprSpec t (SExpr t) | |
| 172 | withQuote = addReader '\'' (fmap go) | |
| 173 | where go s = SCons "quote" (SCons s SNil) | |
| 178 | 174 | |
| 179 | 175 | parseGenericSExpr :: |
| 180 | 176 | Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom) |
| 7 | 7 | , pattern (:::) |
| 8 | 8 | , pattern A |
| 9 | 9 | , pattern Nil |
| 10 | -- * Useful processing functions | |
| 11 | , fromPair | |
| 12 | , fromList | |
| 10 | 13 | ) where |
| 11 | 14 | |
| 15 | import Control.Applicative ((<$>), (<*>), pure) | |
| 12 | 16 | import Data.SCargot.Repr as R |
| 13 | 17 | |
| 14 | 18 | -- | A shorter infix alias for `SCons` |
| 19 | 23 | |
| 20 | 24 | -- | A (slightly) shorter alias for `SNil` |
| 21 | 25 | pattern Nil = SNil |
| 26 | ||
| 27 | ||
| 28 | type S t = R.SExpr t | |
| 29 | type Parse t a = R.SExpr t -> Either String a | |
| 30 | ||
| 31 | -- | Utility function for parsing a pair of things. | |
| 32 | fromPair :: Parse t a -> Parse t b -> Parse t (a, b) | |
| 33 | fromPair pl pr (l ::: r ::: Nil) = (,) <$> pl l <*> pr r | |
| 34 | fromPair _ _ sx = fail ("Expected two-element list") | |
| 35 | ||
| 36 | -- | Utility function for parsing a list of things. | |
| 37 | fromList :: Parse t a -> Parse t [a] | |
| 38 | fromList p (s ::: ss) = (:) <$> p s <*> fromList p ss | |
| 39 | fromList p Nil = pure [] | |
| 40 | fromList _ sx = fail ("Expected list") | |
| 41 | ||
| 42 | gatherList :: S t -> Either String [S t] | |
| 43 | gatherList (x ::: xs) = (:) <$> pure x <*> gatherList xs | |
| 44 | gatherList Nil = pure [] | |
| 45 | gatherList sx = fail ("Expected list") | |
| 46 | ||
| 47 | asPair :: ((S t, S t) -> Either String a) -> S t -> Either String a | |
| 48 | asPair f (l ::: r ::: SNil) = f (l, r) | |
| 49 | asPair _ sx = fail ("Expected two-element list") | |
| 50 | ||
| 51 | asList :: ([S t] -> Either String a) -> S t -> Either String a | |
| 52 | asList f ls = gatherList ls >>= f | |
| 53 | ||
| 54 | asSymbol :: (t -> Either String a) -> S t -> Either String a | |
| 55 | asSymbol f (A s) = f s | |
| 56 | asSymbol _ sx = fail ("Expected symbol") | |
| 57 | ||
| 58 | asAssoc :: ([(S t, S t)] -> Either String a) -> S t -> Either String a | |
| 59 | asAssoc f ss = gatherList ss >>= mapM go >>= f | |
| 60 | where go (a ::: b ::: Nil) = return (a, b) | |
| 61 | go sx = fail ("Expected two-element list") | |
| 11 | 11 | , pattern L |
| 12 | 12 | , pattern DL |
| 13 | 13 | , pattern Nil |
| 14 | -- * Useful processing functions | |
| 15 | , fromPair | |
| 16 | , fromList | |
| 14 | 17 | ) where |
| 15 | 18 | |
| 19 | import Control.Applicative ((<$>), (<*>), pure) | |
| 16 | 20 | import Data.SCargot.Repr as R |
| 17 | 21 | |
| 18 | 22 | -- | A shorter infix alias to grab the head |
| 30 | 34 | |
| 31 | 35 | -- | A shorter alias for `RSList []` |
| 32 | 36 | pattern Nil = R.RSList [] |
| 37 | ||
| 38 | type S t = R.RichSExpr t | |
| 39 | type Parse t a = S t -> Either String a | |
| 40 | ||
| 41 | -- | Utility function for parsing a pair of things. | |
| 42 | fromPair :: Parse t a -> Parse t b -> Parse t (a, b) | |
| 43 | fromPair pl pr = asPair $ \(l,r) -> (,) <$> pl l <*> pr r | |
| 44 | ||
| 45 | -- | Utility function for parsing a list of things. | |
| 46 | fromList :: Parse t a -> Parse t [a] | |
| 47 | fromList p = asList $ \ss -> mapM p ss | |
| 48 | ||
| 49 | asPair :: ((S t, S t) -> Either String a) -> S t -> Either String a | |
| 50 | asPair f (L [l, r]) = f (l, r) | |
| 51 | asPair _ sx = fail ("Expected two-element list") | |
| 52 | ||
| 53 | asList :: ([S t] -> Either String a) -> S t -> Either String a | |
| 54 | asList f (L ls) = f ls | |
| 55 | asList _ sx = fail ("Expected list") | |
| 56 | ||
| 57 | asSymbol :: (t -> Either String a) -> S t -> Either String a | |
| 58 | asSymbol f (A s) = f s | |
| 59 | asSymbol _ sx = fail ("Expected symbol") | |
| 60 | ||
| 61 | asAssoc :: ([(S t, S t)] -> Either String a) -> S t -> Either String a | |
| 62 | asAssoc f (L ss) = gatherPairs ss >>= f | |
| 63 | where gatherPairs (L [a, b] : ss) = (:) <$> pure (a, b) <*> gatherPairs ss | |
| 64 | gatherPairs [] = pure [] | |
| 65 | gatherPairs _ = fail "..." | |
| 66 | asAssoc _ sx = fail ("Expected assoc list") | |
| 10 | 10 | , pattern L |
| 11 | 11 | , pattern A |
| 12 | 12 | , pattern Nil |
| 13 | -- * Useful processing functions | |
| 14 | , fromPair | |
| 15 | , fromList | |
| 13 | 16 | ) where |
| 14 | 17 | |
| 18 | import Control.Applicative ((<$>), (<*>), pure) | |
| 15 | 19 | import Data.SCargot.Repr as R |
| 16 | 20 | |
| 17 | 21 | -- | A shorter infix alias to grab the head |
| 26 | 30 | |
| 27 | 31 | -- | A shorter alias for `WFSList []` |
| 28 | 32 | pattern Nil = R.WFSList [] |
| 33 | ||
| 34 | type S t = R.WellFormedSExpr t | |
| 35 | type Parse t a = R.WellFormedSExpr t -> Either String a | |
| 36 | ||
| 37 | -- | Utility function for parsing a pair of things. | |
| 38 | fromPair :: Parse t a -> Parse t b -> Parse t (a, b) | |
| 39 | fromPair pl pr (L [l, r]) = (,) <$> pl l <*> pr r | |
| 40 | fromPair _ _ sx = fail ("Expected two-element list") | |
| 41 | ||
| 42 | -- | Utility function for parsing a list of things. | |
| 43 | fromList :: Parse t a -> Parse t [a] | |
| 44 | fromList p (L ss) = mapM p ss | |
| 45 | fromList _ sx = fail ("Expected list") | |
| 46 | ||
| 47 | ||
| 48 | asPair :: ((S t, S t) -> Either String a) -> S t -> Either String a | |
| 49 | asPair f (L [l, r]) = f (l, r) | |
| 50 | asPair _ sx = fail ("Expected two-element list") | |
| 51 | ||
| 52 | asList :: ([S t] -> Either String a) -> S t -> Either String a | |
| 53 | asList f (L ls) = f ls | |
| 54 | asList _ sx = fail ("Expected list") | |
| 55 | ||
| 56 | asSymbol :: (t -> Either String a) -> S t -> Either String a | |
| 57 | asSymbol f (A s) = f s | |
| 58 | asSymbol _ sx = fail ("Expected symbol") | |
| 59 | ||
| 60 | asAssoc :: ([(S t, S t)] -> Either String a) -> S t -> Either String a | |
| 61 | asAssoc f (L ss) = gatherPairs ss >>= f | |
| 62 | where gatherPairs (L [a, b] : ss) = (:) <$> pure (a, b) <*> gatherPairs ss | |
| 63 | gatherPairs [] = pure [] | |
| 64 | gatherPairs _ = fail "..." | |
| 65 | asAssoc _ sx = fail ("Expected assoc list") | |
| 13 | 13 | , fromWellFormed |
| 14 | 14 | ) where |
| 15 | 15 | |
| 16 | import Data.String (IsString(..)) | |
| 17 | ||
| 16 | 18 | -- | All S-Expressions can be understood as a sequence |
| 17 | 19 | -- of @cons@ cells (represented here by 'SCons'), the |
| 18 | 20 | -- empty list @nil@ (represented by 'SNil') or an |
| 22 | 24 | | SAtom atom |
| 23 | 25 | | SNil |
| 24 | 26 | deriving (Eq, Show, Read, Functor) |
| 27 | ||
| 28 | instance IsString atom => IsString (SExpr atom) where | |
| 29 | fromString = SAtom . fromString | |
| 25 | 30 | |
| 26 | 31 | -- | Sometimes, the cons-based interface is too low |
| 27 | 32 | -- level, and we'd rather have the lists themselves |
| 39 | 44 | | RSDotted [RichSExpr atom] atom |
| 40 | 45 | | RSAtom atom |
| 41 | 46 | deriving (Eq, Show, Read, Functor) |
| 47 | ||
| 48 | instance IsString atom => IsString (RichSExpr atom) where | |
| 49 | fromString = RSAtom . fromString | |
| 42 | 50 | |
| 43 | 51 | -- | It should always be true that |
| 44 | 52 | -- |
| 70 | 78 | | WFSAtom atom |
| 71 | 79 | deriving (Eq, Show, Read, Functor) |
| 72 | 80 | |
| 81 | instance IsString atom => IsString (WellFormedSExpr atom) where | |
| 82 | fromString = WFSAtom . fromString | |
| 83 | ||
| 73 | 84 | -- | This will be @Nothing@ if the argument contains an |
| 74 | 85 | -- improper list. It should hold that |
| 75 | 86 | -- |
| 77 | 88 | -- |
| 78 | 89 | -- and also (more tediously) that |
| 79 | 90 | -- |
| 80 |
-- > case |
|
| 91 | -- > case toWellFormed x of | |
| 81 | 92 | -- > Left _ -> True |
| 82 |
-- > Right y -> x == |
|
| 93 | -- > Right y -> x == fromWellFormed y | |
| 83 | 94 | toWellFormed :: SExpr atom -> Either String (WellFormedSExpr atom) |
| 84 | 95 | toWellFormed SNil = return (WFSList []) |
| 85 | 96 | toWellFormed (SAtom a) = return (WFSAtom a) |
| 7 | 7 | import qualified Data.Text as T |
| 8 | 8 | |
| 9 | 9 | pToken :: Parser ByteString |
| 10 |
pToken = |
|
| 10 | pToken = do | |
| 11 | x <- char (isAlpha || isTokenPunct) | |
| 12 | xs <- takeWhile1 (isAlpha || isDigit || isTokenPunct) | |
| 13 | ||
| 14 | isTokenPunct :: Char -> Bool | |
| 15 | isTokenPunct c = c `elem` "-./_:*+=" | |
| 16 | ||
| 17 | pWithLength :: Parser ByteString | |
| 18 | pWithLength = do | |
| 19 | n <- takeWhile1 isDigit | |
| 20 | pFindType (Just (read (T.unpack n))) | |
| 21 | ||
| 22 | pFindType :: Maybe Int -> Parser ByteString | |
| 23 | pFindType len = do | |
| 24 | c <- peekChar | |
| 25 | case c of | |
| 26 | ':' -> case len of | |
| 27 | Just l -> pVerbatim l | |
| 28 | Nothing -> fail "Verbatim encoding without length given" | |
| 29 | '"' -> pQuoted len | |
| 30 | '#' -> pHex len | |
| 31 | '{' -> pBase64Verbatim len | |
| 32 | '|' -> pBase64 len | |
| 33 | _ -> case len of | |
| 34 | Just _ -> fail "Unexpected length field" | |
| 35 | Nothing -> pToken | |
| 11 | 36 | |
| 12 | 37 | pQuoted :: Maybe Int -> Parser ByteString |
| 13 | 38 | pQuoted = do |
| 17 | 42 | return ss |
| 18 | 43 | |
| 19 | 44 | pHex :: Parser ByteString |
| 20 |
pHex = |
|
| 45 | pHex = do | |
| 46 | ||
| 21 | 47 | |
| 22 | 48 | pVerbatim :: Int -> Parser ByteString |
| 23 | 49 | pVerbatim = do |
| 24 | 50 | char ':' |
| 25 | 51 | take n |
| 26 | 52 | |
| 27 | pBase64Verbatim :: Parser ByteString | |
| 28 | pBase64 :: Parser ByteString | |
| 53 | pBase64Verbatim :: Maybe Int -> Parser ByteString | |
| 54 | pBase64Verbatim = undefined | |
| 55 | ||
| 56 | pBase64 :: Maybe Int -> Parser ByteString | |
| 57 | pBase64 = undefined | |