Big final bunch of refactors/documentation bits in preparation for Hackage release
Getty Ritter
10 years ago
| 1 | {-# LANGUAGE OverloadedStrings #-} | |
| 2 | ||
| 3 | module Data.SCargot.Basic | |
| 4 | ( -- * Spec | |
| 5 | -- $descr | |
| 6 | basicSpec | |
| 7 | ) where | |
| 8 | ||
| 9 | import Control.Applicative ((<$>)) | |
| 10 | import Data.Char (isAlphaNum) | |
| 11 | import Text.Parsec (many1, satisfy) | |
| 12 | import Data.Text (Text, pack) | |
| 13 | ||
| 14 | import Data.SCargot.Repr.Basic (SExpr) | |
| 15 | import Data.SCargot.General ( SExprSpec | |
| 16 | , mkSpec | |
| 17 | ) | |
| 18 | import Data.SCargot.Comments (withLispComments) | |
| 19 | ||
| 20 | isAtomChar :: Char -> Bool | |
| 21 | isAtomChar c = isAlphaNum c | |
| 22 | || c == '-' || c == '*' || c == '/' | |
| 23 | || c == '+' || c == '<' || c == '>' | |
| 24 | || c == '=' || c == '!' || c == '?' | |
| 25 | ||
| 26 | -- $descr | |
| 27 | -- The 'basicSpec' describes S-expressions whose atoms are simply | |
| 28 | -- text strings that contain alphanumeric characters and a small | |
| 29 | -- set of punctuation. It does no parsing of numbers or other data | |
| 30 | -- types, and will accept tokens that typical Lisp implementations | |
| 31 | -- would find nonsensical (like @77foo@). | |
| 32 | -- | |
| 33 | -- Atoms recognized by the 'basicSpec' are any string matching the | |
| 34 | -- regular expression @[A-Za-z0-9+*<>/=!?-]+@. | |
| 35 | ||
| 36 | -- | A 'SExprSpec' that understands atoms to be sequences of | |
| 37 | -- alphanumeric characters as well as the punctuation | |
| 38 | -- characters @[-*/+<>=!?]@, and does no processing of them. | |
| 39 | basicSpec :: SExprSpec Text (SExpr Text) | |
| 40 | basicSpec = mkSpec pToken id | |
| 41 | where pToken = pack <$> many1 (satisfy isAtomChar) |
| 4 | 4 | ( -- $intro |
| 5 | 5 | |
| 6 | 6 | -- * Lisp-Style Syntax |
| 7 | ||
| 7 | 8 | -- $lisp |
| 8 | 9 | withLispComments |
| 9 | 10 | -- * Other Existing Comment Syntaxes |
| 35 | 36 | , string |
| 36 | 37 | ) |
| 37 | 38 | |
| 38 | import Data.SCargot.General ( Comment | |
| 39 | , SExprSpec | |
| 40 | , setComment | |
| 41 | ) | |
| 39 | import Data.SCargot.Parse ( Comment | |
| 40 | , SExprParser | |
| 41 | , setComment | |
| 42 | ) | |
| 42 | 43 | |
| 43 | 44 | -- | Given a string, produce a comment parser that matches that |
| 44 | 45 | -- initial string and ignores everything until the end of the |
| 73 | 74 | -- | Lisp-style line-oriented comments start with @;@ and last |
| 74 | 75 | -- until the end of the line. This is usually the comment |
| 75 | 76 | -- syntax you want. |
| 76 |
withLispComments :: SExpr |
|
| 77 | withLispComments :: SExprParser t a -> SExprParser t a | |
| 77 | 78 | withLispComments = setComment (lineComment ";") |
| 78 | 79 | |
| 79 | 80 | -- | C++-like line-oriented comment start with @//@ and last |
| 80 | 81 | -- until the end of the line. |
| 81 |
withCLikeLineComments :: SExpr |
|
| 82 | withCLikeLineComments :: SExprParser t a -> SExprParser t a | |
| 82 | 83 | withCLikeLineComments = setComment (lineComment "//") |
| 83 | 84 | |
| 84 | 85 | -- | C-like block comments start with @/*@ and end with @*/@. |
| 85 | 86 | -- They do not nest. |
| 86 |
withCLikeBlockComments :: SExpr |
|
| 87 | withCLikeBlockComments :: SExprParser t a -> SExprParser t a | |
| 87 | 88 | withCLikeBlockComments = setComment (simpleBlockComment "/*" "*/") |
| 88 | 89 | |
| 89 | 90 | -- | C-like comments include both line- and block-comments, the |
| 90 | 91 | -- former starting with @//@ and the latter contained within |
| 91 | 92 | -- @//* ... *//@. |
| 92 |
withCLikeComments :: SExpr |
|
| 93 | withCLikeComments :: SExprParser t a -> SExprParser t a | |
| 93 | 94 | withCLikeComments = setComment (lineComment "//" <|> |
| 94 | 95 | simpleBlockComment "/*" "*/") |
| 95 | 96 | |
| 96 | 97 | -- | Haskell line-oriented comments start with @--@ and last |
| 97 | 98 | -- until the end of the line. |
| 98 |
withHaskellLineComments :: SExpr |
|
| 99 | withHaskellLineComments :: SExprParser t a -> SExprParser t a | |
| 99 | 100 | withHaskellLineComments = setComment (lineComment "--") |
| 100 | 101 | |
| 101 | 102 | -- | Haskell block comments start with @{-@ and end with @-}@. |
| 102 | 103 | -- They do not nest. |
| 103 |
withHaskellBlockComments :: SExpr |
|
| 104 | withHaskellBlockComments :: SExprParser t a -> SExprParser t a | |
| 104 | 105 | withHaskellBlockComments = setComment (simpleBlockComment "{-" "-}") |
| 105 | 106 | |
| 106 | 107 | -- | Haskell comments include both the line-oriented @--@ comments |
| 107 | 108 | -- and the block-oriented @{- ... -}@ comments |
| 108 |
withHaskellComments :: SExpr |
|
| 109 | withHaskellComments :: SExprParser t a -> SExprParser t a | |
| 109 | 110 | withHaskellComments = setComment (lineComment "--" <|> |
| 110 | 111 | simpleBlockComment "{-" "-}") |
| 111 | 112 | |
| 112 | 113 | -- | Many scripting and shell languages use these, which begin with |
| 113 | 114 | -- @#@ and last until the end of the line. |
| 114 |
withOctothorpeComments :: SExpr |
|
| 115 | withOctothorpeComments :: SExprParser t a -> SExprParser t a | |
| 115 | 116 | withOctothorpeComments = setComment (lineComment "#") |
| 116 | 117 | |
| 117 | 118 | |
| 118 | 119 | {- $intro |
| 119 | 120 | |
| 120 |
By default a 'SExpr |
|
| 121 | By default a 'SExprParser' will not understand any kind of comment | |
| 121 | 122 | syntax. Most varieties of s-expression will, however, want some kind |
| 122 | 123 | of commenting capability, so the below functions will produce a new |
| 123 |
'SExpr |
|
| 124 | 'SExprParser' which understands various kinds of comment syntaxes. | |
| 124 | 125 | |
| 125 | 126 | For example: |
| 126 | 127 | |
| 127 | > mySpec :: SExprSpec Text (SExpr Text) | |
| 128 | > mySpec = asWellFormed $ mkSpec (pack <$> many1 alphaNum) id | |
| 128 | > mySpec :: SExprParser Text (SExpr Text) | |
| 129 | > mySpec = asWellFormed $ mkParser (pack <$> many1 alphaNum) | |
| 129 | 130 | > |
| 130 |
> myLispySpec :: SExpr |
|
| 131 | > myLispySpec :: SExprParser Text (SExpr Text) | |
| 131 | 132 | > myLispySpec = withLispComments mySpec |
| 132 | 133 | > |
| 133 |
> myCLikeSpec :: SExpr |
|
| 134 | > myCLikeSpec :: SExprParser Text (SExpr Text) | |
| 134 | 135 | > myCLikeSpec = withCLikeComment mySpec |
| 135 | 136 | |
| 136 | 137 | We can then use these to parse s-expressions with different kinds of |
| 137 | 138 | comment syntaxes: |
| 138 | 139 | |
| 139 | > > decode mySpec "(foo ; a lisp comment\n bar)\n" | |
| 140 | > Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom" | |
| 141 | > > decode myLispySpec "(foo ; a lisp comment\n bar)\n" | |
| 142 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
| 143 | > > decode mySpec "(foo /* a c-like\n comment */ bar)\n" | |
| 144 | > Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom" | |
| 145 | > > decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n" | |
| 146 | > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
| 140 | >>> decode mySpec "(foo ; a lisp comment\n bar)\n" | |
| 141 | Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom" | |
| 142 | >>> decode myLispySpec "(foo ; a lisp comment\n bar)\n" | |
| 143 | Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
| 144 | >>> decode mySpec "(foo /* a c-like\n comment */ bar)\n" | |
| 145 | Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom" | |
| 146 | >>> decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n" | |
| 147 | Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] | |
| 147 | 148 | |
| 148 | 149 | -} |
| 149 | 150 | |
| 4 | 4 | , parseR6RSIdent |
| 5 | 5 | , parseR7RSIdent |
| 6 | 6 | -- * Numeric Literal Parsers |
| 7 | , signed | |
| 8 | , prefixedNumber | |
| 9 | , signedPrefixedNumber | |
| 7 | 10 | , binNumber |
| 8 | 11 | , signedBinNumber |
| 9 | 12 | , octNumber |
| 14 | 17 | , signedDozNumber |
| 15 | 18 | , hexNumber |
| 16 | 19 | , signedHexNumber |
| 17 | , signed | |
| 18 | 20 | ) where |
| 19 | 21 | |
| 20 | 22 | import Data.Char |
| 42 | 44 | |
| 43 | 45 | -- | Parse an identifier according to the R6RS Scheme standard. An |
| 44 | 46 | -- R6RS identifier may include inline hexadecimal escape sequences |
| 45 |
-- so that, for example, @foo@ is equivalent to @f\ |
|
| 47 | -- so that, for example, @foo@ is equivalent to @f\\x6f;o@, and is | |
| 46 | 48 | -- more liberal than R5RS as to which Unicode characters it may |
| 47 | 49 | -- accept. |
| 48 | 50 | parseR6RSIdent :: Parser Text |
| 141 | 143 | signed :: Num a => Parser a -> Parser a |
| 142 | 144 | signed p = ($) <$> sign <*> p |
| 143 | 145 | |
| 146 | -- | Parses a number in the same way as 'prefixedNumber', with an optional | |
| 147 | -- leading @+@ or @-@. | |
| 148 | signedPrefixedNumber :: Parser Integer | |
| 149 | signedPrefixedNumber = signed prefixedNumber | |
| 150 | ||
| 151 | -- | Parses a number, determining which numeric base to use by examining | |
| 152 | -- the literal's prefix: @0x@ for a hexadecimal number, @0z@ for a | |
| 153 | -- dozenal number, @0o@ for an octal number, and @0b@ for a binary | |
| 154 | -- number (as well as the upper-case versions of the same.) If the | |
| 155 | -- base is omitted entirely, then it is treated as a decimal number. | |
| 156 | prefixedNumber :: Parser Integer | |
| 157 | prefixedNumber = (string "0x" <|> string "0X") *> hexNumber | |
| 158 | <|> (string "0o" <|> string "0O") *> octNumber | |
| 159 | <|> (string "0z" <|> string "0Z") *> dozNumber | |
| 160 | <|> (string "0b" <|> string "0B") *> binNumber | |
| 161 | <|> decNumber | |
| 162 | ||
| 144 | 163 | -- | A parser for non-signed binary numbers |
| 145 | 164 | binNumber :: Parser Integer |
| 146 | 165 | binNumber = number 2 (char '0' <|> char '1') |
| 147 | 166 | |
| 148 | 167 | -- | A parser for signed binary numbers, with an optional leading @+@ or @-@. |
| 149 | 168 | signedBinNumber :: Parser Integer |
| 150 |
signedBinNumber = |
|
| 169 | signedBinNumber = signed binNumber | |
| 151 | 170 | |
| 152 | 171 | -- | A parser for non-signed octal numbers |
| 153 | 172 | octNumber :: Parser Integer |
| 1 | {-# LANGUAGE ViewPatterns #-} | |
| 2 | {-# LANGUAGE OverloadedStrings #-} | |
| 3 | ||
| 4 | module Data.SCargot.General | |
| 5 | ( -- * SExprSpec | |
| 6 | SExprSpec | |
| 7 | , mkSpec | |
| 8 | , convertSpec | |
| 9 | , addReader | |
| 10 | , setComment | |
| 11 | -- * Specific SExprSpec Conversions | |
| 12 | , asRich | |
| 13 | , asWellFormed | |
| 14 | , withQuote | |
| 15 | -- * Using a SExprSpec | |
| 16 | , decode | |
| 17 | , decodeOne | |
| 18 | , encode | |
| 19 | , encodeOne | |
| 20 | -- * Useful Type Aliases | |
| 21 | , Reader | |
| 22 | , Comment | |
| 23 | , Serializer | |
| 24 | ) where | |
| 25 | ||
| 26 | import Control.Applicative ((<*), (*>), (<*>), (<$>), pure) | |
| 27 | import Control.Monad ((>=>)) | |
| 28 | import Data.Char (isAlpha, isDigit, isAlphaNum) | |
| 29 | import Data.Map.Strict (Map) | |
| 30 | import qualified Data.Map.Strict as M | |
| 31 | import Data.Maybe (fromJust) | |
| 32 | import Data.Monoid ((<>)) | |
| 33 | import Data.String (IsString) | |
| 34 | import Data.Text (Text, pack, unpack) | |
| 35 | import qualified Data.Text as T | |
| 36 | import Text.Parsec ( (<|>) | |
| 37 | , (<?>) | |
| 38 | , char | |
| 39 | , eof | |
| 40 | , lookAhead | |
| 41 | , many1 | |
| 42 | , runParser | |
| 43 | , skipMany | |
| 44 | ) | |
| 45 | import Text.Parsec.Char (anyChar, space) | |
| 46 | import Text.Parsec.Text (Parser) | |
| 47 | ||
| 48 | import Data.SCargot.Repr ( SExpr(..) | |
| 49 | , RichSExpr | |
| 50 | , WellFormedSExpr | |
| 51 | , fromRich | |
| 52 | , toRich | |
| 53 | , fromWellFormed | |
| 54 | , toWellFormed | |
| 55 | ) | |
| 56 | ||
| 57 | type ReaderMacroMap atom = Map Char (Reader atom) | |
| 58 | ||
| 59 | -- | A 'Reader' represents a reader macro: it takes a parser for | |
| 60 | -- the S-Expression type and performs as much or as little | |
| 61 | -- parsing as it would like, and then returns an S-expression. | |
| 62 | type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom)) | |
| 63 | ||
| 64 | -- | A 'Comment' represents any kind of skippable comment. This | |
| 65 | -- parser __must__ be able to fail if a comment is not being | |
| 66 | -- recognized, and it __must__ not consume any input. | |
| 67 | type Comment = Parser () | |
| 68 | ||
| 69 | -- | A 'Serializer' is any function which can serialize an Atom | |
| 70 | -- to 'Text'. | |
| 71 | type Serializer atom = atom -> Text | |
| 72 | ||
| 73 | -- | A 'SExprSpec' describes a parser and emitter for a particular | |
| 74 | -- variant of S-Expressions. The @atom@ type corresponds to a | |
| 75 | -- Haskell type used to represent the atoms, and the @carrier@ | |
| 76 | -- type corresponds to the parsed S-Expression structure. The | |
| 77 | -- 'SExprSpec' type is deliberately opaque so that it must be | |
| 78 | -- constructed and modified with other helper functions. | |
| 79 | data SExprSpec atom carrier = SExprSpec | |
| 80 | { sesPAtom :: Parser atom | |
| 81 | , sesSAtom :: Serializer atom | |
| 82 | , readerMap :: ReaderMacroMap atom | |
| 83 | , comment :: Maybe Comment | |
| 84 | , postparse :: SExpr atom -> Either String carrier | |
| 85 | , preserial :: carrier -> SExpr atom | |
| 86 | } | |
| 87 | ||
| 88 | -- | Create a basic 'SExprSpec' when given a parser and serializer | |
| 89 | -- for an atom type. A small minimal 'SExprSpec' that recognizes | |
| 90 | -- any alphanumeric sequence as a valid atom looks like: | |
| 91 | -- | |
| 92 | -- > simpleSpec :: SExprSpec Text (SExpr Text) | |
| 93 | -- > simpleSpec = mkSpec (pack <$> many1 isAlphaNum) id | |
| 94 | mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom) | |
| 95 | mkSpec p s = SExprSpec | |
| 96 | { sesPAtom = p <?> "atom" | |
| 97 | , sesSAtom = s | |
| 98 | , readerMap = M.empty | |
| 99 | , comment = Nothing | |
| 100 | , postparse = return | |
| 101 | , preserial = id | |
| 102 | } | |
| 103 | ||
| 104 | -- | Modify the carrier type for a 'SExprSpec'. This is | |
| 105 | -- used internally to convert between various 'SExpr' representations, | |
| 106 | -- but could also be used externally to add an extra conversion layer | |
| 107 | -- onto a 'SExprSpec'. | |
| 108 | -- | |
| 109 | -- The following defines an S-expression spec that recognizes the | |
| 110 | -- language of binary addition trees. It does so by first transforming | |
| 111 | -- the internal S-expression representation using 'asWellFormed', and | |
| 112 | -- then providing a conversion between the 'WellFormedSExpr' type and | |
| 113 | -- an @Expr@ AST. Notice that the below parser uses 'String' as its | |
| 114 | -- underlying atom type, instead of some token type. | |
| 115 | -- | |
| 116 | -- > data Expr = Add Expr Expr | Num Int deriving (Eq, Show) | |
| 117 | -- > | |
| 118 | -- > toExpr :: WellFormedSExpr String -> Either String Expr | |
| 119 | -- > toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r | |
| 120 | -- > toExpr (A c) | all isDigit c = pure (Num (read c)) | |
| 121 | -- > toExpr c = Left ("Invalid expr: " ++ show c) | |
| 122 | -- > | |
| 123 | -- > fromExpr :: Expr -> WellFormedSExpr String | |
| 124 | -- > fromExpr (Add l r) = L [A "+", fromExpr l, fromExpr r] | |
| 125 | -- > fromExpr (Num n) = A (show n) | |
| 126 | -- > | |
| 127 | -- > mySpec :: SExprSpec String Expr | |
| 128 | -- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack | |
| 129 | -- > where parser = many1 (satisfy isValidChar) | |
| 130 | -- > isValidChar c = isDigit c || c == '+' | |
| 131 | convertSpec :: (b -> Either String c) -> (c -> b) | |
| 132 | -> SExprSpec a b -> SExprSpec a c | |
| 133 | convertSpec f g spec = spec | |
| 134 | { postparse = postparse spec >=> f | |
| 135 | , preserial = preserial spec . g | |
| 136 | } | |
| 137 | ||
| 138 | -- | Convert the final output representation from the 'SExpr' type | |
| 139 | -- to the 'RichSExpr' type. | |
| 140 | asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b) | |
| 141 | asRich = convertSpec (return . toRich) fromRich | |
| 142 | ||
| 143 | -- | Convert the final output representation from the 'SExpr' type | |
| 144 | -- to the 'WellFormedSExpr' type. | |
| 145 | asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b) | |
| 146 | asWellFormed = convertSpec toWellFormed fromWellFormed | |
| 147 | ||
| 148 | -- | Add the ability to execute some particular reader macro, as | |
| 149 | -- defined by its initial character and the 'Parser' which returns | |
| 150 | -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which | |
| 151 | -- can be recursively called to parse more S-Expressions, and begins | |
| 152 | -- parsing after the reader character has been removed from the | |
| 153 | -- stream. | |
| 154 | -- | |
| 155 | -- The following defines an S-expression variant that treats | |
| 156 | -- @'expr@ as being sugar for @(quote expr)@. Note that this is done | |
| 157 | -- already in a more general way by the 'withQuote' function, but | |
| 158 | -- it is a good illustration of using reader macros in practice: | |
| 159 | -- | |
| 160 | -- > mySpec :: SExprSpec String (SExpr Text) | |
| 161 | -- > mySpec = addReader '\'' reader $ mkSpec (many1 alphaNum) pack | |
| 162 | -- > where reader p = quote <$> p | |
| 163 | -- > quote e = SCons (SAtom "quote") (SCons e SNil) | |
| 164 | addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c | |
| 165 | addReader c reader spec = spec | |
| 166 | { readerMap = M.insert c reader (readerMap spec) } | |
| 167 | ||
| 168 | -- | Add the ability to ignore some kind of comment. This gets | |
| 169 | -- factored into whitespace parsing, and it's very important that | |
| 170 | -- the parser supplied __be able to fail__ (as otherwise it will | |
| 171 | -- cause an infinite loop), and also that it __not consume any input__ | |
| 172 | -- (which may require it to be wrapped in 'try'.) | |
| 173 | -- | |
| 174 | -- The following code defines an S-expression variant that skips | |
| 175 | -- C++-style comments, i.e. those which begin with @//@ and last | |
| 176 | -- until the end of a line: | |
| 177 | -- | |
| 178 | -- > t :: SExprSpec String (SExpr Text) | |
| 179 | -- > t = setComment comm $ mkSpec (many1 alphaNum) pack | |
| 180 | -- > where comm = try (string "//" *> manyTill newline *> pure ()) | |
| 181 | ||
| 182 | setComment :: Comment -> SExprSpec a c -> SExprSpec a c | |
| 183 | setComment c spec = spec { comment = Just (c <?> "comment") } | |
| 184 | ||
| 185 | -- | Add the ability to understand a quoted S-Expression. In general, | |
| 186 | -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is | |
| 187 | -- a convenience function which allows you to easily add quoted | |
| 188 | -- expressions to a 'SExprSpec', provided that you supply which | |
| 189 | -- atom you want substituted in for the symbol @quote@. | |
| 190 | withQuote :: IsString t => SExprSpec t (SExpr t) -> SExprSpec t (SExpr t) | |
| 191 | withQuote = addReader '\'' (fmap go) | |
| 192 | where go s = SCons "quote" (SCons s SNil) | |
| 193 | ||
| 194 | peekChar :: Parser (Maybe Char) | |
| 195 | peekChar = Just <$> lookAhead anyChar <|> pure Nothing | |
| 196 | ||
| 197 | parseGenericSExpr :: | |
| 198 | Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom) | |
| 199 | parseGenericSExpr atom reader skip = do | |
| 200 | let sExpr = parseGenericSExpr atom reader skip <?> "s-expr" | |
| 201 | skip | |
| 202 | c <- peekChar | |
| 203 | r <- case c of | |
| 204 | Nothing -> fail "Unexpected end of input" | |
| 205 | Just '(' -> char '(' >> skip >> parseList sExpr skip | |
| 206 | Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr | |
| 207 | _ -> SAtom `fmap` atom | |
| 208 | skip | |
| 209 | return r | |
| 210 | ||
| 211 | parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom) | |
| 212 | parseList sExpr skip = do | |
| 213 | i <- peekChar | |
| 214 | case i of | |
| 215 | Nothing -> fail "Unexpected end of input" | |
| 216 | Just ')' -> char ')' >> return SNil | |
| 217 | _ -> do | |
| 218 | car <- sExpr | |
| 219 | skip | |
| 220 | c <- peekChar | |
| 221 | case c of | |
| 222 | Just '.' -> do | |
| 223 | char '.' | |
| 224 | cdr <- sExpr | |
| 225 | skip | |
| 226 | char ')' | |
| 227 | skip | |
| 228 | return (SCons car cdr) | |
| 229 | Just ')' -> do | |
| 230 | char ')' | |
| 231 | skip | |
| 232 | return (SCons car SNil) | |
| 233 | _ -> do | |
| 234 | cdr <- parseList sExpr skip | |
| 235 | return (SCons car cdr) | |
| 236 | ||
| 237 | -- | Given a CommentMap, create the corresponding parser to | |
| 238 | -- skip those comments (if they exist). | |
| 239 | buildSkip :: Maybe (Parser ()) -> Parser () | |
| 240 | buildSkip Nothing = skipMany space | |
| 241 | buildSkip (Just c) = alternate | |
| 242 | where alternate = skipMany space >> ((c >> alternate) <|> return ()) | |
| 243 | ||
| 244 | doParse :: Parser a -> Text -> Either String a | |
| 245 | doParse p t = case runParser p () "" t of | |
| 246 | Left err -> Left (show err) | |
| 247 | Right x -> Right x | |
| 248 | ||
| 249 | -- | Decode a single S-expression. If any trailing input is left after | |
| 250 | -- the S-expression (ignoring comments or whitespace) then this | |
| 251 | -- will fail: for those cases, use 'decode', which returns a list of | |
| 252 | -- all the S-expressions found at the top level. | |
| 253 | decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier | |
| 254 | decodeOne spec = doParse (parser <* eof) >=> (postparse spec) | |
| 255 | where parser = parseGenericSExpr | |
| 256 | (sesPAtom spec) | |
| 257 | (readerMap spec) | |
| 258 | (buildSkip (comment spec)) | |
| 259 | ||
| 260 | -- | Decode several S-expressions according to a given 'SExprSpec'. This | |
| 261 | -- will return a list of every S-expression that appears at the top-level | |
| 262 | -- of the document. | |
| 263 | decode :: SExprSpec atom carrier -> Text -> Either String [carrier] | |
| 264 | decode spec = | |
| 265 | doParse (many1 parser <* eof) >=> mapM (postparse spec) | |
| 266 | where parser = parseGenericSExpr | |
| 267 | (sesPAtom spec) | |
| 268 | (readerMap spec) | |
| 269 | (buildSkip (comment spec)) | |
| 270 | ||
| 271 | -- | Encode (without newlines) a single S-expression. | |
| 272 | encodeSExpr :: SExpr atom -> (atom -> Text) -> Text | |
| 273 | encodeSExpr SNil _ = "()" | |
| 274 | encodeSExpr (SAtom s) t = t s | |
| 275 | encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t) | |
| 276 | where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")" | |
| 277 | go SNil rs = "(" <> rs <> ")" | |
| 278 | go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t) | |
| 279 | ||
| 280 | -- | Emit an S-Expression in a machine-readable way. This does no | |
| 281 | -- pretty-printing or indentation, and produces no comments. | |
| 282 | encodeOne :: SExprSpec atom carrier -> carrier -> Text | |
| 283 | encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec) | |
| 284 | ||
| 285 | encode :: SExprSpec atom carrier -> [carrier] -> Text | |
| 286 | encode spec cs = T.concat (map (encodeOne spec) cs) |
| 1 | {-# LANGUAGE OverloadedStrings #-} | |
| 2 | ||
| 3 | module Data.SCargot.HaskLike ( -- $info | |
| 4 | haskLikeSpec | |
| 5 | , HaskLikeAtom(..) | |
| 6 | ) where | |
| 7 | ||
| 8 | import Control.Applicative ((<$>), (<*>), (<$)) | |
| 9 | import Data.Maybe (catMaybes) | |
| 10 | import Data.String (IsString(..)) | |
| 11 | import Data.Text (Text, pack) | |
| 12 | import Text.Parsec | |
| 13 | import Text.Parsec.Text (Parser) | |
| 14 | ||
| 15 | import Prelude hiding (concatMap) | |
| 16 | ||
| 17 | import Data.SCargot.Common | |
| 18 | import Data.SCargot.Repr.Basic (SExpr) | |
| 19 | import Data.SCargot.General (SExprSpec, mkSpec) | |
| 20 | ||
| 21 | {- $info | |
| 22 | ||
| 23 | This module is intended for simple, ad-hoc configuration or data formats | |
| 24 | that might not need their on rich structure but might benefit from a few | |
| 25 | various kinds of literals. The 'haskLikeSpec' understands identifiers as | |
| 26 | defined by R5RS, as well as string, integer, and floating-point literals | |
| 27 | as defined by the Haskell spec. It does _not_ natively understand other | |
| 28 | data types, such as booleans, vectors, bitstrings, or the like. | |
| 29 | ||
| 30 | -} | |
| 31 | ||
| 32 | ||
| 33 | -- | An atom type that understands Haskell-like values as well as | |
| 34 | -- Scheme-like identifiers. | |
| 35 | data HaskLikeAtom | |
| 36 | = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme | |
| 37 | -- standard | |
| 38 | | HSString Text -- ^ A string, parsed according to the syntax for string | |
| 39 | -- literals in the Haskell report | |
| 40 | | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to | |
| 41 | -- the syntax for integer literals in the Haskell report | |
| 42 | | HSFloat Double -- ^ A double-precision floating-point value, parsed | |
| 43 | -- according to the syntax for floats in the Haskell | |
| 44 | -- report | |
| 45 | deriving (Eq, Show) | |
| 46 | ||
| 47 | instance IsString HaskLikeAtom where | |
| 48 | fromString = HSIdent . fromString | |
| 49 | ||
| 50 | pString :: Parser Text | |
| 51 | pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc)) | |
| 52 | where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026') | |
| 53 | esc = do char '\\' | |
| 54 | Nothing <$ (gap <|> char '&') <|> | |
| 55 | Just <$> code | |
| 56 | gap = many1 space >> char '\\' | |
| 57 | code = eEsc <|> eNum <|> eCtrl <|> eAscii | |
| 58 | eCtrl = char '^' >> unCtrl <$> upper | |
| 59 | eNum = (toEnum . fromInteger) <$> | |
| 60 | (decNumber <|> (char 'o' >> octNumber) | |
| 61 | <|> (char 'x' >> hexNumber)) | |
| 62 | eEsc = choice [ char a >> return b | (a, b) <- escMap ] | |
| 63 | eAscii = choice [ try (string a >> return b) | |
| 64 | | (a, b) <- asciiMap ] | |
| 65 | unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1) | |
| 66 | ||
| 67 | escMap :: [(Char, Char)] | |
| 68 | escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'" | |
| 69 | ||
| 70 | asciiMap :: [(String, Char)] | |
| 71 | asciiMap = zip | |
| 72 | ["BS","HT","LF","VT","FF","CR","SO","SI","EM" | |
| 73 | ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX" | |
| 74 | ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3" | |
| 75 | ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"] | |
| 76 | ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++ | |
| 77 | "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++ | |
| 78 | "\SYN\ETB\CAN\SUB\ESC\DEL") | |
| 79 | ||
| 80 | pFloat :: Parser Double | |
| 81 | pFloat = do | |
| 82 | n <- decNumber | |
| 83 | withDot n <|> noDot n | |
| 84 | where withDot n = do | |
| 85 | char '.' | |
| 86 | m <- decNumber | |
| 87 | e <- option 1.0 exponent | |
| 88 | return ((fromIntegral n + asDec m 0) * e) | |
| 89 | noDot n = do | |
| 90 | e <- exponent | |
| 91 | return (fromIntegral n * e) | |
| 92 | exponent = do | |
| 93 | oneOf "eE" | |
| 94 | s <- power | |
| 95 | x <- decNumber | |
| 96 | return (10 ** s (fromIntegral x)) | |
| 97 | asDec 0 k = k | |
| 98 | asDec n k = | |
| 99 | asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1) | |
| 100 | ||
| 101 | power :: Num a => Parser (a -> a) | |
| 102 | power = negate <$ char '-' <|> id <$ char '+' <|> return id | |
| 103 | ||
| 104 | pInt :: Parser Integer | |
| 105 | pInt = do | |
| 106 | s <- power | |
| 107 | n <- pZeroNum <|> decNumber | |
| 108 | return (fromIntegral (s n)) | |
| 109 | ||
| 110 | pZeroNum :: Parser Integer | |
| 111 | pZeroNum = char '0' >> | |
| 112 | ( (oneOf "xX" >> hexNumber) | |
| 113 | <|> (oneOf "oO" >> octNumber) | |
| 114 | <|> decNumber | |
| 115 | <|> return 0 | |
| 116 | ) | |
| 117 | ||
| 118 | pHaskLikeAtom :: Parser HaskLikeAtom | |
| 119 | pHaskLikeAtom | |
| 120 | = HSFloat <$> (try pFloat <?> "float") | |
| 121 | <|> HSInt <$> (try pInt <?> "integer") | |
| 122 | <|> HSString <$> (pString <?> "string literal") | |
| 123 | <|> HSIdent <$> (parseR5RSIdent <?> "token") | |
| 124 | ||
| 125 | sHaskLikeAtom :: HaskLikeAtom -> Text | |
| 126 | sHaskLikeAtom (HSIdent t) = t | |
| 127 | sHaskLikeAtom (HSString s) = pack (show s) | |
| 128 | sHaskLikeAtom (HSInt i) = pack (show i) | |
| 129 | sHaskLikeAtom (HSFloat f) = pack (show f) | |
| 130 | ||
| 131 | -- | This `SExprSpec` understands s-expressions that contain | |
| 132 | -- Scheme-like tokens, as well as string literals, integer | |
| 133 | -- literals, and floating-point literals. Each of these values | |
| 134 | -- is parsed according to the lexical rules in the Haskell | |
| 135 | -- report, so the same set of string escapes, numeric bases, | |
| 136 | -- and floating-point options are available. This spec does | |
| 137 | -- not parse comments and does not understand any reader | |
| 138 | -- macros. | |
| 139 | haskLikeSpec :: SExprSpec HaskLikeAtom (SExpr HaskLikeAtom) | |
| 140 | haskLikeSpec = mkSpec pHaskLikeAtom sHaskLikeAtom |
| 1 | {-# LANGUAGE OverloadedStrings #-} | |
| 2 | ||
| 3 | module Data.SCargot.Language.Basic | |
| 4 | ( -- * Spec | |
| 5 | -- $descr | |
| 6 | basicParser | |
| 7 | , basicPrinter | |
| 8 | ) where | |
| 9 | ||
| 10 | import Control.Applicative ((<$>)) | |
| 11 | import Data.Char (isAlphaNum) | |
| 12 | import Text.Parsec (many1, satisfy) | |
| 13 | import Data.Text (Text, pack) | |
| 14 | ||
| 15 | import Data.SCargot.Repr.Basic (SExpr) | |
| 16 | import Data.SCargot ( SExprParser | |
| 17 | , SExprPrinter | |
| 18 | , mkParser | |
| 19 | , flatPrint | |
| 20 | ) | |
| 21 | import Data.SCargot.Comments (withLispComments) | |
| 22 | ||
| 23 | isAtomChar :: Char -> Bool | |
| 24 | isAtomChar c = isAlphaNum c | |
| 25 | || c == '-' || c == '*' || c == '/' | |
| 26 | || c == '+' || c == '<' || c == '>' | |
| 27 | || c == '=' || c == '!' || c == '?' | |
| 28 | ||
| 29 | -- $descr | |
| 30 | -- The 'basicSpec' describes S-expressions whose atoms are simply | |
| 31 | -- text strings that contain alphanumeric characters and a small | |
| 32 | -- set of punctuation. It does no parsing of numbers or other data | |
| 33 | -- types, and will accept tokens that typical Lisp implementations | |
| 34 | -- would find nonsensical (like @77foo@). | |
| 35 | -- | |
| 36 | -- Atoms recognized by the 'basicSpec' are any string matching the | |
| 37 | -- regular expression @[A-Za-z0-9+*<>/=!?-]+@. | |
| 38 | ||
| 39 | -- | A 'SExprParser' that understands atoms to be sequences of | |
| 40 | -- alphanumeric characters as well as the punctuation | |
| 41 | -- characters @[-*/+<>=!?]@, and does no processing of them. | |
| 42 | -- | |
| 43 | -- >>> decode basicParser "(1 elephant)" | |
| 44 | -- Right [SCons (SAtom "1") (SCons (SAtom "elephant") SNil)] | |
| 45 | basicParser :: SExprParser Text (SExpr Text) | |
| 46 | basicParser = mkParser pToken | |
| 47 | where pToken = pack <$> many1 (satisfy isAtomChar) | |
| 48 | ||
| 49 | -- | A 'SExprPrinter' that prints textual atoms directly (without quoting | |
| 50 | -- or any other processing) onto a single line. | |
| 51 | -- | |
| 52 | -- >>> encode basicPrinter [L [A "1", A "elephant"]] | |
| 53 | -- "(1 elephant)" | |
| 54 | basicPrinter :: SExprPrinter Text (SExpr Text) | |
| 55 | basicPrinter = flatPrint id |
| 1 | {-# LANGUAGE OverloadedStrings #-} | |
| 2 | ||
| 3 | module Data.SCargot.Language.HaskLike | |
| 4 | ( -- $info | |
| 5 | HaskLikeAtom(..) | |
| 6 | , haskLikeParser | |
| 7 | , haskLikePrinter | |
| 8 | ) where | |
| 9 | ||
| 10 | import Control.Applicative ((<$>), (<*>), (<$)) | |
| 11 | import Data.Maybe (catMaybes) | |
| 12 | import Data.String (IsString(..)) | |
| 13 | import Data.Text (Text, pack) | |
| 14 | import Text.Parsec | |
| 15 | import Text.Parsec.Text (Parser) | |
| 16 | ||
| 17 | import Prelude hiding (concatMap) | |
| 18 | ||
| 19 | import Data.SCargot.Common | |
| 20 | import Data.SCargot.Repr.Basic (SExpr) | |
| 21 | import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint) | |
| 22 | ||
| 23 | {- $info | |
| 24 | ||
| 25 | This module is intended for simple, ad-hoc configuration or data formats | |
| 26 | that might not need their on rich structure but might benefit from a few | |
| 27 | various kinds of literals. The 'haskLikeParser' understands identifiers as | |
| 28 | defined by R5RS, as well as string, integer, and floating-point literals | |
| 29 | as defined by the Haskell spec. It does __not__ natively understand other | |
| 30 | data types, such as booleans, vectors, bitstrings. | |
| 31 | ||
| 32 | -} | |
| 33 | ||
| 34 | ||
| 35 | -- | An atom type that understands Haskell-like values as well as | |
| 36 | -- Scheme-like identifiers. | |
| 37 | data HaskLikeAtom | |
| 38 | = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme | |
| 39 | -- standard | |
| 40 | | HSString Text -- ^ A string, parsed according to the syntax for string | |
| 41 | -- literals in the Haskell report | |
| 42 | | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to | |
| 43 | -- the syntax for integer literals in the Haskell report | |
| 44 | | HSFloat Double -- ^ A double-precision floating-point value, parsed | |
| 45 | -- according to the syntax for floats in the Haskell | |
| 46 | -- report | |
| 47 | deriving (Eq, Show) | |
| 48 | ||
| 49 | instance IsString HaskLikeAtom where | |
| 50 | fromString = HSIdent . fromString | |
| 51 | ||
| 52 | pString :: Parser Text | |
| 53 | pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc)) | |
| 54 | where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026') | |
| 55 | esc = do char '\\' | |
| 56 | Nothing <$ (gap <|> char '&') <|> | |
| 57 | Just <$> code | |
| 58 | gap = many1 space >> char '\\' | |
| 59 | code = eEsc <|> eNum <|> eCtrl <|> eAscii | |
| 60 | eCtrl = char '^' >> unCtrl <$> upper | |
| 61 | eNum = (toEnum . fromInteger) <$> | |
| 62 | (decNumber <|> (char 'o' >> octNumber) | |
| 63 | <|> (char 'x' >> hexNumber)) | |
| 64 | eEsc = choice [ char a >> return b | (a, b) <- escMap ] | |
| 65 | eAscii = choice [ try (string a >> return b) | |
| 66 | | (a, b) <- asciiMap ] | |
| 67 | unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1) | |
| 68 | ||
| 69 | escMap :: [(Char, Char)] | |
| 70 | escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'" | |
| 71 | ||
| 72 | asciiMap :: [(String, Char)] | |
| 73 | asciiMap = zip | |
| 74 | ["BS","HT","LF","VT","FF","CR","SO","SI","EM" | |
| 75 | ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX" | |
| 76 | ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3" | |
| 77 | ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"] | |
| 78 | ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++ | |
| 79 | "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++ | |
| 80 | "\SYN\ETB\CAN\SUB\ESC\DEL") | |
| 81 | ||
| 82 | pFloat :: Parser Double | |
| 83 | pFloat = do | |
| 84 | n <- decNumber | |
| 85 | withDot n <|> noDot n | |
| 86 | where withDot n = do | |
| 87 | char '.' | |
| 88 | m <- decNumber | |
| 89 | e <- option 1.0 exponent | |
| 90 | return ((fromIntegral n + asDec m 0) * e) | |
| 91 | noDot n = do | |
| 92 | e <- exponent | |
| 93 | return (fromIntegral n * e) | |
| 94 | exponent = do | |
| 95 | oneOf "eE" | |
| 96 | s <- power | |
| 97 | x <- decNumber | |
| 98 | return (10 ** s (fromIntegral x)) | |
| 99 | asDec 0 k = k | |
| 100 | asDec n k = | |
| 101 | asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1) | |
| 102 | ||
| 103 | power :: Num a => Parser (a -> a) | |
| 104 | power = negate <$ char '-' <|> id <$ char '+' <|> return id | |
| 105 | ||
| 106 | pInt :: Parser Integer | |
| 107 | pInt = do | |
| 108 | s <- power | |
| 109 | n <- pZeroNum <|> decNumber | |
| 110 | return (fromIntegral (s n)) | |
| 111 | ||
| 112 | pZeroNum :: Parser Integer | |
| 113 | pZeroNum = char '0' >> | |
| 114 | ( (oneOf "xX" >> hexNumber) | |
| 115 | <|> (oneOf "oO" >> octNumber) | |
| 116 | <|> decNumber | |
| 117 | <|> return 0 | |
| 118 | ) | |
| 119 | ||
| 120 | pHaskLikeAtom :: Parser HaskLikeAtom | |
| 121 | pHaskLikeAtom | |
| 122 | = HSFloat <$> (try pFloat <?> "float") | |
| 123 | <|> HSInt <$> (try pInt <?> "integer") | |
| 124 | <|> HSString <$> (pString <?> "string literal") | |
| 125 | <|> HSIdent <$> (parseR5RSIdent <?> "token") | |
| 126 | ||
| 127 | sHaskLikeAtom :: HaskLikeAtom -> Text | |
| 128 | sHaskLikeAtom (HSIdent t) = t | |
| 129 | sHaskLikeAtom (HSString s) = pack (show s) | |
| 130 | sHaskLikeAtom (HSInt i) = pack (show i) | |
| 131 | sHaskLikeAtom (HSFloat f) = pack (show f) | |
| 132 | ||
| 133 | -- | This `SExprParser` understands s-expressions that contain | |
| 134 | -- Scheme-like tokens, as well as string literals, integer | |
| 135 | -- literals, and floating-point literals. Each of these values | |
| 136 | -- is parsed according to the lexical rules in the Haskell | |
| 137 | -- report, so the same set of string escapes, numeric bases, | |
| 138 | -- and floating-point options are available. This spec does | |
| 139 | -- not parse comments and does not understand any reader | |
| 140 | -- macros. | |
| 141 | -- | |
| 142 | -- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")" | |
| 143 | -- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)] | |
| 144 | haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom) | |
| 145 | haskLikeParser = mkParser pHaskLikeAtom | |
| 146 | ||
| 147 | -- | This 'SExprPrinter' emits s-expressions that contain Scheme-like | |
| 148 | -- tokens as well as string literals, integer literals, and floating-point | |
| 149 | -- literals, which will be emitted as the literals produced by Haskell's | |
| 150 | -- 'show' function. This printer will produce a flat s-expression with | |
| 151 | -- no indentation of any kind. | |
| 152 | -- | |
| 153 | -- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]] | |
| 154 | -- "(1 \"elephant\")" | |
| 155 | haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom) | |
| 156 | haskLikePrinter = flatPrint sHaskLikeAtom |
| 1 | {-# LANGUAGE ViewPatterns #-} | |
| 2 | {-# LANGUAGE OverloadedStrings #-} | |
| 3 | ||
| 4 | module Data.SCargot.Parse | |
| 5 | ( -- * Parsing | |
| 6 | decode | |
| 7 | , decodeOne | |
| 8 | -- * Parsing Control | |
| 9 | , SExprParser | |
| 10 | , Reader | |
| 11 | , Comment | |
| 12 | , mkParser | |
| 13 | , setCarrier | |
| 14 | , addReader | |
| 15 | , setComment | |
| 16 | -- * Specific SExprParser Conversions | |
| 17 | , asRich | |
| 18 | , asWellFormed | |
| 19 | , withQuote | |
| 20 | ) where | |
| 21 | ||
| 22 | import Control.Applicative ((<*), (*>), (<*>), (<$>), pure) | |
| 23 | import Control.Monad ((>=>)) | |
| 24 | import Data.Char (isAlpha, isDigit, isAlphaNum) | |
| 25 | import Data.Map.Strict (Map) | |
| 26 | import qualified Data.Map.Strict as M | |
| 27 | import Data.Maybe (fromJust) | |
| 28 | import Data.Monoid ((<>)) | |
| 29 | import Data.String (IsString) | |
| 30 | import Data.Text (Text, pack, unpack) | |
| 31 | import qualified Data.Text as T | |
| 32 | import Text.Parsec ( (<|>) | |
| 33 | , (<?>) | |
| 34 | , char | |
| 35 | , eof | |
| 36 | , lookAhead | |
| 37 | , many1 | |
| 38 | , runParser | |
| 39 | , skipMany | |
| 40 | ) | |
| 41 | import Text.Parsec.Char (anyChar, space) | |
| 42 | import Text.Parsec.Text (Parser) | |
| 43 | ||
| 44 | import Data.SCargot.Repr ( SExpr(..) | |
| 45 | , RichSExpr | |
| 46 | , WellFormedSExpr | |
| 47 | , fromRich | |
| 48 | , toRich | |
| 49 | , fromWellFormed | |
| 50 | , toWellFormed | |
| 51 | ) | |
| 52 | ||
| 53 | type ReaderMacroMap atom = Map Char (Reader atom) | |
| 54 | ||
| 55 | -- | A 'Reader' represents a reader macro: it takes a parser for | |
| 56 | -- the S-Expression type and performs as much or as little | |
| 57 | -- parsing as it would like, and then returns an S-expression. | |
| 58 | type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom)) | |
| 59 | ||
| 60 | -- | A 'Comment' represents any kind of skippable comment. This | |
| 61 | -- parser __must__ be able to fail if a comment is not being | |
| 62 | -- recognized, and it __must__ not consume any input in case | |
| 63 | -- of failure. | |
| 64 | type Comment = Parser () | |
| 65 | ||
| 66 | -- | A 'SExprParser' describes a parser for a particular value | |
| 67 | -- that has been serialized as an s-expression. The @atom@ parameter | |
| 68 | -- corresponds to a Haskell type used to represent the atoms, | |
| 69 | -- and the @carrier@ parameter corresponds to the parsed S-Expression | |
| 70 | -- structure. | |
| 71 | data SExprParser atom carrier = SExprParser | |
| 72 | { sesPAtom :: Parser atom | |
| 73 | , readerMap :: ReaderMacroMap atom | |
| 74 | , comment :: Maybe Comment | |
| 75 | , postparse :: SExpr atom -> Either String carrier | |
| 76 | } | |
| 77 | ||
| 78 | -- | Create a basic 'SExprParser' when given a parser | |
| 79 | -- for an atom type. | |
| 80 | -- | |
| 81 | -- >>> import Text.Parsec (alphaNum, many1) | |
| 82 | -- >>> let parser = mkParser (many1 alphaNum) | |
| 83 | -- >>> decode parser "(ele phant)" | |
| 84 | -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)] | |
| 85 | mkParser :: Parser atom -> SExprParser atom (SExpr atom) | |
| 86 | mkParser parser = SExprParser | |
| 87 | { sesPAtom = parser | |
| 88 | , readerMap = M.empty | |
| 89 | , comment = Nothing | |
| 90 | , postparse = return | |
| 91 | } | |
| 92 | ||
| 93 | -- | Modify the carrier type for a 'SExprParser'. This is | |
| 94 | -- used internally to convert between various 'SExpr' representations, | |
| 95 | -- but could also be used externally to add an extra conversion layer | |
| 96 | -- onto a 'SExprParser'. | |
| 97 | -- | |
| 98 | -- >>> import Text.Parsec (alphaNum, many1) | |
| 99 | -- >>> import Data.SCargot.Repr (toRich) | |
| 100 | -- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum)) | |
| 101 | -- >>> decode parser "(ele phant)" | |
| 102 | -- Right [RSlist [RSAtom "ele",RSAtom "phant"]] | |
| 103 | setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c | |
| 104 | setCarrier f spec = spec { postparse = postparse spec >=> f } | |
| 105 | ||
| 106 | -- | Convert the final output representation from the 'SExpr' type | |
| 107 | -- to the 'RichSExpr' type. | |
| 108 | -- | |
| 109 | -- >>> import Text.Parsec (alphaNum, many1) | |
| 110 | -- >>> let parser = asRich (mkParser (many1 alphaNum)) | |
| 111 | -- >>> decode parser "(ele phant)" | |
| 112 | -- Right [RSlist [RSAtom "ele",RSAtom "phant"]] | |
| 113 | asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b) | |
| 114 | asRich = setCarrier (return . toRich) | |
| 115 | ||
| 116 | -- | Convert the final output representation from the 'SExpr' type | |
| 117 | -- to the 'WellFormedSExpr' type. | |
| 118 | -- | |
| 119 | -- >>> import Text.Parsec (alphaNum, many1) | |
| 120 | -- >>> let parser = asWellFormed (mkParser (many1 alphaNum)) | |
| 121 | -- >>> decode parser "(ele phant)" | |
| 122 | -- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]] | |
| 123 | asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b) | |
| 124 | asWellFormed = setCarrier toWellFormed | |
| 125 | ||
| 126 | -- | Add the ability to execute some particular reader macro, as | |
| 127 | -- defined by its initial character and the 'Parser' which returns | |
| 128 | -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which | |
| 129 | -- can be recursively called to parse more S-Expressions, and begins | |
| 130 | -- parsing after the reader character has been removed from the | |
| 131 | -- stream. | |
| 132 | -- | |
| 133 | -- >>> import Text.Parsec (alphaNum, char, many1) | |
| 134 | -- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p) | |
| 135 | -- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum)) | |
| 136 | -- >>> decode parser "(an [ele phant])" | |
| 137 | -- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)] | |
| 138 | ||
| 139 | addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c | |
| 140 | addReader c reader spec = spec | |
| 141 | { readerMap = M.insert c reader (readerMap spec) } | |
| 142 | ||
| 143 | -- | Add the ability to ignore some kind of comment. This gets | |
| 144 | -- factored into whitespace parsing, and it's very important that | |
| 145 | -- the parser supplied __be able to fail__ (as otherwise it will | |
| 146 | -- cause an infinite loop), and also that it __not consume any input__ | |
| 147 | -- (which may require it to be wrapped in 'try'.) | |
| 148 | -- | |
| 149 | -- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string) | |
| 150 | -- >>> let comment = string "//" *> manyTill anyChar newline *> pure () | |
| 151 | -- >>> let parser = setComment comment (mkParser (many1 alphaNum)) | |
| 152 | -- >>> decode parser "(ele //a comment\n phant)" | |
| 153 | -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)] | |
| 154 | ||
| 155 | setComment :: Comment -> SExprParser a c -> SExprParser a c | |
| 156 | setComment c spec = spec { comment = Just (c <?> "comment") } | |
| 157 | ||
| 158 | -- | Add the ability to understand a quoted S-Expression. | |
| 159 | -- Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This | |
| 160 | -- assumes that the underlying atom type implements the "IsString" | |
| 161 | -- class, and will create the @quote@ atom using @fromString "quote"@. | |
| 162 | -- | |
| 163 | -- >>> import Text.Parsec (alphaNum, many1) | |
| 164 | -- >>> let parser = withQuote (mkParser (many1 alphaNum)) | |
| 165 | -- >>> decode parser "'elephant" | |
| 166 | -- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)] | |
| 167 | withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t) | |
| 168 | withQuote = addReader '\'' (fmap go) | |
| 169 | where go s = SCons "quote" (SCons s SNil) | |
| 170 | ||
| 171 | peekChar :: Parser (Maybe Char) | |
| 172 | peekChar = Just <$> lookAhead anyChar <|> pure Nothing | |
| 173 | ||
| 174 | parseGenericSExpr :: | |
| 175 | Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom) | |
| 176 | parseGenericSExpr atom reader skip = do | |
| 177 | let sExpr = parseGenericSExpr atom reader skip <?> "s-expr" | |
| 178 | skip | |
| 179 | c <- peekChar | |
| 180 | r <- case c of | |
| 181 | Nothing -> fail "Unexpected end of input" | |
| 182 | Just '(' -> char '(' >> skip >> parseList sExpr skip | |
| 183 | Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr | |
| 184 | _ -> SAtom `fmap` atom | |
| 185 | skip | |
| 186 | return r | |
| 187 | ||
| 188 | parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom) | |
| 189 | parseList sExpr skip = do | |
| 190 | i <- peekChar | |
| 191 | case i of | |
| 192 | Nothing -> fail "Unexpected end of input" | |
| 193 | Just ')' -> char ')' >> return SNil | |
| 194 | _ -> do | |
| 195 | car <- sExpr | |
| 196 | skip | |
| 197 | c <- peekChar | |
| 198 | case c of | |
| 199 | Just '.' -> do | |
| 200 | char '.' | |
| 201 | cdr <- sExpr | |
| 202 | skip | |
| 203 | char ')' | |
| 204 | skip | |
| 205 | return (SCons car cdr) | |
| 206 | Just ')' -> do | |
| 207 | char ')' | |
| 208 | skip | |
| 209 | return (SCons car SNil) | |
| 210 | _ -> do | |
| 211 | cdr <- parseList sExpr skip | |
| 212 | return (SCons car cdr) | |
| 213 | ||
| 214 | -- | Given a CommentMap, create the corresponding parser to | |
| 215 | -- skip those comments (if they exist). | |
| 216 | buildSkip :: Maybe (Parser ()) -> Parser () | |
| 217 | buildSkip Nothing = skipMany space | |
| 218 | buildSkip (Just c) = alternate | |
| 219 | where alternate = skipMany space >> ((c >> alternate) <|> return ()) | |
| 220 | ||
| 221 | doParse :: Parser a -> Text -> Either String a | |
| 222 | doParse p t = case runParser p () "" t of | |
| 223 | Left err -> Left (show err) | |
| 224 | Right x -> Right x | |
| 225 | ||
| 226 | -- | Decode a single S-expression. If any trailing input is left after | |
| 227 | -- the S-expression (ignoring comments or whitespace) then this | |
| 228 | -- will fail: for those cases, use 'decode', which returns a list of | |
| 229 | -- all the S-expressions found at the top level. | |
| 230 | decodeOne :: SExprParser atom carrier -> Text -> Either String carrier | |
| 231 | decodeOne spec = doParse (parser <* eof) >=> (postparse spec) | |
| 232 | where parser = parseGenericSExpr | |
| 233 | (sesPAtom spec) | |
| 234 | (readerMap spec) | |
| 235 | (buildSkip (comment spec)) | |
| 236 | ||
| 237 | -- | Decode several S-expressions according to a given 'SExprParser'. This | |
| 238 | -- will return a list of every S-expression that appears at the top-level | |
| 239 | -- of the document. | |
| 240 | decode :: SExprParser atom carrier -> Text -> Either String [carrier] | |
| 241 | decode spec = | |
| 242 | doParse (many1 parser <* eof) >=> mapM (postparse spec) | |
| 243 | where parser = parseGenericSExpr | |
| 244 | (sesPAtom spec) | |
| 245 | (readerMap spec) | |
| 246 | (buildSkip (comment spec)) | |
| 247 | ||
| 248 | {- | |
| 249 | -- | Encode (without newlines) a single S-expression. | |
| 250 | encodeSExpr :: SExpr atom -> (atom -> Text) -> Text | |
| 251 | encodeSExpr SNil _ = "()" | |
| 252 | encodeSExpr (SAtom s) t = t s | |
| 253 | encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t) | |
| 254 | where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")" | |
| 255 | go SNil rs = "(" <> rs <> ")" | |
| 256 | go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t) | |
| 257 | ||
| 258 | -- | Emit an S-Expression in a machine-readable way. This does no | |
| 259 | -- pretty-printing or indentation, and produces no comments. | |
| 260 | encodeOne :: SExprParser atom carrier -> carrier -> Text | |
| 261 | encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec) | |
| 262 | ||
| 263 | encode :: SExprParser atom carrier -> [carrier] -> Text | |
| 264 | encode spec cs = T.concat (map (encodeOne spec) cs) | |
| 265 | -} |
| 1 | {-# LANGUAGE RecordWildCards #-} | |
| 2 | {-# LANGUAGE OverloadedStrings #-} | |
| 3 | {-# LANGUAGE ScopedTypeVariables #-} | |
| 4 | ||
| 5 | module Data.SCargot.Pretty | |
| 6 | ( -- * Pretty-Printing | |
| 7 | prettyPrintSExpr | |
| 8 | -- * Pretty-Printing Control | |
| 9 | , LayoutOptions(..) | |
| 10 | , Indent(..) | |
| 11 | -- * Default Printing Strategies | |
| 12 | , basicPrint | |
| 13 | , flatPrint | |
| 14 | ) where | |
| 15 | ||
| 16 | import Data.Monoid ((<>)) | |
| 17 | import Data.Text (Text) | |
| 18 | import qualified Data.Text as T | |
| 19 | ||
| 20 | import Data.SCargot.Repr | |
| 21 | ||
| 22 | -- | The 'Indent' type is used to determine how to indent subsequent | |
| 23 | -- s-expressions in a list, after printing the head of the list. | |
| 24 | data Indent | |
| 25 | = Swing -- ^ A 'Swing' indent will indent subsequent exprs some fixed | |
| 26 | -- amount more than the current line. | |
| 27 | -- | |
| 28 | -- > (foo | |
| 29 | -- > bar | |
| 30 | -- > baz | |
| 31 | -- > quux) | |
| 32 | | SwingAfter Int -- ^ A 'SwingAfter' @n@ indent will try to print the | |
| 33 | -- first @n@ expressions after the head on the same | |
| 34 | -- line as the head, and all after will be swung. | |
| 35 | -- 'SwingAfter' @0@ is equivalent to 'Swing'. | |
| 36 | -- | |
| 37 | -- > (foo bar | |
| 38 | -- > baz | |
| 39 | -- > quux) | |
| 40 | | Align -- ^ An 'Align' indent will print the first expression after | |
| 41 | -- the head on the same line, and subsequent expressions will | |
| 42 | -- be aligned with that one. | |
| 43 | -- | |
| 44 | -- > (foo bar | |
| 45 | -- > baz | |
| 46 | -- > quux) | |
| 47 | deriving (Eq, Show) | |
| 48 | ||
| 49 | -- | A 'LayoutOptions' value describes the strategy taken in | |
| 50 | -- pretty-printing a 'SExpr'. | |
| 51 | data LayoutOptions a = LayoutOptions | |
| 52 | { atomPrinter :: a -> Text | |
| 53 | -- ^ How to serialize a given atom to 'Text'. | |
| 54 | , swingIndent :: SExpr a -> Indent | |
| 55 | -- ^ How to indent subsequent expressions, as determined by | |
| 56 | -- the head of the list. | |
| 57 | , indentAmount :: Int | |
| 58 | -- ^ How much to indent after a swung indentation. | |
| 59 | , maxWidth :: Maybe Int | |
| 60 | -- ^ The maximum width (if any) If this is 'None' then | |
| 61 | -- the resulting s-expression will always be printed | |
| 62 | -- on a single line. | |
| 63 | } | |
| 64 | ||
| 65 | -- | A default 'LayoutOptions' struct that will always print a 'SExpr' | |
| 66 | -- as a single line. | |
| 67 | flatPrint :: (a -> Text) -> LayoutOptions a | |
| 68 | flatPrint printer = LayoutOptions | |
| 69 | { atomPrinter = printer | |
| 70 | , swingIndent = const Swing | |
| 71 | , indentAmount = 2 | |
| 72 | , maxWidth = Nothing | |
| 73 | } | |
| 74 | ||
| 75 | -- | A default 'LayoutOptions' struct that will always swing subsequent | |
| 76 | -- expressions onto later lines if they're too long, indenting them | |
| 77 | -- by two spaces. | |
| 78 | basicPrint :: (a -> Text) -> LayoutOptions a | |
| 79 | basicPrint printer = LayoutOptions | |
| 80 | { atomPrinter = printer | |
| 81 | , swingIndent = const Swing | |
| 82 | , indentAmount = 2 | |
| 83 | , maxWidth = Just 80 | |
| 84 | } | |
| 85 | ||
| 86 | -- Sort of like 'unlines' but without the trailing newline | |
| 87 | joinLines :: [Text] -> Text | |
| 88 | joinLines = T.intercalate "\n" | |
| 89 | ||
| 90 | -- Indents a line by n spaces | |
| 91 | indent :: Int -> Text -> Text | |
| 92 | indent n ts = T.replicate n " " <> ts | |
| 93 | ||
| 94 | -- Indents every line n spaces, and adds a newline to the beginning | |
| 95 | -- used in swung indents | |
| 96 | indentAll :: Int -> [Text] -> Text | |
| 97 | indentAll n = ("\n" <>) . joinLines . map (indent n) | |
| 98 | ||
| 99 | -- Indents every line but the first by some amount | |
| 100 | -- used in aligned indents | |
| 101 | indentSubsequent :: Int -> [Text] -> Text | |
| 102 | indentSubsequent _ [] = "" | |
| 103 | indentSubsequent _ [t] = t | |
| 104 | indentSubsequent n (t:ts) = joinLines (t : go ts) | |
| 105 | where go = map (indent n) | |
| 106 | ||
| 107 | -- oh god this code is so disgusting | |
| 108 | -- i'm sorry to everyone i let down by writing this | |
| 109 | -- i swear i'll do better in the future i promise i have to | |
| 110 | -- for my sake and for everyone's | |
| 111 | ||
| 112 | -- | Pretty-print a 'SExpr' according to the options in a | |
| 113 | -- 'LayoutOptions' value. | |
| 114 | prettyPrintSExpr :: LayoutOptions a -> SExpr a -> Text | |
| 115 | prettyPrintSExpr LayoutOptions { .. } = pHead 0 | |
| 116 | where pHead _ SNil = "()" | |
| 117 | pHead _ (SAtom a) = atomPrinter a | |
| 118 | pHead ind (SCons x xs) = gather ind x xs id | |
| 119 | gather _ _ (SAtom _) _ = error "no dotted pretty printing yet!" | |
| 120 | gather ind h (SCons x xs) k = gather ind h xs (k . (x:)) | |
| 121 | gather ind h SNil k = "(" <> hd <> body <> ")" | |
| 122 | where hd = indentSubsequent ind [pHead (ind+1) h] | |
| 123 | lst = k [] | |
| 124 | flat = T.unwords (map (pHead (ind+1)) lst) | |
| 125 | headWidth = T.length hd + 1 | |
| 126 | indented = | |
| 127 | case swingIndent h of | |
| 128 | SwingAfter n -> | |
| 129 | let (l, ls) = splitAt n lst | |
| 130 | t = T.unwords (map (pHead (ind+1)) l) | |
| 131 | ts = indentAll (ind + indentAmount) | |
| 132 | (map (pHead (ind + indentAmount)) ls) | |
| 133 | in t <> ts | |
| 134 | Swing -> | |
| 135 | indentAll (ind + indentAmount) | |
| 136 | (map (pHead (ind + indentAmount)) lst) | |
| 137 | Align -> | |
| 138 | indentSubsequent (ind + headWidth + 1) | |
| 139 | (map (pHead (ind + headWidth + 1)) lst) | |
| 140 | body | |
| 141 | | length lst == 0 = "" | |
| 142 | | Just maxAmt <- maxWidth | |
| 143 | , T.length flat + ind > maxAmt = " " <> indented | |
| 144 | | otherwise = " " <> flat |
| 1 | {-# LANGUAGE RecordWildCards #-} | |
| 2 | {-# LANGUAGE OverloadedStrings #-} | |
| 3 | {-# LANGUAGE ScopedTypeVariables #-} | |
| 4 | ||
| 5 | module Data.SCargot.Print | |
| 6 | ( -- * Pretty-Printing | |
| 7 | encodeOne | |
| 8 | , encode | |
| 9 | -- * Pretty-Printing Control | |
| 10 | , SExprPrinter | |
| 11 | , Indent(..) | |
| 12 | , setFromCarrier | |
| 13 | , setMaxWidth | |
| 14 | , removeMaxWidth | |
| 15 | , setIndentAmount | |
| 16 | , setIndentStrategy | |
| 17 | -- * Default Printing Strategies | |
| 18 | , basicPrint | |
| 19 | , flatPrint | |
| 20 | ) where | |
| 21 | ||
| 22 | import Data.Monoid ((<>)) | |
| 23 | import Data.Text (Text) | |
| 24 | import qualified Data.Text as T | |
| 25 | ||
| 26 | import Data.SCargot.Repr | |
| 27 | ||
| 28 | -- | The 'Indent' type is used to determine how to indent subsequent | |
| 29 | -- s-expressions in a list, after printing the head of the list. | |
| 30 | data Indent | |
| 31 | = Swing -- ^ A 'Swing' indent will indent subsequent exprs some fixed | |
| 32 | -- amount more than the current line. | |
| 33 | -- | |
| 34 | -- > (foo | |
| 35 | -- > bar | |
| 36 | -- > baz | |
| 37 | -- > quux) | |
| 38 | | SwingAfter Int -- ^ A 'SwingAfter' @n@ indent will try to print the | |
| 39 | -- first @n@ expressions after the head on the same | |
| 40 | -- line as the head, and all after will be swung. | |
| 41 | -- 'SwingAfter' @0@ is equivalent to 'Swing'. | |
| 42 | -- | |
| 43 | -- > (foo bar | |
| 44 | -- > baz | |
| 45 | -- > quux) | |
| 46 | | Align -- ^ An 'Align' indent will print the first expression after | |
| 47 | -- the head on the same line, and subsequent expressions will | |
| 48 | -- be aligned with that one. | |
| 49 | -- | |
| 50 | -- > (foo bar | |
| 51 | -- > baz | |
| 52 | -- > quux) | |
| 53 | deriving (Eq, Show) | |
| 54 | ||
| 55 | -- | A 'SExprPrinter' value describes how to print a given value as an | |
| 56 | -- s-expression. The @carrier@ type parameter indicates the value | |
| 57 | -- that will be printed, and the @atom@ parameter indicates the type | |
| 58 | -- that will represent tokens in an s-expression structure. | |
| 59 | data SExprPrinter atom carrier = SExprPrinter | |
| 60 | { atomPrinter :: atom -> Text | |
| 61 | -- ^ How to serialize a given atom to 'Text'. | |
| 62 | , fromCarrier :: carrier -> SExpr atom | |
| 63 | -- ^ How to turn a carrier type back into a 'Sexpr'. | |
| 64 | , swingIndent :: SExpr atom -> Indent | |
| 65 | -- ^ How to indent subsequent expressions, as determined by | |
| 66 | -- the head of the list. | |
| 67 | , indentAmount :: Int | |
| 68 | -- ^ How much to indent after a swung indentation. | |
| 69 | , maxWidth :: Maybe Int | |
| 70 | -- ^ The maximum width (if any) If this is 'None' then | |
| 71 | -- the resulting s-expression will always be printed | |
| 72 | -- on a single line. | |
| 73 | } | |
| 74 | ||
| 75 | -- | A default 'LayoutOptions' struct that will always print a 'SExpr' | |
| 76 | -- as a single line. | |
| 77 | flatPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom) | |
| 78 | flatPrint printer = SExprPrinter | |
| 79 | { atomPrinter = printer | |
| 80 | , fromCarrier = id | |
| 81 | , swingIndent = const Swing | |
| 82 | , indentAmount = 2 | |
| 83 | , maxWidth = Nothing | |
| 84 | } | |
| 85 | ||
| 86 | -- | A default 'LayoutOptions' struct that will always swing subsequent | |
| 87 | -- expressions onto later lines if they're too long, indenting them | |
| 88 | -- by two spaces. | |
| 89 | basicPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom) | |
| 90 | basicPrint printer = SExprPrinter | |
| 91 | { atomPrinter = printer | |
| 92 | , fromCarrier = id | |
| 93 | , swingIndent = const Swing | |
| 94 | , indentAmount = 2 | |
| 95 | , maxWidth = Just 80 | |
| 96 | } | |
| 97 | ||
| 98 | -- | Modify the carrier type of a 'SExprPrinter' by describing how | |
| 99 | -- to convert the new type back to the previous type. For example, | |
| 100 | -- to pretty-print a well-formed s-expression, we can modify the | |
| 101 | -- 'SExprPrinter' value as follows: | |
| 102 | -- | |
| 103 | -- >>> let printer = setFromCarrier fromWellFormed (basicPrint id) | |
| 104 | -- >>> encodeOne printer (WFSList [WFSAtom "ele", WFSAtom "phant"]) | |
| 105 | -- "(ele phant)" | |
| 106 | setFromCarrier :: (c -> b) -> SExprPrinter a b -> SExprPrinter a c | |
| 107 | setFromCarrier fc pr = pr { fromCarrier = fromCarrier pr . fc } | |
| 108 | ||
| 109 | -- | Dictate a maximum width for pretty-printed s-expressions. | |
| 110 | -- | |
| 111 | -- >>> let printer = setMaxWidth 8 (basicPrint id) | |
| 112 | -- >>> encodeOne printer (L [A "one", A "two", A "three"]) | |
| 113 | -- "(one \n two\n three)" | |
| 114 | setMaxWidth :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier | |
| 115 | setMaxWidth n pr = pr { maxWidth = Just n } | |
| 116 | ||
| 117 | -- | Allow the serialized s-expression to be arbitrarily wide. This | |
| 118 | -- makes all pretty-printing happen on a single line. | |
| 119 | -- | |
| 120 | -- >>> let printer = removeMaxWidth (basicPrint id) | |
| 121 | -- >>> encodeOne printer (L [A "one", A "two", A "three"]) | |
| 122 | -- "(one two three)" | |
| 123 | removeMaxWidth :: SExprPrinter atom carrier -> SExprPrinter atom carrier | |
| 124 | removeMaxWidth pr = pr { maxWidth = Nothing } | |
| 125 | ||
| 126 | -- | Set the number of spaces that a subsequent line will be indented | |
| 127 | -- after a swing indentation. | |
| 128 | -- | |
| 129 | -- >>> let printer = setMaxWidth 12 (basicPrint id) | |
| 130 | -- >>> encodeOne printer (L [A "elephant", A "pachyderm"]) | |
| 131 | -- "(elephant \n pachyderm)" | |
| 132 | -- >>> encodeOne (setIndentAmount 4) (L [A "elephant", A "pachyderm"]) | |
| 133 | -- "(elephant \n pachyderm)" | |
| 134 | setIndentAmount :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier | |
| 135 | setIndentAmount n pr = pr { indentAmount = n } | |
| 136 | ||
| 137 | -- | Dictate how to indent subsequent lines based on the leading | |
| 138 | -- subexpression in an s-expression. For details on how this works, | |
| 139 | -- consult the documentation of the 'Indent' type. | |
| 140 | -- | |
| 141 | -- >>> let indent (A "def") = SwingAfter 1; indent _ = Swing | |
| 142 | -- >>> let printer = setIndentStrategy indent (setMaxWidth 8 (basicPrint id)) | |
| 143 | -- >>> encodeOne printer (L [ A "def", L [ A "func", A "arg" ], A "body" ]) | |
| 144 | -- "(def (func arg)\n body)" | |
| 145 | -- >>> encodeOne printer (L [ A "elephant", A "among", A "pachyderms" ]) | |
| 146 | -- "(elephant \n among\n pachyderms)" | |
| 147 | setIndentStrategy :: (SExpr atom -> Indent) -> SExprPrinter atom carrier -> SExprPrinter atom carrier | |
| 148 | setIndentStrategy st pr = pr { swingIndent = st } | |
| 149 | ||
| 150 | -- Sort of like 'unlines' but without the trailing newline | |
| 151 | joinLines :: [Text] -> Text | |
| 152 | joinLines = T.intercalate "\n" | |
| 153 | ||
| 154 | -- Indents a line by n spaces | |
| 155 | indent :: Int -> Text -> Text | |
| 156 | indent n ts = T.replicate n " " <> ts | |
| 157 | ||
| 158 | -- Indents every line n spaces, and adds a newline to the beginning | |
| 159 | -- used in swung indents | |
| 160 | indentAll :: Int -> [Text] -> Text | |
| 161 | indentAll n = ("\n" <>) . joinLines . map (indent n) | |
| 162 | ||
| 163 | -- Indents every line but the first by some amount | |
| 164 | -- used in aligned indents | |
| 165 | indentSubsequent :: Int -> [Text] -> Text | |
| 166 | indentSubsequent _ [] = "" | |
| 167 | indentSubsequent _ [t] = t | |
| 168 | indentSubsequent n (t:ts) = joinLines (t : go ts) | |
| 169 | where go = map (indent n) | |
| 170 | ||
| 171 | -- oh god this code is so disgusting | |
| 172 | -- i'm sorry to everyone i let down by writing this | |
| 173 | -- i swear i'll do better in the future i promise i have to | |
| 174 | -- for my sake and for everyone's | |
| 175 | ||
| 176 | -- | Pretty-print a 'SExpr' according to the options in a | |
| 177 | -- 'LayoutOptions' value. | |
| 178 | prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text | |
| 179 | prettyPrintSExpr SExprPrinter { .. } = pHead 0 | |
| 180 | where pHead _ SNil = "()" | |
| 181 | pHead _ (SAtom a) = atomPrinter a | |
| 182 | pHead ind (SCons x xs) = gather ind x xs id | |
| 183 | gather _ _ (SAtom _) _ = error "no dotted pretty printing yet!" | |
| 184 | gather ind h (SCons x xs) k = gather ind h xs (k . (x:)) | |
| 185 | gather ind h SNil k = "(" <> hd <> body <> ")" | |
| 186 | where hd = indentSubsequent ind [pHead (ind+1) h] | |
| 187 | lst = k [] | |
| 188 | flat = T.unwords (map (pHead (ind+1)) lst) | |
| 189 | headWidth = T.length hd + 1 | |
| 190 | indented = | |
| 191 | case swingIndent h of | |
| 192 | SwingAfter n -> | |
| 193 | let (l, ls) = splitAt n lst | |
| 194 | t = T.unwords (map (pHead (ind+1)) l) | |
| 195 | ts = indentAll (ind + indentAmount) | |
| 196 | (map (pHead (ind + indentAmount)) ls) | |
| 197 | in t <> ts | |
| 198 | Swing -> | |
| 199 | indentAll (ind + indentAmount) | |
| 200 | (map (pHead (ind + indentAmount)) lst) | |
| 201 | Align -> | |
| 202 | indentSubsequent (ind + headWidth + 1) | |
| 203 | (map (pHead (ind + headWidth + 1)) lst) | |
| 204 | body | |
| 205 | | length lst == 0 = "" | |
| 206 | | Just maxAmt <- maxWidth | |
| 207 | , T.length flat + ind > maxAmt = " " <> indented | |
| 208 | | otherwise = " " <> flat | |
| 209 | ||
| 210 | -- | Turn a single s-expression into a string according to a given | |
| 211 | -- 'SExprPrinter'. | |
| 212 | encodeOne :: SExprPrinter atom carrier -> carrier -> Text | |
| 213 | encodeOne s@(SExprPrinter { .. }) = | |
| 214 | prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier | |
| 215 | ||
| 216 | -- | Turn a list of s-expressions into a single string according to | |
| 217 | -- a given 'SExprPrinter'. | |
| 218 | encode :: SExprPrinter atom carrier -> [carrier] -> Text | |
| 219 | encode spec = T.intercalate "\n\n" . map (encodeOne spec) |
| 5 | 5 | {-# LANGUAGE TypeFamilies #-} |
| 6 | 6 | |
| 7 | 7 | module Data.SCargot.Repr |
| 8 |
( -- |
|
| 8 | ( -- $reprs | |
| 9 | -- * Elementary SExpr representation | |
| 9 | 10 | SExpr(..) |
| 10 | 11 | -- * Rich SExpr representation |
| 11 | 12 | , RichSExpr(..) |
| 51 | 52 | -- represent a well-formed cons list, and 'RSDotted' |
| 52 | 53 | -- to represent an improper list of the form |
| 53 | 54 | -- @(a b c . d)@. This representation is based on |
| 54 | -- the shape of the parsed S-Expression, and not on | |
| 55 | -- how it was represented, so @(a . (b))@ is going to | |
| 55 | -- the structure of the parsed S-Expression, and not on | |
| 56 | -- how it was originally represented: thus, @(a . (b))@ is going to | |
| 56 | 57 | -- be represented as @RSList[RSAtom a, RSAtom b]@ |
| 57 | 58 | -- despite having been originally represented as a |
| 58 | 59 | -- dotted list. |
| 138 | 139 | fromWellFormed (WFSAtom a) = SAtom a |
| 139 | 140 | fromWellFormed (WFSList xs) = |
| 140 | 141 | foldr SCons SNil (map fromWellFormed xs) |
| 142 | ||
| 143 | {- $reprs | |
| 144 | ||
| 145 | This module contains several different representations for | |
| 146 | s-expressions. The s-cargot library underlying uses the | |
| 147 | 'SExpr' type as its representation type, which is a binary | |
| 148 | tree representation with an arbitrary type for its leaves. | |
| 149 | ||
| 150 | This type is not always convenient to manipulate in Haskell | |
| 151 | code, this module defines two alternate representations | |
| 152 | which turn a sequence of nested right-branching cons pairs | |
| 153 | into Haskell lists: that is to say, they transform between | |
| 154 | ||
| 155 | @ | |
| 156 | SCons a (SCons b (SCons c SNil)) \<=\> RSList [a, b, c] | |
| 157 | @ | |
| 158 | ||
| 159 | These two types differ in how they handle non-well-formed | |
| 160 | lists, i.e. lists that end with an atom. The 'RichSExpr' | |
| 161 | format handles this with a special constructor for lists | |
| 162 | that end in an atom: | |
| 163 | ||
| 164 | @ | |
| 165 | SCons a (SCons b (SAtom c)) \<=\> RSDotted [a, b] c | |
| 166 | @ | |
| 167 | ||
| 168 | On the other hand, the 'WellFormedSExpr' type elects | |
| 169 | not to handle this case. This is unusual for Lisp source code, | |
| 170 | but is a reasonable choice for configuration or data | |
| 171 | storage formats that use s-expressions, where | |
| 172 | non-well-formed lists would be an unnecessary | |
| 173 | complication. | |
| 174 | ||
| 175 | To make working with these types less verbose, there are other | |
| 176 | modules that export pattern aliases and helper functions: these | |
| 177 | can be found at "Data.SCargot.Repr.Basic", | |
| 178 | "Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed". | |
| 179 | -} | |
| 1 | 1 | module Data.SCargot |
| 2 | ( module Data.SCargot.General | |
| 3 | ) where | |
| 2 | ( -- * SCargot Basics | |
| 4 | 3 | |
| 5 | import Data.SCargot.General | |
| 4 | -- $intro | |
| 5 | ||
| 6 | -- * Parsing and Printing | |
| 7 | decode | |
| 8 | , decodeOne | |
| 9 | , encode | |
| 10 | , encodeOne | |
| 11 | -- * Parser Construction | |
| 12 | -- ** Specifying a Parser | |
| 13 | , SExprParser | |
| 14 | , Reader | |
| 15 | , Comment | |
| 16 | , mkParser | |
| 17 | , setCarrier | |
| 18 | , addReader | |
| 19 | , setComment | |
| 20 | , asRich | |
| 21 | , asWellFormed | |
| 22 | , withQuote | |
| 23 | -- * Printer Construction | |
| 24 | -- * Specifying a Pretty-Printer | |
| 25 | , SExprPrinter | |
| 26 | , Indent(..) | |
| 27 | , basicPrint | |
| 28 | , flatPrint | |
| 29 | , setFromCarrier | |
| 30 | , setMaxWidth | |
| 31 | , removeMaxWidth | |
| 32 | , setIndentAmount | |
| 33 | , setIndentStrategy | |
| 34 | ) where | |
| 35 | ||
| 36 | import Data.SCargot.Parse | |
| 37 | import Data.SCargot.Print | |
| 38 | ||
| 39 | {- $intro | |
| 40 | ||
| 41 | The S-Cargot library is a library for parsing and emitting | |
| 42 | <https://en.wikipedia.org/wiki/S-expression s-expressions>, designed | |
| 43 | to be as flexible as possible. Despite some efforts at | |
| 44 | <http://people.csail.mit.edu/rivest/Sexp.txt standardization>, | |
| 45 | s-expressions are a general approach to describing a data format | |
| 46 | that can very often differ in subtle, incompatible ways: the | |
| 47 | s-expressions understood by Common Lisp are different from the | |
| 48 | s-expressions understood by Scheme, and even the different | |
| 49 | revisions of the Scheme language understand s-expressions in a | |
| 50 | slightly different way. To accomodate this, the S-Cargot library | |
| 51 | provides a toolbox for defining variations on s-expressions, | |
| 52 | complete with the ability to select various comment syntaxes, reader | |
| 53 | macros, and atom types. | |
| 54 | ||
| 55 | If all you want is to read some s-expressions and don't care about | |
| 56 | the edge cases of the format, or all you want is a new configuration | |
| 57 | format, try the "Data.SCargot.Language.Basic" or "Data.SCargot.Language.HaskLike" | |
| 58 | modules, which define an s-expression language whose atoms are | |
| 59 | plain strings and Haskell literals, respectively. | |
| 60 | ||
| 61 | The S-Cargot library works by specifying values which contain all | |
| 62 | the information needed to either parse or print an s-expression. | |
| 63 | The actual s-expression structure is parsed as a structure of | |
| 64 | <https://en.wikipedia.org/wiki/Cons cons cells> as represented | |
| 65 | by the 'SExpr' type, but can alternately be exposed as the | |
| 66 | isomorphic 'RichSExpr' type or the less expressive but | |
| 67 | easier-to-work-with 'WellFormedSExpr' type. Modules devoted | |
| 68 | to each representation type (in "Data.SCargot.Repr.Basic", | |
| 69 | "Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed") | |
| 70 | provide helper functions, lenses, and pattern synonyms to make | |
| 71 | creating and processing these values easier. | |
| 72 | ||
| 73 | The details of how to parse a given structure are represented | |
| 74 | by building up a 'SExprParser' value, which is defined in | |
| 75 | "Data.SCargot.Parse" and re-exported here. A minimal | |
| 76 | 'SExprParser' defines only how to parse the atoms of the | |
| 77 | language; helper functions can define comment syntaxes, | |
| 78 | reader macros, and transformations over the parsed structure. | |
| 79 | ||
| 80 | The details of how to print a given structure are represented | |
| 81 | by building up a 'SExprPrinter' value, which is defined in | |
| 82 | "Data.SCargot.Print" and re-exported here. A minimal | |
| 83 | 'SExprPrinter' defines only how to print the atoms of the | |
| 84 | language; helper functions help with the layout of the | |
| 85 | pretty-printed s-expression in terms of how to indent the | |
| 86 | surrounding expression. | |
| 87 | ||
| 88 | Other helper modules define useful primitives for building up | |
| 89 | s-expression languages: the "Data.SCargot.Common" module provides | |
| 90 | parsers for common literals, while the "Data.SCargot.Comments" | |
| 91 | module provides parsers for comment syntaxes borrowed from | |
| 92 | various other languages. | |
| 93 | ||
| 94 | -} |
| 1 | 1 | name: s-cargot |
| 2 | 2 | version: 0.1.0.0 |
| 3 | 3 | synopsis: A flexible, extensible s-expression library. |
| 4 | homepage: https://github.com/aisamanra/s-cargot | |
| 4 | 5 | description: S-Cargot is a library for working with s-expressions in |
| 5 | 6 | a modular and extensible way, opting for genericity and |
| 6 | 7 | flexibility instead of speed. Instead of understanding |
| 18 | 19 | build-type: Simple |
| 19 | 20 | cabal-version: >=1.10 |
| 20 | 21 | |
| 22 | source-repository head | |
| 23 | type: git | |
| 24 | location: git://github.com/aisamanra/s-cargot.git | |
| 25 | ||
| 21 | 26 | library |
| 22 | 27 | exposed-modules: Data.SCargot, |
| 23 | 28 | Data.SCargot.Repr, |
| 24 | 29 | Data.SCargot.Repr.Basic, |
| 25 | 30 | Data.SCargot.Repr.Rich, |
| 26 | 31 | Data.SCargot.Repr.WellFormed, |
| 27 | Data.SCargot.General, | |
| 28 | Data.SCargot.Pretty, | |
| 29 |
Data.SCargot. |
|
| 32 | Data.SCargot.Parse, | |
| 33 | Data.SCargot.Print, | |
| 30 | 34 | Data.SCargot.Comments, |
| 31 | 35 | Data.SCargot.Common, |
| 32 |
Data.SCargot. |
|
| 36 | Data.SCargot.Language.Basic, | |
| 37 | Data.SCargot.Language.HaskLike | |
| 33 | 38 | build-depends: base >=4.7 && <5, |
| 34 | 39 | parsec, |
| 35 | 40 | text, |