gdritter repos s-cargot / d01604d
S-Expression parser added and working, although in the middle of restructuring how comments work Getty Ritter 9 years ago
6 changed file(s) with 217 addition(s) and 71 deletion(s). Collapse all Expand all
44 -- macro definitions, this module should successfully parse and
55 -- desugar even quoted lists and vector literals.
66
7 module Data.SCargot.CommonLisp where
7 module Data.SCargot.CommonLisp
8 ( CLAtom(..)
9 , CommonLispSpec
10 , withComments
11 , withQuote
12 , withVectors
13 , decode
14 , encode
15 ) where
816
917 data CLAtom
1018 = CLSymbol Text
1422 | CLFloat Double
1523 deriving (Eq, Show, Read)
1624
17 type CommonLispSpec carrier = SExprSpec CLAtom carrier
25 data CommonLispSpec carrier = CommonLispSpec
26 { sexprSpec :: SExprSpec CLAtom carrier
27 , poundReaders :: ReaderMacroMap CLAtom
28 }
1829
1930 withComments :: CommonLispSpec c -> CommonLispSpec c
2031 withComments = addCommentType (const () <$> (char ';' *> restOfLine))
2839 withVectors :: CommonLispSpec c -> CommonLispSpec c
2940 withVectors = addReader '#' (go <$> parse)
3041
31 parse :: CommonLispSpec c -> Text -> Either String c
32 serialize :: CommonLispSpec c -> c -> Text
42 decode :: CommonLispSpec c -> Text -> Either String c
43 encode :: CommonLispSpec c -> c -> Text
1 {-# LANGUAGE RecordWildCards #-}
2 {-# LANGUAGE ViewPatterns #-}
3 {-# LANGUAGE OverloadedStrings #-}
4
15 module Data.SCargot.General
26 ( -- * SExprSpec
37 SExprSpec
48 , mkSpec
59 , convertSpec
610 , addReader
7 , addCommentType
11 , addComment
12 -- * Specific SExprSpec Conversions
813 , asRich
914 , asWellFormed
10 -- * A Few Standard Reader Macros
11 , quote
12 , vector
15 , withSemicolonComments
16 , withQuote
1317 -- * Using a SExprSpec
14 , parseSExpr
15 , serializeSExpr
18 , decode
19 , decodeOne
20 , encode
21 -- * Useful Type Aliases
22 , Reader
23 , Comment
24 , Serializer
1625 ) where
1726
18 import Control.Applicative
27 import Control.Applicative ((<*))
28 import Control.Monad ((>=>))
1929 import Data.Attoparsec.Text
20 import Data.Map.String (Map)
21 import qualified Data.Map.String as M
30 import Data.Char (isAlpha)
31 import Data.Map.Strict (Map)
32 import qualified Data.Map.Strict as M
33 import Data.Text (Text)
34
35 import Prelude hiding (takeWhile)
2236
2337 import Data.SCargot.Repr
2438
2539 type ReaderMacroMap atom = Map Char (Reader atom)
26 type CommentMap = Map Char (Parser ())
40 type CommentMap = Map Char Comment
41
42 -- | A 'Reader' represents a reader macro: it takes a parser for
43 -- the S-Expression type and performs as much or as little
44 -- parsing as it would like, and then returns an S-expression.
2745 type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
46
47 -- | A 'Comment' represents any kind of skippable comment.
48 type Comment = Parser ()
49
50 -- | A 'Serializer' is any function which can serialize an Atom
51 -- to 'Text'.
2852 type Serializer atom = atom -> Text
2953
3054 -- | A 'SExprSpec' describes a parser and emitter for a particular
3761 { sesPAtom :: Parser atom
3862 , sesSAtom :: Serializer atom
3963 , readerMap :: ReaderMacroMap atom
40 , commentMap :: CommentMap
64 , comment :: Comment
4165 , postparse :: SExpr atom -> Either String carrier
4266 , preserial :: carrier -> SExpr atom
4367 }
4468
45 -- | This creates a basic 'SExprSpec' when given a parser and serializer
69 -- | Create a basic 'SExprSpec' when given a parser and serializer
4670 -- for an atom type.
4771 mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
4872 mkSpec p s = SExprSpec
49 { sesPAtom = p
50 , sesSAtom = s
51 , rmMap = M.empty
52 , postparse = return
53 , preserial = id
73 { sesPAtom = p
74 , sesSAtom = s
75 , readerMap = M.empty
76 , commentMap = skipSpace
77 , postparse = return
78 , preserial = id
5479 }
5580
56 -- | This is used to modify the carrier type for a 'SExprSpec'. This is
81 -- | Modify the carrier type for a 'SExprSpec'. This is
5782 -- used internally to convert between various 'SExpr' representations,
5883 -- but could also be used externally to add an extra conversion layer
5984 -- onto a 'SExprSpec', e.g. for a custom Lisp-like language:
6489 convertSpec :: (b -> Either String c) -> (c -> b) -> SExprSpec a b -> SExprSpec a c
6590 convertSpec f g spec = spec
6691 { postparse = postparse spec >=> f
67 , preserial = g . preserial spec
92 , preserial = preserial spec . g
6893 }
6994
70 addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
71 addReader c reader spec = spec { rmMap = insert c reader (rmMap spec) }
72
73 addCommentType :: Char -> Comment -> SExprSpec a c -> SExprSpec a c
74 addCommentType c comment spec = spec { }
75
76 quote :: atom -> Reader atom
77 quote q parse = go <$> parse
78 where go v = SCons q (SCons v SNil)
79
95 -- | Convert the final output representation from the 'SExpr' type
96 -- to the 'RichSExpr' type.
8097 asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b)
8198 asRich = convertSpec (return . toRich) fromRich
8299
100 -- | Convert the final output representation from the 'SExpr' type
101 -- to the 'WellFormedSExpr' type.
83102 asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b)
84103 asWellFormed = convertSpec toWellFormed fromWellFormed
85104
86 parseGenericSExpr :: Parser atom -> ReaderMacroMap atom -> CommentMap -> Parser (SExpr atom)
87 parseGenericSExpr atom reader comment =
88 char '(' *>
89
90 -- |
91 parseSExpr :: SExprSpec atom carrier -> Text -> Either String carrier
92 parseSExpr spec = undefined
93
94 -- | blah
95 serializeSExpr :: SExprSpec atom carrier -> carrier -> Text
96 serializeSExpr spec = serializeGenericSExpr ses . preserial
105 -- | Add the ability to execute some particular reader macro, as
106 -- defined by its initial character and the 'Parser' which returns
107 -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which
108 -- can be recursively called to parse more S-Expressions, and begins
109 -- parsing after the reader character has been removed from the
110 -- stream.
111 addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
112 addReader c reader spec = spec
113 { readerMap = M.insert c reader (readerMap spec) }
114
115 -- | Add the ability to ignore some kind of comment. If the comment
116 -- parser overlaps with a reader macro or the atom parser, then the
117 -- former will be tried first.
118 setComment :: Comment -> SExprSpec a c -> SExprSpec a c
119 setComment c spec = spec { comment = c }
120
121 -- | Add the ability to skip line comments beginning with a semicolon.
122 withSemicolonComments :: SExprSpec a c -> SExprSpec a c
123 withSemicolonComments = addComment ';' (skipWhile (\ c -> c /= '\n'))
124
125 -- | Add the ability to understand a quoted S-Expression. In general,
126 -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is
127 -- a convenience function which allows you to easily add quoted
128 -- expressions to a 'SExprSpec', provided that you supply which
129 -- atom you want substituted in for the symbol @quote@.
130 withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a)
131 withQuote q = addReader '\'' prs
132 where prs p = go `fmap` p
133 go s = SCons (SAtom q) (SCons s SNil)
134
135 parseGenericSExpr ::
136 Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
137 parseGenericSExpr atom reader skip = do
138 let sExpr = parseGenericSExpr atom reader skip
139 skip
140 c <- peekChar
141 r <- case c of
142 Nothing -> fail "Unexpected end of input"
143 Just '(' -> char '(' >> skip >> parseList sExpr skip
144 Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
145 _ -> SAtom `fmap` atom
146 skip
147 return r
148
149 parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
150 parseList sExpr skip = do
151 i <- peekChar
152 case i of
153 Nothing -> fail "Unexpected end of input"
154 Just ')' -> char ')' >> return SNil
155 _ -> do
156 car <- sExpr
157 skip
158 c <- peekChar
159 case c of
160 Just '.' -> do
161 char '.'
162 cdr <- sExpr
163 skip
164 char ')'
165 skip
166 return (SCons car cdr)
167 Just ')' -> do
168 char ')'
169 skip
170 return (SCons car SNil)
171 _ -> do
172 cdr <- parseList sExpr skip
173 return (SCons car cdr)
174
175 -- | Given a CommentMap, create the corresponding parser to
176 -- skip those comments (if they exist).
177 buildSkip :: CommentMap -> Parser ()
178 buildSkip m = skipSpace >> comments >> skipSpace
179 where comments = do
180 c <- peekChar
181 case c of
182 Nothing -> return ()
183 Just c' -> case M.lookup c' m of
184 Just p -> anyChar >> p
185 Nothing -> return ()
186
187 (#) :: a -> (a -> b) -> b
188 (#) = flip ($)
189
190 testSpec :: SExprSpec Text (SExpr Text)
191 testSpec = mkSpec (takeWhile1 isAlpha) id
192 # withQuote "quote"
193 # addReader '#' (\ p -> SCons (SAtom "vector") `fmap` p)
194
195 -- | Decode a single S-expression. If any trailing input is left after
196 -- the S-expression (ignoring comments or whitespace) then this
197 -- will fail: for those cases, use 'decode', which returns a list of
198 -- all the S-expressions found at the top level.
199 decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
200 decodeOne SExprSpec { .. } = parseOnly (parser <* endOfInput) >=> postparse
201 where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
202
203 -- | Decode several S-expressions according to a given 'SExprSpec'. This
204 -- will return a list of every S-expression that appears at the top-level
205 -- of the document.
206 decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
207 decode SExprSpec { .. } =
208 parseOnly (many1 parser <* endOfInput) >=> mapM postparse
209 where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
210
211 -- | Emit an S-Expression in a machine-readable way. This
212 encode :: SExprSpec atom carrier -> carrier -> Text
213 encode SExprSpec { .. } = undefined
1313
1414 import Data.SCargot.Repr as R
1515
16 pattern List xs = R.RSList xs
17 pattern DotList xs = R.RSDotted xs
18 pattern Atom a = R.RSAtom a
16 pattern Atom a = R.RSAtom a
17 pattern List xs = R.RSList xs
18 pattern DotList xs x = R.RSDotted xs x
11 {-# LANGUAGE PatternSynonyms #-}
22
3 module Data.SCargot.Repr.Rich
3 module Data.SCargot.Repr.WellFormed
44 ( -- * 'WellFormedSExpr' representation
55 R.WellFormedSExpr(..)
66 , R.toWellFormed
11 module Data.SCargot.Repr
2 ( SExpr(..)
2 ( -- * Elementary SExpr representation
3 SExpr(..)
4 -- * Rich SExpr representation
35 , RichSExpr(..)
46 , toRich
57 , fromRich
8 -- * Well-Formed SExpr representation
69 , WellFormedSExpr(..)
710 , toWellFormed
811 , fromWellFormed
2326 -- exposed. In this case, we have 'RSList' to
2427 -- represent a well-formed cons list, and 'RSDotted'
2528 -- to represent an improper list of the form
26 -- @(a b c . d)@.
29 -- @(a b c . d)@. This representation is based on
30 -- the shape of the parsed S-Expression, and not on
31 -- how it was represented, so @(a . (b))@ is going to
32 -- be represented as @RSList[RSAtom a, RSAtom b]@
33 -- despite having been originally represented as a
34 -- dotted list.
2735 data RichSExpr atom
2836 = RSList [RichSExpr atom]
2937 | RSDotted [RichSExpr atom] atom
3038 | RSAtom atom
3139 deriving (Eq, Show, Read)
3240
33 -- | A Rich S-Expression might be a nicer interface
34 -- for certain libraries. It should always be true
35 -- that
41 -- | It should always be true that
3642 --
37 -- > fromRich . toRich == id
43 -- > fromRich (toRich x) == x
3844 --
3945 -- and that
4046 --
41 -- > toRich . fromRich == id
47 -- > toRich (fromRich x) == x
4248 toRich :: SExpr atom -> RichSExpr atom
4349 toRich (SAtom a) = RSAtom a
44 toRich (SCons x xs) = go xs [toRich x]
45 where go (SAtom a) rs = RSDotted rs a
46 go SNil rs = RSList rs
47 go (SCons x xs) rs = go xs (toRich x:rs)
50 toRich (SCons x xs) = go xs (toRich x:)
51 where go (SAtom a) rs = RSDotted (rs []) a
52 go SNil rs = RSList (rs [])
53 go (SCons x xs) rs = go xs (rs . (toRich x:))
4854
4955 -- | This follows the same laws as 'toRich'.
5056 fromRich :: RichSExpr atom -> SExpr atom
6268 | WFSAtom atom
6369 deriving (Eq, Show, Read)
6470
65 -- | This will be @Nothing@ is the argument contains an
71 -- | This will be @Nothing@ if the argument contains an
6672 -- improper list. It should hold that
6773 --
68 -- > toWellFormed . fromWellFormed == Right
74 -- > toWellFormed (fromWellFormed x) == Right x
75 --
76 -- and also (more tediously) that
77 --
78 -- > case fromWellFormed (toWellFormed x) of
79 -- > Left _ -> True
80 -- > Right y -> x == y
6981 toWellFormed :: SExpr atom -> Either String (WellFormedSExpr atom)
82 toWellFormed SNil = return (WFSList [])
7083 toWellFormed (SAtom a) = return (WFSAtom a)
7184 toWellFormed (SCons x xs) = do
7285 x' <- toWellFormed x
73 go xs [x']
86 go xs (x':)
7487 where go (SAtom a) rs = Left "Found atom in cdr position"
75 go SNil rs = return (WFSList rs)
88 go SNil rs = return (WFSList (rs []))
7689 go (SCons x xs) rs = do
7790 x' <- toWellFormed x
78 go xs (x':rs)
91 go xs (rs . (x':))
7992
8093 -- | Convert a WellFormedSExpr back into a SExpr.
8194 fromWellFormed :: WellFormedSExpr atom -> SExpr atom
8295 fromWellFormed (WFSAtom a) = SAtom a
8396 fromWellFormed (WFSList xs) =
84 foldr SCons SNil (map fromWellFormed xs)
97 foldl SCons SNil (map fromWellFormed xs)
1313 cabal-version: >=1.10
1414
1515 library
16 -- exposed-modules:
17 -- other-modules:
18 -- other-extensions:
19 build-depends: base >=4.7 && <4.8
20 -- hs-source-dirs:
21 default-language: Haskell2012
16 exposed-modules: Data.SCargot.Repr,
17 Data.SCargot.Repr.Basic,
18 Data.SCargot.Repr.Rich,
19 Data.SCargot.Repr.WellFormed,
20 Data.SCargot.General,
21 Data.SCargot.Tutorial
22 -- other-modules:
23 -- other-extensions:
24 build-depends: base >=4.7 && <4.8, attoparsec, text, containers
25 -- hs-source-dirs:
26 default-language: Haskell2010