Commit d1155974e20f2dc67045c96f0c51b9b5c1bc8362 - s-cargot

Fixed comment representation and began to add more documentation + examples Getty Ritter 10 years ago

4 changed file(s) with 84 addition(s) and 54 deletion(s). Collapse all Expand all

+72

-42

Data/SCargot/General.hs less more

8	8	, mkSpec
9	9	, convertSpec
10	10	, addReader
11		, ~~add~~Comment
	11	, setComment
12	12	-- * Specific SExprSpec Conversions
13	13	, asRich
14	14	, asWellFormed

24	24	, Serializer
25	25	) where
26	26
27		import Control.Applicative ((<*))
	27	import Control.Applicative ((<), (>), (<\|>), (<*>), (<$>), pure)
28	28	import Control.Monad ((>=>))
29	29	import Data.Attoparsec.Text
30		import Data.Char (isAlpha)
	30	import Data.Char (isAlpha, isDigit, isAlphaNum)
31	31	import Data.Map.Strict (Map)
32	32	import qualified Data.Map.Strict as M
33		import Data.Text (Text)
	33	import Data.Text (Text, pack, unpack)
34	34
35	35	import Prelude hiding (takeWhile)
36	36
37	37	import Data.SCargot.Repr
38	38
39	39	type ReaderMacroMap atom = Map Char (Reader atom)
40		type CommentMap = Map Char Comment
41	40
42	41	-- \| A 'Reader' represents a reader macro: it takes a parser for
43	42	-- the S-Expression type and performs as much or as little
44	43	-- parsing as it would like, and then returns an S-expression.
45	44	type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
46	45
47		-- \| A 'Comment' represents any kind of skippable comment.
	46	-- \| A 'Comment' represents any kind of skippable comment. This
	47	-- parser __must__ be able to fail if a comment is not being
	48	-- recognized, and it __must__ not consume any input.
48	49	type Comment = Parser ()
49	50
50	51	-- \| A 'Serializer' is any function which can serialize an Atom

61	62	{ sesPAtom :: Parser atom
62	63	, sesSAtom :: Serializer atom
63	64	, readerMap :: ReaderMacroMap atom
64		, comment :: Comment
	65	, comment :: Maybe Comment
65	66	, postparse :: SExpr atom -> Either String carrier
66	67	, preserial :: carrier -> SExpr atom
67	68	}
68	69
69	70	-- \| Create a basic 'SExprSpec' when given a parser and serializer
70		-- for an atom type.
	71	-- for an atom type. A small minimal 'SExprSpec' that recognizes
	72	-- any alphanumeric sequence as a valid atom looks like:
	73	--
	74	-- > simpleSpec :: SExprSpec Text (SExpr Text)
	75	-- > simpleSpec = mkSpec (takeWhile1 isAlphaNum) id
71	76	mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
72	77	mkSpec p s = SExprSpec
73	78	{ sesPAtom = p
74	79	, sesSAtom = s
75	80	, readerMap = M.empty
76		, comment~~Map = skipSpace~~
	81	, comment = Nothing
77	82	, postparse = return
78	83	, preserial = id
79	84	}

81	86	-- \| Modify the carrier type for a 'SExprSpec'. This is
82	87	-- used internally to convert between various 'SExpr' representations,
83	88	-- but could also be used externally to add an extra conversion layer
84		-- onto a 'SExprSpec', e.g. for a custom Lisp-like language:
85		--
86		-- > mySpec :: SExprSpec MyAtomType MyAST
87		-- > mySpec = convertSpec sexprToMyAST myASTToSexpr spec
88		-- > where spec = mkSpec myParser mySerializer
89		convertSpec :: (b -> Either String c) -> (c -> b) -> SExprSpec a b -> SExprSpec a c
	89	-- onto a 'SExprSpec'.
	90	--
	91	-- The following defines an S-expression spec that recognizes the
	92	-- language of binary addition trees. It does so by first transforming
	93	-- the internal S-expression representation using 'asWellFormed', and
	94	-- then providing a conversion between the 'WellFormedSExpr' type and
	95	-- an @Expr@ AST. Notice that the below parser uses 'String' as its
	96	-- underlying atom type.
	97	--
	98	-- > data Expr = Add Expr Expr \| Num Int deriving (Eq, Show)
	99	-- >
	100	-- > toExpr :: WellFormedSExpr String -> Either String Expr
	101	-- > toExpr (WFSList [WFSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r
	102	-- > toExpr (WFSAtom c) \| all isDigit c = pure (Num (read c))
	103	-- > toExpr c = Left ("Invalid expr: " ++ show c)
	104	-- >
	105	-- > fromExpr :: Expr -> WellFormedSExpr String
	106	-- > fromExpr (Add l r) = WFSList [WFSAtom "+", fromExpr l, fromExpr r]
	107	-- > fromExpr (Num n) = WFSAtom (show n)
	108	-- >
	109	-- > mySpec :: SExprSpec String Expr
	110	-- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack
	111	-- > where parser = unpack <$> takeWhile1 isValidChar
	112	-- > isValidChar c = isDigit c \|\| c == '+'
	113	convertSpec :: (b -> Either String c) -> (c -> b)
	114	-> SExprSpec a b -> SExprSpec a c
90	115	convertSpec f g spec = spec
91	116	{ postparse = postparse spec >=> f
92	117	, preserial = preserial spec . g

108	133	-- can be recursively called to parse more S-Expressions, and begins
109	134	-- parsing after the reader character has been removed from the
110	135	-- stream.
	136	--
	137	-- The following defines an S-expression variant that treats
	138	-- @'expr@ as being sugar for @(quote expr)@:
	139	--
	140	-- > mySpec :: SExprSpec Text (SExpr Text)
	141	-- > mySpec = addReader '\'' reader $ mkSpec (takeWhile1 isAlphaNum) id
	142	-- > where reader p = quote <$> p
	143	-- > quote e = SCons (SAtom "quote") (SCons e SNil)
111	144	addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
112	145	addReader c reader spec = spec
113	146	{ readerMap = M.insert c reader (readerMap spec) }
114	147
115		-- \| Add the ability to ignore some kind of comment. If the comment
116		-- parser overlaps with a reader macro or the atom parser, then the
117		-- ~~former will be tried first.~~
	148	-- \| Add the ability to ignore some kind of comment. This gets
	149	-- factored into whitespace parsing, and it's very important that
	150	-- the parser supplied __be able to fail__ (as otherwise it will
	151	-- cause an infinite loop), and also that it __not consume any input__
	152	-- (which may require it to be wrapped in 'try'.)
	153	--
	154	-- The following code defines an S-expression variant that skips
	155	-- C++-style comments, i.e. those which begin with @//@ and last
	156	-- until the end of a line:
	157	--
	158	-- > t :: SExprSpec Text (SExpr Text)
	159	-- > t = setComment comm $ mkSpec (takeWhile1 isAlphaNum) id
	160	-- > where comm = try (string "//" > takeWhile (/= '\n') > pure ())
	161
118	162	setComment :: Comment -> SExprSpec a c -> SExprSpec a c
119		setComment c spec = spec { comment = c }
	163	setComment c spec = spec { comment = Just c }
120	164
121	165	-- \| Add the ability to skip line comments beginning with a semicolon.
122	166	withSemicolonComments :: SExprSpec a c -> SExprSpec a c
123		withSemicolonComments = ~~addComment ';' (skipWhile (\ c -> c /= '\n'~~))
	167	withSemicolonComments = setComment (char ';' >> takeWhile (/='\n') >> return ())
124	168
125	169	-- \| Add the ability to understand a quoted S-Expression. In general,
126	170	-- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is

128	172	-- expressions to a 'SExprSpec', provided that you supply which
129	173	-- atom you want substituted in for the symbol @quote@.
130	174	withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a)
131		withQuote q = addReader '\'' prs
132		where prs p = go `fmap` p
133		~~go s = SCons (SAtom q) (SCons s SNil~~)
	175	withQuote q = addReader '\'' (fmap go)
	176	where go s = SCons (SAtom q) (SCons s SNil)
134	177
135	178	parseGenericSExpr ::
136	179	Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)

174	217
175	218	-- \| Given a CommentMap, create the corresponding parser to
176	219	-- skip those comments (if they exist).
177		buildSkip :: CommentMap -> Parser ()
178		buildSkip m = skipSpace >> comments >> skipSpace
179		where comments = do
180		c <- peekChar
181		case c of
182		Nothing -> return ()
183		Just c' -> case M.lookup c' m of
184		Just p -> anyChar >> p
185		Nothing -> return ()
186
187		(#) :: a -> (a -> b) -> b
188		(#) = flip ($)
189
190		testSpec :: SExprSpec Text (SExpr Text)
191		testSpec = mkSpec (takeWhile1 isAlpha) id
192		# withQuote "quote"
193		~~# addReader '#' (\ p -> SCons (SAtom "vector") `fmap` p~~)
	220	buildSkip :: Maybe (Parser ()) -> Parser ()
	221	buildSkip Nothing = skipSpace
	222	buildSkip (Just c) = alternate
	223	where alternate = skipSpace >> ((c >> alternate) <\|> return ())
194	224
195	225	-- \| Decode a single S-expression. If any trailing input is left after
196	226	-- the S-expression (ignoring comments or whitespace) then this

198	228	-- all the S-expressions found at the top level.
199	229	decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
200	230	decodeOne SExprSpec { .. } = parseOnly (parser <* endOfInput) >=> postparse
201		where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment~~Map~~)
	231	where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment)
202	232
203	233	-- \| Decode several S-expressions according to a given 'SExprSpec'. This
204	234	-- will return a list of every S-expression that appears at the top-level

206	236	decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
207	237	decode SExprSpec { .. } =
208	238	parseOnly (many1 parser <* endOfInput) >=> mapM postparse
209		where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment~~Map~~)
	239	where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment)
210	240
211	241	-- \| Emit an S-Expression in a machine-readable way. This
212	242	encode :: SExprSpec atom carrier -> carrier -> Text

-3

Data/SCargot/Repr.hs less more

	1	{-# LANGUAGE DeriveFunctor #-}
	2
1	3	module Data.SCargot.Repr
2	4	( -- * Elementary SExpr representation
3	5	SExpr(..)

19	21	= SCons (SExpr atom) (SExpr atom)
20	22	\| SAtom atom
21	23	\| SNil
22		deriving (Eq, Show, Read)
	24	deriving (Eq, Show, Read, Functor)
23	25
24	26	-- \| Sometimes, the cons-based interface is too low
25	27	-- level, and we'd rather have the lists themselves

36	38	= RSList [RichSExpr atom]
37	39	\| RSDotted [RichSExpr atom] atom
38	40	\| RSAtom atom
39		deriving (Eq, Show, Read)
	41	deriving (Eq, Show, Read, Functor)
40	42
41	43	-- \| It should always be true that
42	44	--

66	68	data WellFormedSExpr atom
67	69	= WFSList [WellFormedSExpr atom]
68	70	\| WFSAtom atom
69		deriving (Eq, Show, Read)
	71	deriving (Eq, Show, Read, Functor)
70	72
71	73	-- \| This will be @Nothing@ if the argument contains an
72	74	-- improper list. It should hold that

-2

Data/SCargot/Rivest.hs less more

5	5	import qualified Data.ByteString.Base64 as B64
6	6	import Data.Text (Text)
7	7	import qualified Data.Text as T
8
9		newtype Atom = Atom { fromAtom :: ByteString } deriving (Eq, Show, Read)
10	8
11	9	pToken :: Parser ByteString
12	10	pToken = undefined

-7

Data/SCargot/Tutorial.hs less more

9	9	module Data.SCargot.Tutorial
10	10	( -- * Basic Usage and Organization
11	11	-- $usage
	12	-- * Building a Custom Config Format
	13	-- $config
12	14	-- * Analyzing Scheme code
13	15	-- $scheme
14		-- * Building a Custom Config Format
15		-- $config
16	16	-- * Building a Custom Lisp
17	17	-- $lisp
18	18	) where
19	19
20	20	{- $usage
	21	When people talk about s-expressions, they're really talking about
	22	a _family_ of formats that have in common a rough structure and
	23	the fact that -}
	24
	25	{- $config
21	26
22	27	-}
23	28

25	30
26	31	-}
27	32
28
29		{- $config
30
31		-}
32
33	33	{- $lisp
34	34
35	35	-}