Commit 69205b487f21ac195148dd33b11458f944446bec - s-cargot

+13

-15

Data/SCargot/Basic.hs less more

1	1	{-# LANGUAGE OverloadedStrings #-}
2	2
3	3	module Data.SCargot.Basic
4		( basicSpec
5		, asRich
6		, asWellFormed
7		, addReader
8		, setComment
9		, withLispComments
10		~~, withQuote~~
	4	( -- * Spec
	5	-- $descr
	6	basicSpec
11	7	) where
12	8
13	9	import Control.Applicative ((<$>))

18	14	import Data.SCargot.Repr.Basic (SExpr)
19	15	import Data.SCargot.General ( SExprSpec
20	16	, mkSpec
21		, asRich
22		, asWellFormed
23		, addReader
24		, setComment
25		, withQuote
26	17	)
27	18	import Data.SCargot.Comments (withLispComments)
28	19

32	23	\|\| c == '+' \|\| c == '<' \|\| c == '>'
33	24	\|\| c == '=' \|\| c == '!' \|\| c == '?'
34	25
	26	-- $descr
	27	-- The 'basicSpec' describes S-expressions whose atoms are simply
	28	-- text strings that contain alphanumeric characters and a small
	29	-- set of punctuation. It does no parsing of numbers or other data
	30	-- types, and will accept tokens that typical Lisp implementations
	31	-- would find nonsensical (like @77foo@).
	32	--
	33	-- Atoms recognized by the 'basicSpec' are any string matching the
	34	-- regular expression @[A-Za-z0-9+*<>/=!?-]+@.
	35
35	36	-- \| A 'SExprSpec' that understands atoms to be sequences of
36	37	-- alphanumeric characters as well as the punctuation
37	38	-- characters @[-*/+<>=!?]@, and does no processing of them.
38		-- This is not quite representative of actual lisps, which
39		-- would, for example, accept various kinds of string
40		-- and numeric literals.
41	39	basicSpec :: SExprSpec Text (SExpr Text)
42	40	basicSpec = mkSpec pToken id
43	41	where pToken = pack <$> many1 (satisfy isAtomChar)

+12

-5

Data/SCargot/Comments.hs less more

1	1	{-# LANGUAGE OverloadedStrings #-}
2	2
3	3	module Data.SCargot.Comments
4		( -- * Comment Syntax
5		-- $intro
6		~~-- * Lisp Comments~~
	4	( -- $intro
	5
	6	-- * Lisp-Style Syntax
	7	-- $lisp
7	8	withLispComments
8	9	-- * Other Existing Comment Syntaxes
9	10	-- ** Scripting Language Syntax
10	11	-- $script
11	12	, withOctothorpeComments
12		-- ** C-~~Lik~~e Syntax
	13	-- ** C-Style Syntax
13	14	-- $clike
14	15	, withCLikeLineComments
15	16	, withCLikeBlockComments
16	17	, withCLikeComments
17		-- ** Haskell Syntax
	18	-- ** Haskell-Style Syntax
18	19	-- $haskell
19	20	, withHaskellLineComments
20	21	, withHaskellBlockComments

146	147
147	148	-}
148	149
	150	{- $lisp
	151	> (one ; a comment
	152	> two ; another one
	153	> three)
	154	-}
	155
149	156	{- $script
150	157	> (one # a comment
151	158	> two # another one

+114

-0

Data/SCargot/Common.hs less more

	1	module Data.SCargot.Common ( number
	2	, decNumber
	3	, hexNumber
	4	, octNumber
	5	, sign
	6	-- * Lisp Identifier Syntaxes
	7	, parseR5RSIdent
	8	, parseR6RSIdent
	9	, parseR7RSIdent
	10	) where
	11
	12	import Data.Char
	13	import Data.Text (Text)
	14	import qualified Data.Text as T
	15	import Text.Parsec
	16	import Text.Parsec.Char (satisfy)
	17	import Text.Parsec.Text (Parser)
	18
	19	parseR5RSIdent :: Parser Text
	20	parseR5RSIdent =
	21	T.pack <$> ((:) <$> initial <*> many subsequent <\|> peculiar)
	22	where initial = letter <\|> oneOf "!$%&*/:<=>?^_~"
	23	subsequent = initial <\|> digit <\|> oneOf "+-.@"
	24	peculiar = string "+" <\|> string "-" <\|> string "..."
	25
	26	hasCategory :: Char -> [GeneralCategory] -> Bool
	27	hasCategory c cs = generalCategory c `elem` cs
	28
	29	parseR6RSIdent :: Parser Text
	30	parseR6RSIdent =
	31	T.pack <$> ((:) <$> initial <*> many subsequent <\|> peculiar)
	32	where initial = constituent <\|> oneOf "!$%&*/:<=>?^_~" <\|> inlineHex
	33	constituent = letter
	34	<\|> uniClass (\ c -> isLetter c \|\|
	35	isSymbol c \|\|
	36	hasCategory c
	37	[ NonSpacingMark
	38	, LetterNumber
	39	, OtherNumber
	40	, DashPunctuation
	41	, ConnectorPunctuation
	42	, OtherPunctuation
	43	, PrivateUse
	44	])
	45	inlineHex = (chr . fromIntegral) <$> (string "\\x" > hexNumber < char ';')
	46	subsequent = initial <\|> digit <\|> oneOf "+-.@"
	47	<\|> uniClass (\ c -> hasCategory c
	48	[ DecimalNumber
	49	, SpacingCombiningMark
	50	, EnclosingMark
	51	])
	52	peculiar = string "+" <\|> string "-" <\|> string "..." <\|>
	53	((++) <$> string "->" <*> many subsequent)
	54	uniClass :: (Char -> Bool) -> Parser Char
	55	uniClass sp = satisfy (\ c -> c > '\x7f' && sp c)
	56
	57	parseR7RSIdent :: Parser Text
	58	parseR7RSIdent = T.pack <$>
	59	( (:) <$> initial <*> many subsequent
	60	<\|> char '\|' > many1 symbolElement < char '\|'
	61	<\|> peculiar
	62	)
	63	where initial = letter <\|> specInit
	64	specInit = oneOf "!$%&*/:<=>?^_~"
	65	subsequent = initial <\|> digit <\|> specSubsequent
	66	specSubsequent = expSign <\|> oneOf ".@"
	67	expSign = oneOf "+-"
	68	symbolElement = undefined
	69	peculiar = undefined
	70
	71	-- \| A helper function for defining parsers for arbitrary-base integers.
	72	-- The first argument will be the base, and the second will be the
	73	-- parser for the individual digits.
	74	number :: Integer -> Parser Char -> Parser Integer
	75	number base digits = foldl go 0 <$> many1 digits
	76	where go x d = base * x + toInteger (value d)
	77	value c
	78	\| c == 'a' \|\| c == 'A' = 0xa
	79	\| c == 'b' \|\| c == 'B' = 0xb
	80	\| c == 'c' \|\| c == 'C' = 0xc
	81	\| c == 'd' \|\| c == 'D' = 0xd
	82	\| c == 'e' \|\| c == 'E' = 0xe
	83	\| c == 'f' \|\| c == 'F' = 0xf
	84	\| c >= '0' && c <= '9' = fromEnum c - fromEnum '0'
	85	\| otherwise = error ("Unknown letter in number: " ++ show c)
	86
	87	-- \| A parser for bare binary numbers
	88	binNumber :: Parser Integer
	89	binNumber = number 2 (char '0' <\|> char '1')
	90
	91	-- \| A parser for bare octal numbers
	92	octNumber :: Parser Integer
	93	octNumber = number 8 digit
	94
	95	-- \| A parser for bare decimal numbers
	96	decNumber :: Parser Integer
	97	decNumber = number 10 digit
	98
	99	-- \| A parser for bare hexadecimal numbers
	100	hexNumber :: Parser Integer
	101	hexNumber = number 16 hexDigit
	102
	103	-- \| A parser for numeric signs, represented as a function from numbers
	104	-- to numbers. It will parse '+' as the identity function, '-', as
	105	-- 'negate', or consume no input and return the identity function.
	106	-- This can be combined with other numeric literals to implement
	107	-- signedness:
	108	--
	109	-- > myNum = go <$> sign <*> decNumber
	110	-- > where go s n = s n
	111	sign :: Num a => Parser (a -> a)
	112	sign = (pure id <* char '+')
	113	<\|> (pure negate <* char '-')
	114	<\|> pure id

+8

-1

Data/SCargot/General.hs less more

45	45	import Text.Parsec.Char (anyChar, space)
46	46	import Text.Parsec.Text (Parser)
47	47
48		import Data.SCargot.Repr
	48	import Data.SCargot.Repr ( SExpr(..)
	49	, RichSExpr
	50	, WellFormedSExpr
	51	, fromRich
	52	, toRich
	53	, fromWellFormed
	54	, toWellFormed
	55	)
49	56
50	57	type ReaderMacroMap atom = Map Char (Reader atom)
51	58

+16

-44

Data/SCargot/HaskLike.hs less more

14	14
15	15	import Prelude hiding (concatMap)
16	16
	17	import Data.SCargot.Common
17	18	import Data.SCargot.Repr.Basic (SExpr)
18	19	import Data.SCargot.General (SExprSpec, mkSpec)
19	20

22	23	This module is intended for simple, ad-hoc configuration or data formats
23	24	that might not need their on rich structure but might benefit from a few
24	25	various literal formats. the 'haskLikeSpec' understands identifiers as
25		defined by R6RS as well as string, integer, and floating-point literals
	26	defined by R5RS as well as string, integer, and floating-point literals
26	27	as defined by the Haskell spec, but won't get any Lisp-specific vector
27	28	literals or other structure.
28	29

32	33	-- \| An atom type that understands Haskell-like values as well as
33	34	-- Scheme-like identifiers.
34	35	data HaskLikeAtom
35		= HSIdent Text -- ^ An identifier, parsed according to the R6RS Scheme
	36	= HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme
36	37	-- standard
37	38	\| HSString Text -- ^ A string, parsed according to the syntax for string
38	39	-- literals in the Haskell report

46	47	instance IsString HaskLikeAtom where
47	48	fromString = HSIdent . fromString
48	49
49		pToken :: Parser Text
50		pToken = pack <$> ( (:) <$> initial <*> many subsequent
51		<\|> string "+"
52		<\|> string "-"
53		<\|> string "..."
54		)
55
56		initial :: Parser Char
57		initial = letter <\|> oneOf "!$%&*/:<=>?^_~"
58
59		subsequent :: Parser Char
60		subsequent = initial <\|> digit <\|> oneOf "+-.@"
61
62	50	pString :: Parser Text
63	51	pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <\|> esc))
64	52	where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')

69	57	code = eEsc <\|> eNum <\|> eCtrl <\|> eAscii
70	58	eCtrl = char '^' >> unCtrl <$> upper
71	59	eNum = (toEnum . fromInteger) <$>
72		(decimal <\|> (char 'o' >> number 8 octDigit)
73		<\|> (char 'x' >> number 16 hexDigit))
	60	(decNumber <\|> (char 'o' >> octNumber)
	61	<\|> (char 'x' >> hexNumber))
74	62	eEsc = choice [ char a >> return b \| (a, b) <- escMap ]
75	63	eAscii = choice [ try (string a >> return b)
76	64	\| (a, b) <- asciiMap ]

89	77	"\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
90	78	"\SYN\ETB\CAN\SUB\ESC\DEL")
91	79
92		decimal :: Parser Integer
93		decimal = number 10 digit
94
95		number :: Integer -> Parser Char -> Parser Integer
96		number base digits = foldl go 0 <$> many1 digits
97		where go x d = base * x + toInteger (value d)
98		value c
99		\| c == 'a' \|\| c == 'A' = 0xa
100		\| c == 'b' \|\| c == 'B' = 0xb
101		\| c == 'c' \|\| c == 'C' = 0xc
102		\| c == 'd' \|\| c == 'D' = 0xd
103		\| c == 'e' \|\| c == 'E' = 0xe
104		\| c == 'f' \|\| c == 'F' = 0xf
105		\| c >= '0' && c <= '9' = fromEnum c - fromEnum '0'
106		\| otherwise = error ("Unknown letter in number: " ++ show c)
107
108	80	pFloat :: Parser Double
109	81	pFloat = do
110		n <- dec~~imal~~
	82	n <- decNumber
111	83	withDot n <\|> noDot n
112	84	where withDot n = do
113	85	char '.'
114		m <- dec~~imal~~
	86	m <- decNumber
115	87	e <- option 1.0 exponent
116	88	return ((fromIntegral n + asDec m 0) * e)
117	89	noDot n = do

120	92	exponent = do
121	93	oneOf "eE"
122	94	s <- power
123		x <- dec~~imal~~
	95	x <- decNumber
124	96	return (10 ** s (fromIntegral x))
125	97	asDec 0 k = k
126	98	asDec n k =

132	104	pInt :: Parser Integer
133	105	pInt = do
134	106	s <- power
135		n <- pZeroNum <\|> dec~~imal~~
	107	n <- pZeroNum <\|> decNumber
136	108	return (fromIntegral (s n))
137	109
138	110	pZeroNum :: Parser Integer
139	111	pZeroNum = char '0' >>
140		( (oneOf "xX" >> number 16 hexDigit)
141		<\|> (oneOf "oO" >> number 8 octDigit)
142		~~<\|> decimal~~
	112	( (oneOf "xX" >> hexNumber)
	113	<\|> (oneOf "oO" >> octNumber)
	114	<\|> decNumber
143	115	<\|> return 0
144	116	)
145	117
146	118	pHaskLikeAtom :: Parser HaskLikeAtom
147	119	pHaskLikeAtom
148		= HSFloat <$> (try pFloat <?> "float")
149		<\|> HSInt <$> (try pInt <?> "integer")
150		<\|> HSString <$> (pString <?> "string literal")
151		<\|> HSIdent <$> (pToken <?> "token")
	120	= HSFloat <$> (try pFloat <?> "float")
	121	<\|> HSInt <$> (try pInt <?> "integer")
	122	<\|> HSString <$> (pString <?> "string literal")
	123	<\|> HSIdent <$> (parseR5RSIdent <?> "token")
152	124
153	125	sHaskLikeAtom :: HaskLikeAtom -> Text
154	126	sHaskLikeAtom (HSIdent t) = t

+6

-4

Data/SCargot/Pretty.hs less more

92	92	indent n ts = T.replicate n " " <> ts
93	93
94	94	-- Indents every line n spaces, and adds a newline to the beginning
	95	-- used in swung indents
95	96	indentAll :: Int -> [Text] -> Text
96	97	indentAll n = ("\n" <>) . joinLines . map (indent n)
97	98
98	99	-- Indents every line but the first by some amount
	100	-- used in aligned indents
99	101	indentSubsequent :: Int -> [Text] -> Text
100	102	indentSubsequent _ [] = ""
101	103	indentSubsequent _ [t] = t

107	109	-- i swear i'll do better in the future i promise i have to
108	110	-- for my sake and for everyone's
109	111
110		-- \| Pretty-print a 'Sexpr' according to the options in a
	112	-- \| Pretty-print a 'SExpr' according to the options in a
111	113	-- 'LayoutOptions' value.
112	114	prettyPrintSExpr :: LayoutOptions a -> SExpr a -> Text
113	115	prettyPrintSExpr LayoutOptions { .. } = pHead 0

136	138	indentSubsequent (ind + headWidth + 1)
137	139	(map (pHead (ind + headWidth + 1)) lst)
138	140	body
139		\| length lst == 0 = ""
	141	\| length lst == 0 = ""
140	142	\| Just maxAmt <- maxWidth
141		, (T.length flat + ind) > maxAmt = " " <> indented
142		\| otherwise = " " <> flat
	143	, T.length flat + ind > maxAmt = " " <> indented
	144	\| otherwise = " " <> flat

+85

-15

Data/SCargot/Scheme/R7RS.hs less more

	1	{-# LANGUAGE StandaloneDeriving #-}
	2	{-# LANGUAGE OverloadedStrings #-}
	3	{-# LANGUAGE FlexibleContexts #-}
	4	{-# LANGUAGE UndecidableInstances #-}
	5
1	6	module Data.SCargot.Scheme.R7RS where
	7
	8	import Data.Char (chr, isAlphaNum)
	9	import Data.Text (Text)
	10	import qualified Data.Text as T
	11	import Data.String (IsString(..))
	12	import Data.SCargot.Common
	13	import Data.SCargot.General
	14	import Data.SCargot.Repr.Basic
	15	import Data.Word (Word8)
	16	import Text.Parsec
	17	import Text.Parsec.Text (Parser)
	18
	19	instance IsString (SchemeAtom c) where
	20	fromString = ScmIdent . fromString
2	21
3	22	-- \| A Scheme value type. This is strictly larger than just
4	23	-- 'atoms', as they may include things like vectors or

16	35	\| ScmByteVec [Word8]
17	36	\| ScmLabeledDatum Int (carrier (SchemeAtom carrier))
18	37	\| ScmLabelReference Int
	38
	39	-- \| Scheme has a lot of numbers.
	40	data SchemeNumber
	41	= ScmNumber
	42	\| ScmComplexNumber Double Double
	43	\| ScmRealNumber Double
	44	\| ScmRationalNumber Rational
	45	\| ScmInteger Integer
19	46	deriving (Eq, Show)
20	47
21		~~withQuasiQuote :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom Sexpr)~~)
	48	deriving instance Show (c (SchemeAtom c)) => Show (SchemeAtom c)
	49	deriving instance Eq (c (SchemeAtom c)) => Eq (SchemeAtom c)
	50
	51	badSpec :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr))
	52	badSpec = mkSpec (ScmIdent . T.pack <$> many1 (satisfy isAlphaNum)) undefined
	53
	54	withQuasiQuote :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr))
22	55	-> SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr))
23	56	withQuasiQuote spec = addReader '`' (fmap (go "quasiquote"))
24	57	$ addReader ',' unquote
	58	$ addReader '\'' (fmap (go "quote"))
25	59	$ spec
26	60	where go name s = name ::: s ::: Nil
27		unquote p = char '@' *> fmap (go "unquote-splicing")
28		<\|> fmap (go "unquote")
	61	unquote p = char '@' *> fmap (go "unquote-splicing") p
	62	<\|> fmap (go "unquote") p
29	63
30		octoReader :: Reader (S~~Expr (SchemeAtom SExpr)~~)
	64	octoReader :: Reader (SchemeAtom SExpr)
31	65	octoReader pSexpr =
32		string "true" *> pure (ScmBool True)
33		<\|> string "false" *> pure (ScmBool False)
34		<\|> char 't' *> pure (ScmBool True)
35		<\|> char 'f' *> pure (ScmBool False)
36		<\|> char '\\' *> characterConstant
37		<\|> char '(' *> fmap ScmVec (vector pSexpr)
38		~~<\|> string "u8(" *> bytevec~~
	66	string "true" *> pure (A (ScmBool True))
	67	<\|> string "false" *> pure (A (ScmBool False))
	68	<\|> char 't' *> pure (A (ScmBool True))
	69	<\|> char 'f' *> pure (A (ScmBool False))
	70	<\|> char '\\' *> fmap (A . ScmChar) characterConstant
	71	<\|> char '(' *> fmap (A . ScmVec) (vector pSexpr)
	72	<\|> string "u8(" *> fmap A bytevec
39	73	<\|> do n <- read <$> many1 digit
40		(char '#' *> ScmLabelReference n <\|>
41		char '=' *> fmap (ScmLabeledDatum n) pSexpr)
	74	(char '#' *> pure (A (ScmLabelReference n)) <\|>
	75	char '=' *> fmap (A . ScmLabeledDatum n) pSexpr)
42	76
43	77	vector :: Parser (SExpr (SchemeAtom SExpr)) -> Parser [SExpr (SchemeAtom SExpr)]
44		vector pSexpr =
45		(char ')' > pure []) <\|> ((:) <$> pSExpr <> vector pSexpr)
	78	vector pSExpr =
	79	(char ')' > pure []) <\|> ((:) <$> pSExpr <> vector pSExpr)
	80
	81	bytevec :: Parser (SchemeAtom SExpr)
	82	bytevec = undefined
	83
	84	characterConstant :: Parser Char
	85	characterConstant = namedCharacter
	86	<\|> (chr . fromInteger <$> (char 'x' *> hexNumber))
	87	<\|> anyCharacter
	88	where namedCharacter = string "alarm" *> pure '\x07'
	89	<\|> string "backspace" *> pure '\x08'
	90	<\|> string "delete" *> pure '\x7f'
	91	<\|> string "escape" *> pure '\x1b'
	92	<\|> string "newline" *> pure '\x0a'
	93	<\|> string "null" *> pure '\x00'
	94	<\|> string "return" *> pure '\x0d'
	95	<\|> string "space" *> pure ' '
	96	<\|> string "tab" *> pure '\x09'
	97	anyCharacter = anyToken
	98
	99	r7rsNum :: Int -> Parser Int
	100	r7rsNum radix = prefix <*> complex
	101	where prefix = radix <> exactness <\|> exactness <> radix
	102	complex = real
	103	<\|> real <* char '@' <*> real
	104	<\|> real <* char '+' <> ureal < char 'i'
	105	<\|> real <* char '-' <> ureal < char 'i'
	106	<\|> real <* char '+' <* char 'i'
	107	<\|> real <* char '-' <* char 'i'
	108	<\|> real <> infnan < char 'i'
	109	<\|> char '+' > ureal < char 'i'
	110	<\|> char '-' > ureal < char 'i'
	111	<\|> infnan <* char 'i'
	112	<\|> string "+i"
	113	<\|> string "-i"
	114	real = ($) <$> sign <*> ureal
	115	<\|> infnan

+2

-2

s-cargot.cabal less more

21	21	Data.SCargot.Pretty,
22	22	Data.SCargot.Basic,
23	23	Data.SCargot.Comments,
24		Data.SCargot.HaskLike,
25		Data.SCargot.Tutorial
	24	Data.SCargot.Common,
	25	Data.SCargot.HaskLike
26	26	-- other-modules:
27	27	-- other-extensions:
28	28	build-depends: base >=4.7 && <5, parsec, text, containers