gdritter repos s-cargot / 2b126f2
Removing R7RS from s-cargot proper, pushing Scheme identifier functions to new Common file Getty Ritter 8 years ago
8 changed file(s) with 256 addition(s) and 86 deletion(s). Collapse all Expand all
11 {-# LANGUAGE OverloadedStrings #-}
22
33 module Data.SCargot.Basic
4 ( basicSpec
5 , asRich
6 , asWellFormed
7 , addReader
8 , setComment
9 , withLispComments
10 , withQuote
4 ( -- * Spec
5 -- $descr
6 basicSpec
117 ) where
128
139 import Control.Applicative ((<$>))
1814 import Data.SCargot.Repr.Basic (SExpr)
1915 import Data.SCargot.General ( SExprSpec
2016 , mkSpec
21 , asRich
22 , asWellFormed
23 , addReader
24 , setComment
25 , withQuote
2617 )
2718 import Data.SCargot.Comments (withLispComments)
2819
3223 || c == '+' || c == '<' || c == '>'
3324 || c == '=' || c == '!' || c == '?'
3425
26 -- $descr
27 -- The 'basicSpec' describes S-expressions whose atoms are simply
28 -- text strings that contain alphanumeric characters and a small
29 -- set of punctuation. It does no parsing of numbers or other data
30 -- types, and will accept tokens that typical Lisp implementations
31 -- would find nonsensical (like @77foo@).
32 --
33 -- Atoms recognized by the 'basicSpec' are any string matching the
34 -- regular expression @[A-Za-z0-9+*<>/=!?-]+@.
35
3536 -- | A 'SExprSpec' that understands atoms to be sequences of
3637 -- alphanumeric characters as well as the punctuation
3738 -- characters @[-*/+<>=!?]@, and does no processing of them.
38 -- This is not quite representative of actual lisps, which
39 -- would, for example, accept various kinds of string
40 -- and numeric literals.
4139 basicSpec :: SExprSpec Text (SExpr Text)
4240 basicSpec = mkSpec pToken id
4341 where pToken = pack <$> many1 (satisfy isAtomChar)
11 {-# LANGUAGE OverloadedStrings #-}
22
33 module Data.SCargot.Comments
4 ( -- * Comment Syntax
5 -- $intro
6 -- * Lisp Comments
4 ( -- $intro
5
6 -- * Lisp-Style Syntax
7 -- $lisp
78 withLispComments
89 -- * Other Existing Comment Syntaxes
910 -- ** Scripting Language Syntax
1011 -- $script
1112 , withOctothorpeComments
12 -- ** C-Like Syntax
13 -- ** C-Style Syntax
1314 -- $clike
1415 , withCLikeLineComments
1516 , withCLikeBlockComments
1617 , withCLikeComments
17 -- ** Haskell Syntax
18 -- ** Haskell-Style Syntax
1819 -- $haskell
1920 , withHaskellLineComments
2021 , withHaskellBlockComments
146147
147148 -}
148149
150 {- $lisp
151 > (one ; a comment
152 > two ; another one
153 > three)
154 -}
155
149156 {- $script
150157 > (one # a comment
151158 > two # another one
1 module Data.SCargot.Common ( number
2 , decNumber
3 , hexNumber
4 , octNumber
5 , sign
6 -- * Lisp Identifier Syntaxes
7 , parseR5RSIdent
8 , parseR6RSIdent
9 , parseR7RSIdent
10 ) where
11
12 import Data.Char
13 import Data.Text (Text)
14 import qualified Data.Text as T
15 import Text.Parsec
16 import Text.Parsec.Char (satisfy)
17 import Text.Parsec.Text (Parser)
18
19 parseR5RSIdent :: Parser Text
20 parseR5RSIdent =
21 T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar)
22 where initial = letter <|> oneOf "!$%&*/:<=>?^_~"
23 subsequent = initial <|> digit <|> oneOf "+-.@"
24 peculiar = string "+" <|> string "-" <|> string "..."
25
26 hasCategory :: Char -> [GeneralCategory] -> Bool
27 hasCategory c cs = generalCategory c `elem` cs
28
29 parseR6RSIdent :: Parser Text
30 parseR6RSIdent =
31 T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar)
32 where initial = constituent <|> oneOf "!$%&*/:<=>?^_~" <|> inlineHex
33 constituent = letter
34 <|> uniClass (\ c -> isLetter c ||
35 isSymbol c ||
36 hasCategory c
37 [ NonSpacingMark
38 , LetterNumber
39 , OtherNumber
40 , DashPunctuation
41 , ConnectorPunctuation
42 , OtherPunctuation
43 , PrivateUse
44 ])
45 inlineHex = (chr . fromIntegral) <$> (string "\\x" *> hexNumber <* char ';')
46 subsequent = initial <|> digit <|> oneOf "+-.@"
47 <|> uniClass (\ c -> hasCategory c
48 [ DecimalNumber
49 , SpacingCombiningMark
50 , EnclosingMark
51 ])
52 peculiar = string "+" <|> string "-" <|> string "..." <|>
53 ((++) <$> string "->" <*> many subsequent)
54 uniClass :: (Char -> Bool) -> Parser Char
55 uniClass sp = satisfy (\ c -> c > '\x7f' && sp c)
56
57 parseR7RSIdent :: Parser Text
58 parseR7RSIdent = T.pack <$>
59 ( (:) <$> initial <*> many subsequent
60 <|> char '|' *> many1 symbolElement <* char '|'
61 <|> peculiar
62 )
63 where initial = letter <|> specInit
64 specInit = oneOf "!$%&*/:<=>?^_~"
65 subsequent = initial <|> digit <|> specSubsequent
66 specSubsequent = expSign <|> oneOf ".@"
67 expSign = oneOf "+-"
68 symbolElement = undefined
69 peculiar = undefined
70
71 -- | A helper function for defining parsers for arbitrary-base integers.
72 -- The first argument will be the base, and the second will be the
73 -- parser for the individual digits.
74 number :: Integer -> Parser Char -> Parser Integer
75 number base digits = foldl go 0 <$> many1 digits
76 where go x d = base * x + toInteger (value d)
77 value c
78 | c == 'a' || c == 'A' = 0xa
79 | c == 'b' || c == 'B' = 0xb
80 | c == 'c' || c == 'C' = 0xc
81 | c == 'd' || c == 'D' = 0xd
82 | c == 'e' || c == 'E' = 0xe
83 | c == 'f' || c == 'F' = 0xf
84 | c >= '0' && c <= '9' = fromEnum c - fromEnum '0'
85 | otherwise = error ("Unknown letter in number: " ++ show c)
86
87 -- | A parser for bare binary numbers
88 binNumber :: Parser Integer
89 binNumber = number 2 (char '0' <|> char '1')
90
91 -- | A parser for bare octal numbers
92 octNumber :: Parser Integer
93 octNumber = number 8 digit
94
95 -- | A parser for bare decimal numbers
96 decNumber :: Parser Integer
97 decNumber = number 10 digit
98
99 -- | A parser for bare hexadecimal numbers
100 hexNumber :: Parser Integer
101 hexNumber = number 16 hexDigit
102
103 -- | A parser for numeric signs, represented as a function from numbers
104 -- to numbers. It will parse '+' as the identity function, '-', as
105 -- 'negate', or consume no input and return the identity function.
106 -- This can be combined with other numeric literals to implement
107 -- signedness:
108 --
109 -- > myNum = go <$> sign <*> decNumber
110 -- > where go s n = s n
111 sign :: Num a => Parser (a -> a)
112 sign = (pure id <* char '+')
113 <|> (pure negate <* char '-')
114 <|> pure id
4545 import Text.Parsec.Char (anyChar, space)
4646 import Text.Parsec.Text (Parser)
4747
48 import Data.SCargot.Repr
48 import Data.SCargot.Repr ( SExpr(..)
49 , RichSExpr
50 , WellFormedSExpr
51 , fromRich
52 , toRich
53 , fromWellFormed
54 , toWellFormed
55 )
4956
5057 type ReaderMacroMap atom = Map Char (Reader atom)
5158
1414
1515 import Prelude hiding (concatMap)
1616
17 import Data.SCargot.Common
1718 import Data.SCargot.Repr.Basic (SExpr)
1819 import Data.SCargot.General (SExprSpec, mkSpec)
1920
2223 This module is intended for simple, ad-hoc configuration or data formats
2324 that might not need their on rich structure but might benefit from a few
2425 various literal formats. the 'haskLikeSpec' understands identifiers as
25 defined by R6RS as well as string, integer, and floating-point literals
26 defined by R5RS as well as string, integer, and floating-point literals
2627 as defined by the Haskell spec, but won't get any Lisp-specific vector
2728 literals or other structure.
2829
3233 -- | An atom type that understands Haskell-like values as well as
3334 -- Scheme-like identifiers.
3435 data HaskLikeAtom
35 = HSIdent Text -- ^ An identifier, parsed according to the R6RS Scheme
36 = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme
3637 -- standard
3738 | HSString Text -- ^ A string, parsed according to the syntax for string
3839 -- literals in the Haskell report
4647 instance IsString HaskLikeAtom where
4748 fromString = HSIdent . fromString
4849
49 pToken :: Parser Text
50 pToken = pack <$> ( (:) <$> initial <*> many subsequent
51 <|> string "+"
52 <|> string "-"
53 <|> string "..."
54 )
55
56 initial :: Parser Char
57 initial = letter <|> oneOf "!$%&*/:<=>?^_~"
58
59 subsequent :: Parser Char
60 subsequent = initial <|> digit <|> oneOf "+-.@"
61
6250 pString :: Parser Text
6351 pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
6452 where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
6957 code = eEsc <|> eNum <|> eCtrl <|> eAscii
7058 eCtrl = char '^' >> unCtrl <$> upper
7159 eNum = (toEnum . fromInteger) <$>
72 (decimal <|> (char 'o' >> number 8 octDigit)
73 <|> (char 'x' >> number 16 hexDigit))
60 (decNumber <|> (char 'o' >> octNumber)
61 <|> (char 'x' >> hexNumber))
7462 eEsc = choice [ char a >> return b | (a, b) <- escMap ]
7563 eAscii = choice [ try (string a >> return b)
7664 | (a, b) <- asciiMap ]
8977 "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
9078 "\SYN\ETB\CAN\SUB\ESC\DEL")
9179
92 decimal :: Parser Integer
93 decimal = number 10 digit
94
95 number :: Integer -> Parser Char -> Parser Integer
96 number base digits = foldl go 0 <$> many1 digits
97 where go x d = base * x + toInteger (value d)
98 value c
99 | c == 'a' || c == 'A' = 0xa
100 | c == 'b' || c == 'B' = 0xb
101 | c == 'c' || c == 'C' = 0xc
102 | c == 'd' || c == 'D' = 0xd
103 | c == 'e' || c == 'E' = 0xe
104 | c == 'f' || c == 'F' = 0xf
105 | c >= '0' && c <= '9' = fromEnum c - fromEnum '0'
106 | otherwise = error ("Unknown letter in number: " ++ show c)
107
10880 pFloat :: Parser Double
10981 pFloat = do
110 n <- decimal
82 n <- decNumber
11183 withDot n <|> noDot n
11284 where withDot n = do
11385 char '.'
114 m <- decimal
86 m <- decNumber
11587 e <- option 1.0 exponent
11688 return ((fromIntegral n + asDec m 0) * e)
11789 noDot n = do
12092 exponent = do
12193 oneOf "eE"
12294 s <- power
123 x <- decimal
95 x <- decNumber
12496 return (10 ** s (fromIntegral x))
12597 asDec 0 k = k
12698 asDec n k =
132104 pInt :: Parser Integer
133105 pInt = do
134106 s <- power
135 n <- pZeroNum <|> decimal
107 n <- pZeroNum <|> decNumber
136108 return (fromIntegral (s n))
137109
138110 pZeroNum :: Parser Integer
139111 pZeroNum = char '0' >>
140 ( (oneOf "xX" >> number 16 hexDigit)
141 <|> (oneOf "oO" >> number 8 octDigit)
142 <|> decimal
112 ( (oneOf "xX" >> hexNumber)
113 <|> (oneOf "oO" >> octNumber)
114 <|> decNumber
143115 <|> return 0
144116 )
145117
146118 pHaskLikeAtom :: Parser HaskLikeAtom
147119 pHaskLikeAtom
148 = HSFloat <$> (try pFloat <?> "float")
149 <|> HSInt <$> (try pInt <?> "integer")
150 <|> HSString <$> (pString <?> "string literal")
151 <|> HSIdent <$> (pToken <?> "token")
120 = HSFloat <$> (try pFloat <?> "float")
121 <|> HSInt <$> (try pInt <?> "integer")
122 <|> HSString <$> (pString <?> "string literal")
123 <|> HSIdent <$> (parseR5RSIdent <?> "token")
152124
153125 sHaskLikeAtom :: HaskLikeAtom -> Text
154126 sHaskLikeAtom (HSIdent t) = t
9292 indent n ts = T.replicate n " " <> ts
9393
9494 -- Indents every line n spaces, and adds a newline to the beginning
95 -- used in swung indents
9596 indentAll :: Int -> [Text] -> Text
9697 indentAll n = ("\n" <>) . joinLines . map (indent n)
9798
9899 -- Indents every line but the first by some amount
100 -- used in aligned indents
99101 indentSubsequent :: Int -> [Text] -> Text
100102 indentSubsequent _ [] = ""
101103 indentSubsequent _ [t] = t
107109 -- i swear i'll do better in the future i promise i have to
108110 -- for my sake and for everyone's
109111
110 -- | Pretty-print a 'Sexpr' according to the options in a
112 -- | Pretty-print a 'SExpr' according to the options in a
111113 -- 'LayoutOptions' value.
112114 prettyPrintSExpr :: LayoutOptions a -> SExpr a -> Text
113115 prettyPrintSExpr LayoutOptions { .. } = pHead 0
136138 indentSubsequent (ind + headWidth + 1)
137139 (map (pHead (ind + headWidth + 1)) lst)
138140 body
139 | length lst == 0 = ""
141 | length lst == 0 = ""
140142 | Just maxAmt <- maxWidth
141 , (T.length flat + ind) > maxAmt = " " <> indented
142 | otherwise = " " <> flat
143 , T.length flat + ind > maxAmt = " " <> indented
144 | otherwise = " " <> flat
1 {-# LANGUAGE StandaloneDeriving #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE UndecidableInstances #-}
5
16 module Data.SCargot.Scheme.R7RS where
7
8 import Data.Char (chr, isAlphaNum)
9 import Data.Text (Text)
10 import qualified Data.Text as T
11 import Data.String (IsString(..))
12 import Data.SCargot.Common
13 import Data.SCargot.General
14 import Data.SCargot.Repr.Basic
15 import Data.Word (Word8)
16 import Text.Parsec
17 import Text.Parsec.Text (Parser)
18
19 instance IsString (SchemeAtom c) where
20 fromString = ScmIdent . fromString
221
322 -- | A Scheme value type. This is strictly larger than just
423 -- 'atoms', as they may include things like vectors or
1635 | ScmByteVec [Word8]
1736 | ScmLabeledDatum Int (carrier (SchemeAtom carrier))
1837 | ScmLabelReference Int
38
39 -- | Scheme has a lot of numbers.
40 data SchemeNumber
41 = ScmNumber
42 | ScmComplexNumber Double Double
43 | ScmRealNumber Double
44 | ScmRationalNumber Rational
45 | ScmInteger Integer
1946 deriving (Eq, Show)
2047
21 withQuasiQuote :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom Sexpr))
48 deriving instance Show (c (SchemeAtom c)) => Show (SchemeAtom c)
49 deriving instance Eq (c (SchemeAtom c)) => Eq (SchemeAtom c)
50
51 badSpec :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr))
52 badSpec = mkSpec (ScmIdent . T.pack <$> many1 (satisfy isAlphaNum)) undefined
53
54 withQuasiQuote :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr))
2255 -> SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr))
2356 withQuasiQuote spec = addReader '`' (fmap (go "quasiquote"))
2457 $ addReader ',' unquote
58 $ addReader '\'' (fmap (go "quote"))
2559 $ spec
2660 where go name s = name ::: s ::: Nil
27 unquote p = char '@' *> fmap (go "unquote-splicing")
28 <|> fmap (go "unquote")
61 unquote p = char '@' *> fmap (go "unquote-splicing") p
62 <|> fmap (go "unquote") p
2963
30 octoReader :: Reader (SExpr (SchemeAtom SExpr))
64 octoReader :: Reader (SchemeAtom SExpr)
3165 octoReader pSexpr =
32 string "true" *> pure (ScmBool True)
33 <|> string "false" *> pure (ScmBool False)
34 <|> char 't' *> pure (ScmBool True)
35 <|> char 'f' *> pure (ScmBool False)
36 <|> char '\\' *> characterConstant
37 <|> char '(' *> fmap ScmVec (vector pSexpr)
38 <|> string "u8(" *> bytevec
66 string "true" *> pure (A (ScmBool True))
67 <|> string "false" *> pure (A (ScmBool False))
68 <|> char 't' *> pure (A (ScmBool True))
69 <|> char 'f' *> pure (A (ScmBool False))
70 <|> char '\\' *> fmap (A . ScmChar) characterConstant
71 <|> char '(' *> fmap (A . ScmVec) (vector pSexpr)
72 <|> string "u8(" *> fmap A bytevec
3973 <|> do n <- read <$> many1 digit
40 (char '#' *> ScmLabelReference n <|>
41 char '=' *> fmap (ScmLabeledDatum n) pSexpr)
74 (char '#' *> pure (A (ScmLabelReference n)) <|>
75 char '=' *> fmap (A . ScmLabeledDatum n) pSexpr)
4276
4377 vector :: Parser (SExpr (SchemeAtom SExpr)) -> Parser [SExpr (SchemeAtom SExpr)]
44 vector pSexpr =
45 (char ')' *> pure []) <|> ((:) <$> pSExpr <*> vector pSexpr)
78 vector pSExpr =
79 (char ')' *> pure []) <|> ((:) <$> pSExpr <*> vector pSExpr)
80
81 bytevec :: Parser (SchemeAtom SExpr)
82 bytevec = undefined
83
84 characterConstant :: Parser Char
85 characterConstant = namedCharacter
86 <|> (chr . fromInteger <$> (char 'x' *> hexNumber))
87 <|> anyCharacter
88 where namedCharacter = string "alarm" *> pure '\x07'
89 <|> string "backspace" *> pure '\x08'
90 <|> string "delete" *> pure '\x7f'
91 <|> string "escape" *> pure '\x1b'
92 <|> string "newline" *> pure '\x0a'
93 <|> string "null" *> pure '\x00'
94 <|> string "return" *> pure '\x0d'
95 <|> string "space" *> pure ' '
96 <|> string "tab" *> pure '\x09'
97 anyCharacter = anyToken
98
99 r7rsNum :: Int -> Parser Int
100 r7rsNum radix = prefix <*> complex
101 where prefix = radix <*> exactness <|> exactness <*> radix
102 complex = real
103 <|> real <* char '@' <*> real
104 <|> real <* char '+' <*> ureal <* char 'i'
105 <|> real <* char '-' <*> ureal <* char 'i'
106 <|> real <* char '+' <* char 'i'
107 <|> real <* char '-' <* char 'i'
108 <|> real <*> infnan <* char 'i'
109 <|> char '+' *> ureal <* char 'i'
110 <|> char '-' *> ureal <* char 'i'
111 <|> infnan <* char 'i'
112 <|> string "+i"
113 <|> string "-i"
114 real = ($) <$> sign <*> ureal
115 <|> infnan
2121 Data.SCargot.Pretty,
2222 Data.SCargot.Basic,
2323 Data.SCargot.Comments,
24 Data.SCargot.HaskLike,
25 Data.SCargot.Tutorial
24 Data.SCargot.Common,
25 Data.SCargot.HaskLike
2626 -- other-modules:
2727 -- other-extensions:
2828 build-depends: base >=4.7 && <5, parsec, text, containers