Removing R7RS from s-cargot proper, pushing Scheme identifier functions to new Common file
Getty Ritter
9 years ago
1 | 1 | {-# LANGUAGE OverloadedStrings #-} |
2 | 2 | |
3 | 3 | module Data.SCargot.Basic |
4 | ( basicSpec | |
5 | , asRich | |
6 | , asWellFormed | |
7 | , addReader | |
8 | , setComment | |
9 | , withLispComments | |
10 |
|
|
4 | ( -- * Spec | |
5 | -- $descr | |
6 | basicSpec | |
11 | 7 | ) where |
12 | 8 | |
13 | 9 | import Control.Applicative ((<$>)) |
18 | 14 | import Data.SCargot.Repr.Basic (SExpr) |
19 | 15 | import Data.SCargot.General ( SExprSpec |
20 | 16 | , mkSpec |
21 | , asRich | |
22 | , asWellFormed | |
23 | , addReader | |
24 | , setComment | |
25 | , withQuote | |
26 | 17 | ) |
27 | 18 | import Data.SCargot.Comments (withLispComments) |
28 | 19 | |
32 | 23 | || c == '+' || c == '<' || c == '>' |
33 | 24 | || c == '=' || c == '!' || c == '?' |
34 | 25 | |
26 | -- $descr | |
27 | -- The 'basicSpec' describes S-expressions whose atoms are simply | |
28 | -- text strings that contain alphanumeric characters and a small | |
29 | -- set of punctuation. It does no parsing of numbers or other data | |
30 | -- types, and will accept tokens that typical Lisp implementations | |
31 | -- would find nonsensical (like @77foo@). | |
32 | -- | |
33 | -- Atoms recognized by the 'basicSpec' are any string matching the | |
34 | -- regular expression @[A-Za-z0-9+*<>/=!?-]+@. | |
35 | ||
35 | 36 | -- | A 'SExprSpec' that understands atoms to be sequences of |
36 | 37 | -- alphanumeric characters as well as the punctuation |
37 | 38 | -- characters @[-*/+<>=!?]@, and does no processing of them. |
38 | -- This is not quite representative of actual lisps, which | |
39 | -- would, for example, accept various kinds of string | |
40 | -- and numeric literals. | |
41 | 39 | basicSpec :: SExprSpec Text (SExpr Text) |
42 | 40 | basicSpec = mkSpec pToken id |
43 | 41 | where pToken = pack <$> many1 (satisfy isAtomChar) |
1 | 1 | {-# LANGUAGE OverloadedStrings #-} |
2 | 2 | |
3 | 3 | module Data.SCargot.Comments |
4 | ( -- * Comment Syntax | |
5 | -- $intro | |
6 |
|
|
4 | ( -- $intro | |
5 | ||
6 | -- * Lisp-Style Syntax | |
7 | -- $lisp | |
7 | 8 | withLispComments |
8 | 9 | -- * Other Existing Comment Syntaxes |
9 | 10 | -- ** Scripting Language Syntax |
10 | 11 | -- $script |
11 | 12 | , withOctothorpeComments |
12 |
-- ** C- |
|
13 | -- ** C-Style Syntax | |
13 | 14 | -- $clike |
14 | 15 | , withCLikeLineComments |
15 | 16 | , withCLikeBlockComments |
16 | 17 | , withCLikeComments |
17 |
-- ** Haskell |
|
18 | -- ** Haskell-Style Syntax | |
18 | 19 | -- $haskell |
19 | 20 | , withHaskellLineComments |
20 | 21 | , withHaskellBlockComments |
146 | 147 | |
147 | 148 | -} |
148 | 149 | |
150 | {- $lisp | |
151 | > (one ; a comment | |
152 | > two ; another one | |
153 | > three) | |
154 | -} | |
155 | ||
149 | 156 | {- $script |
150 | 157 | > (one # a comment |
151 | 158 | > two # another one |
1 | module Data.SCargot.Common ( number | |
2 | , decNumber | |
3 | , hexNumber | |
4 | , octNumber | |
5 | , sign | |
6 | -- * Lisp Identifier Syntaxes | |
7 | , parseR5RSIdent | |
8 | , parseR6RSIdent | |
9 | , parseR7RSIdent | |
10 | ) where | |
11 | ||
12 | import Data.Char | |
13 | import Data.Text (Text) | |
14 | import qualified Data.Text as T | |
15 | import Text.Parsec | |
16 | import Text.Parsec.Char (satisfy) | |
17 | import Text.Parsec.Text (Parser) | |
18 | ||
19 | parseR5RSIdent :: Parser Text | |
20 | parseR5RSIdent = | |
21 | T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar) | |
22 | where initial = letter <|> oneOf "!$%&*/:<=>?^_~" | |
23 | subsequent = initial <|> digit <|> oneOf "+-.@" | |
24 | peculiar = string "+" <|> string "-" <|> string "..." | |
25 | ||
26 | hasCategory :: Char -> [GeneralCategory] -> Bool | |
27 | hasCategory c cs = generalCategory c `elem` cs | |
28 | ||
29 | parseR6RSIdent :: Parser Text | |
30 | parseR6RSIdent = | |
31 | T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar) | |
32 | where initial = constituent <|> oneOf "!$%&*/:<=>?^_~" <|> inlineHex | |
33 | constituent = letter | |
34 | <|> uniClass (\ c -> isLetter c || | |
35 | isSymbol c || | |
36 | hasCategory c | |
37 | [ NonSpacingMark | |
38 | , LetterNumber | |
39 | , OtherNumber | |
40 | , DashPunctuation | |
41 | , ConnectorPunctuation | |
42 | , OtherPunctuation | |
43 | , PrivateUse | |
44 | ]) | |
45 | inlineHex = (chr . fromIntegral) <$> (string "\\x" *> hexNumber <* char ';') | |
46 | subsequent = initial <|> digit <|> oneOf "+-.@" | |
47 | <|> uniClass (\ c -> hasCategory c | |
48 | [ DecimalNumber | |
49 | , SpacingCombiningMark | |
50 | , EnclosingMark | |
51 | ]) | |
52 | peculiar = string "+" <|> string "-" <|> string "..." <|> | |
53 | ((++) <$> string "->" <*> many subsequent) | |
54 | uniClass :: (Char -> Bool) -> Parser Char | |
55 | uniClass sp = satisfy (\ c -> c > '\x7f' && sp c) | |
56 | ||
57 | parseR7RSIdent :: Parser Text | |
58 | parseR7RSIdent = T.pack <$> | |
59 | ( (:) <$> initial <*> many subsequent | |
60 | <|> char '|' *> many1 symbolElement <* char '|' | |
61 | <|> peculiar | |
62 | ) | |
63 | where initial = letter <|> specInit | |
64 | specInit = oneOf "!$%&*/:<=>?^_~" | |
65 | subsequent = initial <|> digit <|> specSubsequent | |
66 | specSubsequent = expSign <|> oneOf ".@" | |
67 | expSign = oneOf "+-" | |
68 | symbolElement = undefined | |
69 | peculiar = undefined | |
70 | ||
71 | -- | A helper function for defining parsers for arbitrary-base integers. | |
72 | -- The first argument will be the base, and the second will be the | |
73 | -- parser for the individual digits. | |
74 | number :: Integer -> Parser Char -> Parser Integer | |
75 | number base digits = foldl go 0 <$> many1 digits | |
76 | where go x d = base * x + toInteger (value d) | |
77 | value c | |
78 | | c == 'a' || c == 'A' = 0xa | |
79 | | c == 'b' || c == 'B' = 0xb | |
80 | | c == 'c' || c == 'C' = 0xc | |
81 | | c == 'd' || c == 'D' = 0xd | |
82 | | c == 'e' || c == 'E' = 0xe | |
83 | | c == 'f' || c == 'F' = 0xf | |
84 | | c >= '0' && c <= '9' = fromEnum c - fromEnum '0' | |
85 | | otherwise = error ("Unknown letter in number: " ++ show c) | |
86 | ||
87 | -- | A parser for bare binary numbers | |
88 | binNumber :: Parser Integer | |
89 | binNumber = number 2 (char '0' <|> char '1') | |
90 | ||
91 | -- | A parser for bare octal numbers | |
92 | octNumber :: Parser Integer | |
93 | octNumber = number 8 digit | |
94 | ||
95 | -- | A parser for bare decimal numbers | |
96 | decNumber :: Parser Integer | |
97 | decNumber = number 10 digit | |
98 | ||
99 | -- | A parser for bare hexadecimal numbers | |
100 | hexNumber :: Parser Integer | |
101 | hexNumber = number 16 hexDigit | |
102 | ||
103 | -- | A parser for numeric signs, represented as a function from numbers | |
104 | -- to numbers. It will parse '+' as the identity function, '-', as | |
105 | -- 'negate', or consume no input and return the identity function. | |
106 | -- This can be combined with other numeric literals to implement | |
107 | -- signedness: | |
108 | -- | |
109 | -- > myNum = go <$> sign <*> decNumber | |
110 | -- > where go s n = s n | |
111 | sign :: Num a => Parser (a -> a) | |
112 | sign = (pure id <* char '+') | |
113 | <|> (pure negate <* char '-') | |
114 | <|> pure id |
45 | 45 | import Text.Parsec.Char (anyChar, space) |
46 | 46 | import Text.Parsec.Text (Parser) |
47 | 47 | |
48 |
import Data.SCargot.Repr |
|
48 | import Data.SCargot.Repr ( SExpr(..) | |
49 | , RichSExpr | |
50 | , WellFormedSExpr | |
51 | , fromRich | |
52 | , toRich | |
53 | , fromWellFormed | |
54 | , toWellFormed | |
55 | ) | |
49 | 56 | |
50 | 57 | type ReaderMacroMap atom = Map Char (Reader atom) |
51 | 58 |
14 | 14 | |
15 | 15 | import Prelude hiding (concatMap) |
16 | 16 | |
17 | import Data.SCargot.Common | |
17 | 18 | import Data.SCargot.Repr.Basic (SExpr) |
18 | 19 | import Data.SCargot.General (SExprSpec, mkSpec) |
19 | 20 | |
22 | 23 | This module is intended for simple, ad-hoc configuration or data formats |
23 | 24 | that might not need their on rich structure but might benefit from a few |
24 | 25 | various literal formats. the 'haskLikeSpec' understands identifiers as |
25 |
defined by R |
|
26 | defined by R5RS as well as string, integer, and floating-point literals | |
26 | 27 | as defined by the Haskell spec, but won't get any Lisp-specific vector |
27 | 28 | literals or other structure. |
28 | 29 | |
32 | 33 | -- | An atom type that understands Haskell-like values as well as |
33 | 34 | -- Scheme-like identifiers. |
34 | 35 | data HaskLikeAtom |
35 |
= HSIdent Text -- ^ An identifier, parsed according to the R |
|
36 | = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme | |
36 | 37 | -- standard |
37 | 38 | | HSString Text -- ^ A string, parsed according to the syntax for string |
38 | 39 | -- literals in the Haskell report |
46 | 47 | instance IsString HaskLikeAtom where |
47 | 48 | fromString = HSIdent . fromString |
48 | 49 | |
49 | pToken :: Parser Text | |
50 | pToken = pack <$> ( (:) <$> initial <*> many subsequent | |
51 | <|> string "+" | |
52 | <|> string "-" | |
53 | <|> string "..." | |
54 | ) | |
55 | ||
56 | initial :: Parser Char | |
57 | initial = letter <|> oneOf "!$%&*/:<=>?^_~" | |
58 | ||
59 | subsequent :: Parser Char | |
60 | subsequent = initial <|> digit <|> oneOf "+-.@" | |
61 | ||
62 | 50 | pString :: Parser Text |
63 | 51 | pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc)) |
64 | 52 | where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026') |
69 | 57 | code = eEsc <|> eNum <|> eCtrl <|> eAscii |
70 | 58 | eCtrl = char '^' >> unCtrl <$> upper |
71 | 59 | eNum = (toEnum . fromInteger) <$> |
72 | (decimal <|> (char 'o' >> number 8 octDigit) | |
73 | <|> (char 'x' >> number 16 hexDigit)) | |
60 | (decNumber <|> (char 'o' >> octNumber) | |
61 | <|> (char 'x' >> hexNumber)) | |
74 | 62 | eEsc = choice [ char a >> return b | (a, b) <- escMap ] |
75 | 63 | eAscii = choice [ try (string a >> return b) |
76 | 64 | | (a, b) <- asciiMap ] |
89 | 77 | "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++ |
90 | 78 | "\SYN\ETB\CAN\SUB\ESC\DEL") |
91 | 79 | |
92 | decimal :: Parser Integer | |
93 | decimal = number 10 digit | |
94 | ||
95 | number :: Integer -> Parser Char -> Parser Integer | |
96 | number base digits = foldl go 0 <$> many1 digits | |
97 | where go x d = base * x + toInteger (value d) | |
98 | value c | |
99 | | c == 'a' || c == 'A' = 0xa | |
100 | | c == 'b' || c == 'B' = 0xb | |
101 | | c == 'c' || c == 'C' = 0xc | |
102 | | c == 'd' || c == 'D' = 0xd | |
103 | | c == 'e' || c == 'E' = 0xe | |
104 | | c == 'f' || c == 'F' = 0xf | |
105 | | c >= '0' && c <= '9' = fromEnum c - fromEnum '0' | |
106 | | otherwise = error ("Unknown letter in number: " ++ show c) | |
107 | ||
108 | 80 | pFloat :: Parser Double |
109 | 81 | pFloat = do |
110 |
n <- dec |
|
82 | n <- decNumber | |
111 | 83 | withDot n <|> noDot n |
112 | 84 | where withDot n = do |
113 | 85 | char '.' |
114 |
m <- dec |
|
86 | m <- decNumber | |
115 | 87 | e <- option 1.0 exponent |
116 | 88 | return ((fromIntegral n + asDec m 0) * e) |
117 | 89 | noDot n = do |
120 | 92 | exponent = do |
121 | 93 | oneOf "eE" |
122 | 94 | s <- power |
123 |
x <- dec |
|
95 | x <- decNumber | |
124 | 96 | return (10 ** s (fromIntegral x)) |
125 | 97 | asDec 0 k = k |
126 | 98 | asDec n k = |
132 | 104 | pInt :: Parser Integer |
133 | 105 | pInt = do |
134 | 106 | s <- power |
135 |
n <- pZeroNum <|> dec |
|
107 | n <- pZeroNum <|> decNumber | |
136 | 108 | return (fromIntegral (s n)) |
137 | 109 | |
138 | 110 | pZeroNum :: Parser Integer |
139 | 111 | pZeroNum = char '0' >> |
140 | ( (oneOf "xX" >> number 16 hexDigit) | |
141 | <|> (oneOf "oO" >> number 8 octDigit) | |
142 |
|
|
112 | ( (oneOf "xX" >> hexNumber) | |
113 | <|> (oneOf "oO" >> octNumber) | |
114 | <|> decNumber | |
143 | 115 | <|> return 0 |
144 | 116 | ) |
145 | 117 | |
146 | 118 | pHaskLikeAtom :: Parser HaskLikeAtom |
147 | 119 | pHaskLikeAtom |
148 | = HSFloat <$> (try pFloat <?> "float") | |
149 | <|> HSInt <$> (try pInt <?> "integer") | |
150 | <|> HSString <$> (pString <?> "string literal") | |
151 | <|> HSIdent <$> (pToken <?> "token") | |
120 | = HSFloat <$> (try pFloat <?> "float") | |
121 | <|> HSInt <$> (try pInt <?> "integer") | |
122 | <|> HSString <$> (pString <?> "string literal") | |
123 | <|> HSIdent <$> (parseR5RSIdent <?> "token") | |
152 | 124 | |
153 | 125 | sHaskLikeAtom :: HaskLikeAtom -> Text |
154 | 126 | sHaskLikeAtom (HSIdent t) = t |
92 | 92 | indent n ts = T.replicate n " " <> ts |
93 | 93 | |
94 | 94 | -- Indents every line n spaces, and adds a newline to the beginning |
95 | -- used in swung indents | |
95 | 96 | indentAll :: Int -> [Text] -> Text |
96 | 97 | indentAll n = ("\n" <>) . joinLines . map (indent n) |
97 | 98 | |
98 | 99 | -- Indents every line but the first by some amount |
100 | -- used in aligned indents | |
99 | 101 | indentSubsequent :: Int -> [Text] -> Text |
100 | 102 | indentSubsequent _ [] = "" |
101 | 103 | indentSubsequent _ [t] = t |
107 | 109 | -- i swear i'll do better in the future i promise i have to |
108 | 110 | -- for my sake and for everyone's |
109 | 111 | |
110 |
-- | Pretty-print a 'S |
|
112 | -- | Pretty-print a 'SExpr' according to the options in a | |
111 | 113 | -- 'LayoutOptions' value. |
112 | 114 | prettyPrintSExpr :: LayoutOptions a -> SExpr a -> Text |
113 | 115 | prettyPrintSExpr LayoutOptions { .. } = pHead 0 |
136 | 138 | indentSubsequent (ind + headWidth + 1) |
137 | 139 | (map (pHead (ind + headWidth + 1)) lst) |
138 | 140 | body |
139 |
| length lst == 0 |
|
141 | | length lst == 0 = "" | |
140 | 142 | | Just maxAmt <- maxWidth |
141 | , (T.length flat + ind) > maxAmt = " " <> indented | |
142 | | otherwise = " " <> flat | |
143 | , T.length flat + ind > maxAmt = " " <> indented | |
144 | | otherwise = " " <> flat |
1 | {-# LANGUAGE StandaloneDeriving #-} | |
2 | {-# LANGUAGE OverloadedStrings #-} | |
3 | {-# LANGUAGE FlexibleContexts #-} | |
4 | {-# LANGUAGE UndecidableInstances #-} | |
5 | ||
1 | 6 | module Data.SCargot.Scheme.R7RS where |
7 | ||
8 | import Data.Char (chr, isAlphaNum) | |
9 | import Data.Text (Text) | |
10 | import qualified Data.Text as T | |
11 | import Data.String (IsString(..)) | |
12 | import Data.SCargot.Common | |
13 | import Data.SCargot.General | |
14 | import Data.SCargot.Repr.Basic | |
15 | import Data.Word (Word8) | |
16 | import Text.Parsec | |
17 | import Text.Parsec.Text (Parser) | |
18 | ||
19 | instance IsString (SchemeAtom c) where | |
20 | fromString = ScmIdent . fromString | |
2 | 21 | |
3 | 22 | -- | A Scheme value type. This is strictly larger than just |
4 | 23 | -- 'atoms', as they may include things like vectors or |
16 | 35 | | ScmByteVec [Word8] |
17 | 36 | | ScmLabeledDatum Int (carrier (SchemeAtom carrier)) |
18 | 37 | | ScmLabelReference Int |
38 | ||
39 | -- | Scheme has a lot of numbers. | |
40 | data SchemeNumber | |
41 | = ScmNumber | |
42 | | ScmComplexNumber Double Double | |
43 | | ScmRealNumber Double | |
44 | | ScmRationalNumber Rational | |
45 | | ScmInteger Integer | |
19 | 46 | deriving (Eq, Show) |
20 | 47 | |
21 |
|
|
48 | deriving instance Show (c (SchemeAtom c)) => Show (SchemeAtom c) | |
49 | deriving instance Eq (c (SchemeAtom c)) => Eq (SchemeAtom c) | |
50 | ||
51 | badSpec :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr)) | |
52 | badSpec = mkSpec (ScmIdent . T.pack <$> many1 (satisfy isAlphaNum)) undefined | |
53 | ||
54 | withQuasiQuote :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr)) | |
22 | 55 | -> SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr)) |
23 | 56 | withQuasiQuote spec = addReader '`' (fmap (go "quasiquote")) |
24 | 57 | $ addReader ',' unquote |
58 | $ addReader '\'' (fmap (go "quote")) | |
25 | 59 | $ spec |
26 | 60 | where go name s = name ::: s ::: Nil |
27 | unquote p = char '@' *> fmap (go "unquote-splicing") | |
28 | <|> fmap (go "unquote") | |
61 | unquote p = char '@' *> fmap (go "unquote-splicing") p | |
62 | <|> fmap (go "unquote") p | |
29 | 63 | |
30 |
octoReader :: Reader (S |
|
64 | octoReader :: Reader (SchemeAtom SExpr) | |
31 | 65 | octoReader pSexpr = |
32 | string "true" *> pure (ScmBool True) | |
33 | <|> string "false" *> pure (ScmBool False) | |
34 | <|> char 't' *> pure (ScmBool True) | |
35 | <|> char 'f' *> pure (ScmBool False) | |
36 | <|> char '\\' *> characterConstant | |
37 | <|> char '(' *> fmap ScmVec (vector pSexpr) | |
38 |
|
|
66 | string "true" *> pure (A (ScmBool True)) | |
67 | <|> string "false" *> pure (A (ScmBool False)) | |
68 | <|> char 't' *> pure (A (ScmBool True)) | |
69 | <|> char 'f' *> pure (A (ScmBool False)) | |
70 | <|> char '\\' *> fmap (A . ScmChar) characterConstant | |
71 | <|> char '(' *> fmap (A . ScmVec) (vector pSexpr) | |
72 | <|> string "u8(" *> fmap A bytevec | |
39 | 73 | <|> do n <- read <$> many1 digit |
40 | (char '#' *> ScmLabelReference n <|> | |
41 | char '=' *> fmap (ScmLabeledDatum n) pSexpr) | |
74 | (char '#' *> pure (A (ScmLabelReference n)) <|> | |
75 | char '=' *> fmap (A . ScmLabeledDatum n) pSexpr) | |
42 | 76 | |
43 | 77 | vector :: Parser (SExpr (SchemeAtom SExpr)) -> Parser [SExpr (SchemeAtom SExpr)] |
44 | vector pSexpr = | |
45 | (char ')' *> pure []) <|> ((:) <$> pSExpr <*> vector pSexpr) | |
78 | vector pSExpr = | |
79 | (char ')' *> pure []) <|> ((:) <$> pSExpr <*> vector pSExpr) | |
80 | ||
81 | bytevec :: Parser (SchemeAtom SExpr) | |
82 | bytevec = undefined | |
83 | ||
84 | characterConstant :: Parser Char | |
85 | characterConstant = namedCharacter | |
86 | <|> (chr . fromInteger <$> (char 'x' *> hexNumber)) | |
87 | <|> anyCharacter | |
88 | where namedCharacter = string "alarm" *> pure '\x07' | |
89 | <|> string "backspace" *> pure '\x08' | |
90 | <|> string "delete" *> pure '\x7f' | |
91 | <|> string "escape" *> pure '\x1b' | |
92 | <|> string "newline" *> pure '\x0a' | |
93 | <|> string "null" *> pure '\x00' | |
94 | <|> string "return" *> pure '\x0d' | |
95 | <|> string "space" *> pure ' ' | |
96 | <|> string "tab" *> pure '\x09' | |
97 | anyCharacter = anyToken | |
98 | ||
99 | r7rsNum :: Int -> Parser Int | |
100 | r7rsNum radix = prefix <*> complex | |
101 | where prefix = radix <*> exactness <|> exactness <*> radix | |
102 | complex = real | |
103 | <|> real <* char '@' <*> real | |
104 | <|> real <* char '+' <*> ureal <* char 'i' | |
105 | <|> real <* char '-' <*> ureal <* char 'i' | |
106 | <|> real <* char '+' <* char 'i' | |
107 | <|> real <* char '-' <* char 'i' | |
108 | <|> real <*> infnan <* char 'i' | |
109 | <|> char '+' *> ureal <* char 'i' | |
110 | <|> char '-' *> ureal <* char 'i' | |
111 | <|> infnan <* char 'i' | |
112 | <|> string "+i" | |
113 | <|> string "-i" | |
114 | real = ($) <$> sign <*> ureal | |
115 | <|> infnan |