| 1 |
{-# LANGUAGE OverloadedStrings #-}
|
| 2 |
|
| 3 |
module Data.SCargot.HaskLike ( -- $info
|
| 4 |
haskLikeSpec
|
| 5 |
, HaskLikeAtom(..)
|
| 6 |
) where
|
| 7 |
|
| 8 |
import Control.Applicative ((<$>), (<*>), (<$))
|
| 9 |
import Data.Maybe (catMaybes)
|
| 10 |
import Data.String (IsString(..))
|
| 11 |
import Data.Text (Text, pack)
|
| 12 |
import Text.Parsec
|
| 13 |
import Text.Parsec.Text (Parser)
|
| 14 |
|
| 15 |
import Prelude hiding (concatMap)
|
| 16 |
|
| 17 |
import Data.SCargot.Repr.Basic (SExpr)
|
| 18 |
import Data.SCargot.General (SExprSpec, mkSpec)
|
| 19 |
|
| 20 |
{- $info
|
| 21 |
|
| 22 |
This module is intended for simple, ad-hoc configuration or data formats
|
| 23 |
that might not need their on rich structure but might benefit from a few
|
| 24 |
various literal formats. the 'haskLikeSpec' understands identifiers as
|
| 25 |
defined by R6RS as well as string, integer, and floating-point literals
|
| 26 |
as defined by the Haskell spec, but won't get any Lisp-specific vector
|
| 27 |
literals or other structure.
|
| 28 |
|
| 29 |
-}
|
| 30 |
|
| 31 |
|
| 32 |
-- | An atom type that understands Haskell-like values as well as
|
| 33 |
-- Scheme-like identifiers.
|
| 34 |
data HaskLikeAtom
|
| 35 |
= HSIdent Text -- ^ An identifier, parsed according to the R6RS Scheme
|
| 36 |
-- standard
|
| 37 |
| HSString Text -- ^ A string, parsed according to the syntax for string
|
| 38 |
-- literals in the Haskell report
|
| 39 |
| HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to
|
| 40 |
-- the syntax for integer literals in the Haskell report
|
| 41 |
| HSFloat Double -- ^ A double-precision floating-point value, parsed
|
| 42 |
-- according to the syntax for floats in the Haskell
|
| 43 |
-- report
|
| 44 |
deriving (Eq, Show)
|
| 45 |
|
| 46 |
instance IsString HaskLikeAtom where
|
| 47 |
fromString = HSIdent . fromString
|
| 48 |
|
| 49 |
pToken :: Parser Text
|
| 50 |
pToken = pack <$> ( (:) <$> initial <*> many subsequent
|
| 51 |
<|> string "+"
|
| 52 |
<|> string "-"
|
| 53 |
<|> string "..."
|
| 54 |
)
|
| 55 |
|
| 56 |
initial :: Parser Char
|
| 57 |
initial = letter <|> oneOf "!$%&*/:<=>?^_~"
|
| 58 |
|
| 59 |
subsequent :: Parser Char
|
| 60 |
subsequent = initial <|> digit <|> oneOf "+-.@"
|
| 61 |
|
| 62 |
pString :: Parser Text
|
| 63 |
pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
|
| 64 |
where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
|
| 65 |
esc = do char '\\'
|
| 66 |
Nothing <$ (gap <|> char '&') <|>
|
| 67 |
Just <$> code
|
| 68 |
gap = many1 space >> char '\\'
|
| 69 |
code = eEsc <|> eNum <|> eCtrl <|> eAscii
|
| 70 |
eCtrl = char '^' >> unCtrl <$> upper
|
| 71 |
eNum = (toEnum . fromInteger) <$>
|
| 72 |
(decimal <|> (char 'o' >> number 8 octDigit)
|
| 73 |
<|> (char 'x' >> number 16 hexDigit))
|
| 74 |
eEsc = choice [ char a >> return b | (a, b) <- escMap ]
|
| 75 |
eAscii = choice [ try (string a >> return b)
|
| 76 |
| (a, b) <- asciiMap ]
|
| 77 |
unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1)
|
| 78 |
|
| 79 |
escMap :: [(Char, Char)]
|
| 80 |
escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
|
| 81 |
|
| 82 |
asciiMap :: [(String, Char)]
|
| 83 |
asciiMap = zip
|
| 84 |
["BS","HT","LF","VT","FF","CR","SO","SI","EM"
|
| 85 |
,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX"
|
| 86 |
,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3"
|
| 87 |
,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"]
|
| 88 |
("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++
|
| 89 |
"\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
|
| 90 |
"\SYN\ETB\CAN\SUB\ESC\DEL")
|
| 91 |
|
| 92 |
decimal :: Parser Integer
|
| 93 |
decimal = number 10 digit
|
| 94 |
|
| 95 |
number :: Integer -> Parser Char -> Parser Integer
|
| 96 |
number base digits = foldl go 0 <$> many1 digits
|
| 97 |
where go x d = base * x + toInteger (value d)
|
| 98 |
value c
|
| 99 |
| c == 'a' || c == 'A' = 0xa
|
| 100 |
| c == 'b' || c == 'B' = 0xb
|
| 101 |
| c == 'c' || c == 'C' = 0xc
|
| 102 |
| c == 'd' || c == 'D' = 0xd
|
| 103 |
| c == 'e' || c == 'E' = 0xe
|
| 104 |
| c == 'f' || c == 'F' = 0xf
|
| 105 |
| c >= '0' && c <= '9' = fromEnum c - fromEnum '0'
|
| 106 |
| otherwise = error ("Unknown letter in number: " ++ show c)
|
| 107 |
|
| 108 |
pFloat :: Parser Double
|
| 109 |
pFloat = fail "???"
|
| 110 |
|
| 111 |
pInt :: Parser Integer
|
| 112 |
pInt = do
|
| 113 |
s <- negate <$ char '-' <|> id <$ char '+' <|> return id
|
| 114 |
n <- pZeroNum <|> decimal
|
| 115 |
return (s n)
|
| 116 |
|
| 117 |
pZeroNum :: Parser Integer
|
| 118 |
pZeroNum = char '0' >>
|
| 119 |
( (oneOf "xX" >> number 16 hexDigit)
|
| 120 |
<|> (oneOf "oO" >> number 8 octDigit)
|
| 121 |
<|> decimal
|
| 122 |
<|> return 0
|
| 123 |
)
|
| 124 |
|
| 125 |
pHaskLikeAtom :: Parser HaskLikeAtom
|
| 126 |
pHaskLikeAtom =
|
| 127 |
HSInt <$> (try pInt <?> "integer")
|
| 128 |
<|> HSFloat <$> (try pFloat <?> "float")
|
| 129 |
<|> HSString <$> (pString <?> "string literal")
|
| 130 |
<|> HSIdent <$> (pToken <?> "token")
|
| 131 |
|
| 132 |
sHaskLikeAtom :: HaskLikeAtom -> Text
|
| 133 |
sHaskLikeAtom (HSIdent t) = t
|
| 134 |
sHaskLikeAtom (HSString s) = pack (show s)
|
| 135 |
sHaskLikeAtom (HSInt i) = pack (show i)
|
| 136 |
sHaskLikeAtom (HSFloat f) = pack (show f)
|
| 137 |
|
| 138 |
-- | This `SExprSpec` understands s-expressions that contain
|
| 139 |
-- Scheme-like tokens, as well as string literals, integer
|
| 140 |
-- literals, and floating-point literals. These are read
|
| 141 |
-- and shown with Haskell lexical syntax, so the same set
|
| 142 |
-- of values understood by GHC should be understood by this
|
| 143 |
-- spec as well. This includes string escapes, different
|
| 144 |
-- number bases, and so forth.
|
| 145 |
haskLikeSpec :: SExprSpec HaskLikeAtom (SExpr HaskLikeAtom)
|
| 146 |
haskLikeSpec = mkSpec pHaskLikeAtom sHaskLikeAtom
|