gdritter repos s-cargot / master Data / SCargot / Language / HaskLike.hs
master

Tree @master (Download .tar.gz)

HaskLike.hs @master

c01b118
5eb10bd
 
ed1b3db
 
 
 
 
6af98a2
 
07f31ac
 
 
 
ed1b3db
5eb10bd
14c258a
 
 
5eb10bd
 
 
 
 
 
 
 
2b126f2
5eb10bd
ed1b3db
5eb10bd
 
 
6036b46
 
 
 
 
 
 
5eb10bd
 
 
 
 
 
 
2b126f2
5eb10bd
 
 
 
 
 
 
 
 
 
 
 
 
c01b118
 
 
6036b46
 
07f31ac
 
5eb10bd
14c258a
5eb10bd
 
 
 
 
 
2b126f2
 
5eb10bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6036b46
 
07f31ac
 
2b126f2
6f8a584
 
14c258a
2b126f2
14c258a
6f8a584
 
14c258a
6f8a584
14c258a
 
6f8a584
2b126f2
6f8a584
 
 
 
 
 
 
5eb10bd
6036b46
 
07f31ac
 
6f8a584
2b126f2
6f8a584
5eb10bd
 
 
2b126f2
 
 
5eb10bd
 
 
 
6f8a584
07f31ac
 
 
2b126f2
5eb10bd
 
 
 
 
 
 
ed1b3db
5eb10bd
325d34e
 
 
 
 
 
ed1b3db
 
 
 
 
 
cb4adc5
 
 
 
 
 
 
 
 
 
ed1b3db
 
 
 
 
 
 
 
 
 
6af98a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}

module Data.SCargot.Language.HaskLike
  ( -- $info
    HaskLikeAtom(..)
  , haskLikeParser
  , haskLikePrinter
  , locatedHaskLikeParser
  , locatedHaskLikePrinter
    -- * Individual Parsers
  , parseHaskellString
  , parseHaskellFloat
  , parseHaskellInt
  ) where

#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<$))
#endif
import           Data.Maybe (catMaybes)
import           Data.String (IsString(..))
import           Data.Text (Text, pack)
import           Text.Parsec
import           Text.Parsec.Text (Parser)

import           Prelude hiding (concatMap)

import Data.SCargot.Common
import Data.SCargot.Repr.Basic (SExpr)
import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint)

{- $info

This module is intended for simple, ad-hoc configuration or data
formats that might not need their on rich structure but might benefit
from a few various kinds of literals. The 'haskLikeParser' understands
identifiers as defined by R5RS, as well as string, integer, and
floating-point literals as defined by the Haskell 2010 spec. It does
__not__ natively understand other data types, such as booleans,
vectors, bitstrings.

-}


-- | An atom type that understands Haskell-like values as well as
--   Scheme-like identifiers.
data HaskLikeAtom
  = HSIdent  Text  -- ^ An identifier, parsed according to the R5RS Scheme
                   --   standard
  | HSString Text  -- ^ A string, parsed according to the syntax for string
                   --   literals in the Haskell report
  | HSInt Integer  -- ^ An arbitrary-sized integer value, parsed according to
                   --   the syntax for integer literals in the Haskell report
  | HSFloat Double -- ^ A double-precision floating-point value, parsed
                   --   according to the syntax for floats in the Haskell
                   --   report
    deriving (Eq, Show)

instance IsString HaskLikeAtom where
  fromString = HSIdent . fromString

instance IsString (Located HaskLikeAtom) where
  fromString = (At dLocation) . HSIdent . fromString

-- | Parse a Haskell string literal as defined by the Haskell 2010
-- language specification.
parseHaskellString :: Parser Text
parseHaskellString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
  where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
        esc = do _ <- char '\\'
                 Nothing <$ (gap <|> char '&') <|>
                   Just <$> code
        gap  = many1 space >> char '\\'
        code = eEsc <|> eNum <|> eCtrl <|> eAscii
        eCtrl  = char '^' >> unCtrl <$> upper
        eNum   = (toEnum . fromInteger) <$>
                   (decNumber <|> (char 'o' >> octNumber)
                              <|> (char 'x' >> hexNumber))
        eEsc   = choice [ char a >> return b | (a, b) <- escMap ]
        eAscii = choice [ try (string a >> return b)
                        | (a, b) <- asciiMap ]
        unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1)

escMap :: [(Char,  Char)]
escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"

asciiMap :: [(String, Char)]
asciiMap = zip
  ["BS","HT","LF","VT","FF","CR","SO","SI","EM"
  ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX"
  ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3"
  ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"]
  ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++
   "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
   "\SYN\ETB\CAN\SUB\ESC\DEL")

-- | Parse a Haskell floating-point number as defined by the Haskell
-- 2010 language specification.
parseHaskellFloat :: Parser Double
parseHaskellFloat = do
  n <- decNumber
  withDot n <|> noDot n
  where withDot n = do
          _ <- char '.'
          m <- decNumber
          e <- option 1.0 expn
          return ((fromIntegral n + asDec m 0) * e)
        noDot n = do
          e <- expn
          return (fromIntegral n * e)
        expn = do
          _ <- oneOf "eE"
          s <- power
          x <- decNumber
          return (10 ** s (fromIntegral x))
        asDec 0 k = k
        asDec n k =
          asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1)

power :: Num a => Parser (a -> a)
power = negate <$ char '-' <|> id <$ char '+' <|> return id

-- | Parse a Haskell integer literal as defined by the Haskell 2010
-- language specification.
parseHaskellInt :: Parser Integer
parseHaskellInt = do
  s <- power
  n <- pZeroNum <|> decNumber
  return (fromIntegral (s n))

pZeroNum :: Parser Integer
pZeroNum = char '0' >>
  (  (oneOf "xX" >> hexNumber)
 <|> (oneOf "oO" >> octNumber)
 <|> decNumber
 <|> return 0
  )

pHaskLikeAtom :: Parser HaskLikeAtom
pHaskLikeAtom
   =  HSFloat   <$> (try parseHaskellFloat <?> "float")
  <|> HSInt     <$> (try parseHaskellInt   <?> "integer")
  <|> HSString  <$> (parseHaskellString    <?> "string literal")
  <|> HSIdent   <$> (parseR5RSIdent <?> "token")

sHaskLikeAtom :: HaskLikeAtom -> Text
sHaskLikeAtom (HSIdent t)  = t
sHaskLikeAtom (HSString s) = pack (show s)
sHaskLikeAtom (HSInt i)    = pack (show i)
sHaskLikeAtom (HSFloat f)  = pack (show f)

-- | This `SExprParser` understands s-expressions that contain
--   Scheme-like tokens, as well as string literals, integer
--   literals, and floating-point literals. Each of these values
--   is parsed according to the lexical rules in the Haskell
--   report, so the same set of string escapes, numeric bases,
--   and floating-point options are available. This spec does
--   not parse comments and does not understand any reader
--   macros.
--
-- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")"
-- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)]
haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom)
haskLikeParser = mkParser pHaskLikeAtom

-- | A 'haskLikeParser' which produces 'Located' values
--
-- >>> decode locatedHaskLikeParser $ pack "(0x01 \"\\x65lephant\")"
-- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 6)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 7) (line 1, column 20)) (HSString "elephant"))) SNil)]
--
-- >>> decode locatedHaskLikeParser $ pack "(1 elephant)"
-- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)]
locatedHaskLikeParser :: SExprParser (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom))
locatedHaskLikeParser = mkParser $ located pHaskLikeAtom

-- | This 'SExprPrinter' emits s-expressions that contain Scheme-like
--   tokens as well as string literals, integer literals, and floating-point
--   literals, which will be emitted as the literals produced by Haskell's
--   'show' function. This printer will produce a flat s-expression with
--   no indentation of any kind.
--
-- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]]
-- "(1 \"elephant\")"
haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom)
haskLikePrinter = flatPrint sHaskLikeAtom

-- | Ignore location tags when packing values into text
sLocatedHasklikeAtom :: Located HaskLikeAtom -> Text
sLocatedHasklikeAtom (At _loc e) = sHaskLikeAtom e

-- | A 'SExprPrinter' for 'Located' values. Works exactly like 'haskLikePrinter'
--   It ignores the location tags when printing the result.
--
-- >>> let (Right dec) = decode locatedHaskLikeParser $ pack "(1 elephant)"
-- [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)]
--
-- >>> encode locatedHaskLikePrinter dec
-- "(1 elephant)"
locatedHaskLikePrinter :: SExprPrinter (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom))
locatedHaskLikePrinter = flatPrint sLocatedHasklikeAtom