1 | |
{-# LANGUAGE OverloadedStrings #-}
|
| 1 |
{-# LANGUAGE TupleSections #-}
|
| 2 |
{-# LANGUAGE LambdaCase #-}
|
2 | 3 |
|
3 | |
module Data.NDBL.Parse (pNDBL) where
|
| 4 |
module Data.NDBL.Parse (Document, Group, Pair, pNDBL) where
|
4 | 5 |
|
5 | |
import Control.Applicative
|
6 | |
import Data.Attoparsec.Text
|
7 | |
import Data.Char (isSpace)
|
8 | |
import Data.Text (Text)
|
9 | |
import qualified Data.Text as T
|
10 | |
import Data.Text.Encoding (decodeUtf8)
|
11 | |
import Data.Word (Word8)
|
12 | |
import Prelude hiding (takeWhile)
|
| 6 |
type Document = [Group]
|
| 7 |
type Group = [Pair]
|
| 8 |
type Pair = (String,String)
|
13 | 9 |
|
14 | |
type Pair = (Text, Text)
|
| 10 |
type Result a = Either String (String, a)
|
| 11 |
type Parse a = String -> Result a
|
| 12 |
|
| 13 |
throw = Left
|
| 14 |
|
| 15 |
over :: (a -> b) -> Result a -> Result b
|
| 16 |
over _ (Left err) = Left err
|
| 17 |
over f (Right (s, x)) = Right (s, f x)
|
| 18 |
|
| 19 |
bind :: Result a -> ((String, a) -> Result b) -> Result b
|
| 20 |
bind (Left err) _ = Left err
|
| 21 |
bind (Right a) f = f a
|
15 | 22 |
|
16 | 23 |
isSep :: Char -> Bool
|
17 | |
isSep ' ' = True
|
18 | |
isSep '\t' = True
|
19 | |
isSep '\r' = True
|
20 | |
isSep '\n' = True
|
21 | |
isSep '=' = True
|
22 | |
isSep _ = False
|
| 24 |
isSep c = c `elem` " \t\r\n="
|
23 | 25 |
|
24 | |
-- | Skips any horizontal (non-new-line) spaces
|
25 | |
hSpace :: Parser ()
|
26 | |
hSpace = skipWhile isHorizontalSpace
|
| 26 |
pQString :: Parse String
|
| 27 |
pQString = go
|
| 28 |
where go ('\\':x:xs) = (x:) `over` go xs
|
| 29 |
go ('"':xs) = return (xs, "")
|
| 30 |
go (x:xs) = (x:) `over` go xs
|
27 | 31 |
|
28 | |
-- | Parses a quotedd string
|
29 | |
pQString :: Parser Text
|
30 | |
pQString = string "\"" *> (T.pack <$> sBody)
|
31 | |
where sBody = do
|
32 | |
c <- anyChar
|
33 | |
case c of
|
34 | |
'\\' -> (:) <$> anyChar <*> sBody
|
35 | |
'"' -> return []
|
36 | |
_ -> (c:) <$> sBody
|
| 32 |
pWord :: Parse String
|
| 33 |
pWord s@(x:xs)
|
| 34 |
| not (isSep x) = (x:) `over` pWord xs
|
| 35 |
pWord s = return (s, "")
|
37 | 36 |
|
38 | |
-- | Parse a word of any length
|
39 | |
pWord :: Parser Text
|
40 | |
pWord = takeWhile (not . isSep)
|
| 37 |
pWord1 :: Parse String
|
| 38 |
pWord1 (x:xs)
|
| 39 |
| not (isSep x) = (x:) `over` pWord xs
|
| 40 |
pWord1 s = throw $ "Expected word; found " ++ show s
|
41 | 41 |
|
42 | |
-- | Parse a word of at least length one
|
43 | |
pWord1 :: Parser Text
|
44 | |
pWord1 = takeWhile1 (not . isSep)
|
45 | 42 |
|
46 | |
-- | Parse a key-value pair
|
47 | |
pPair :: Parser Pair
|
48 | |
pPair = (,) <$> pWord1 <*. "=" <*> (pQString <|> pWord)
|
| 43 |
pPair :: Parse (String, String)
|
| 44 |
pPair s = bind (pWord1 s) $ \case
|
| 45 |
('=':'"':s', r) -> (r,) `over` pQString s'
|
| 46 |
('=':s', r) -> (r,) `over` pWord s'
|
| 47 |
_ -> throw "Expected '=' after pair name"
|
49 | 48 |
|
50 | |
-- | Directions passed back from the skipping parsers---whether
|
51 | |
-- to continue parsing the current block, start a new block,
|
52 | |
-- or stop entirely.
|
53 | |
data NewBlock
|
54 | |
= StartNewBlock
|
55 | |
| ContinueBlock
|
56 | |
| StopParsing
|
57 | |
deriving (Eq,Show)
|
| 49 |
isHSpace :: Char -> Bool
|
| 50 |
isHSpace c = c == ' ' || c == '\t'
|
58 | 51 |
|
59 | |
comment :: Parser ()
|
60 | |
comment = char '#' >> skipWhile (not . (== '\n'))
|
| 52 |
isVSpace :: Char -> Bool
|
| 53 |
isVSpace c = c == '\n' || c == '\r'
|
61 | 54 |
|
62 | |
-- | Skips to the next pair (or end of input) and returns whether to parse
|
63 | |
-- the next pair as part of the same group, the next group, or whether
|
64 | |
-- it's reached the end.
|
65 | |
sSkipToNext :: Bool -> Parser NewBlock
|
66 | |
sSkipToNext False = hSpace >> peekChar >>= go
|
67 | |
where go (Just '\n') = anyChar >> sSkipToNext True
|
68 | |
go (Just '\r') = anyChar >> sSkipToNext True
|
69 | |
go (Just '#') = comment >> sSkipToNext False
|
70 | |
go (Just _) = return ContinueBlock
|
71 | |
go Nothing = return StopParsing
|
72 | |
sSkipToNext True = peekChar >>= go
|
73 | |
where go (Just c) | isSpace c = anyChar >> sSkipToNext False
|
74 | |
| otherwise = return StartNewBlock
|
75 | |
go Nothing = return StopParsing
|
| 55 |
pSkip :: Parse Bool
|
| 56 |
pSkip (y:[]) = return ("", False)
|
| 57 |
pSkip (y:s@(x:xs))
|
| 58 |
| isVSpace y && isHSpace x = pSkip xs
|
| 59 |
| isVSpace y = return (s, False)
|
| 60 |
pSkip s@(x:xs)
|
| 61 |
| isHSpace x = pSkip xs
|
| 62 |
| otherwise = return (s, True)
|
76 | 63 |
|
77 | |
pBlock :: Parser (NewBlock, [Pair])
|
78 | |
pBlock = do
|
79 | |
p <- pPair
|
80 | |
b <- sSkipToNext False
|
81 | |
case b of
|
82 | |
StartNewBlock -> return (StartNewBlock, [p])
|
83 | |
StopParsing -> return (StopParsing, [p])
|
84 | |
ContinueBlock -> do
|
85 | |
(b, ps) <- pBlock
|
86 | |
return (b, p:ps)
|
87 | 64 |
|
88 | |
pBlocks :: Parser [[Pair]]
|
89 | |
pBlocks = do result <- pBlock
|
90 | |
go result
|
91 | |
where go (StartNewBlock, ps) = (ps:) <$> pBlocks
|
92 | |
go (StopParsing, ps) = return [ps]
|
93 | |
go _ = error "unreachable"
|
| 65 |
pGroup :: Parse [(String, String)]
|
| 66 |
pGroup s = bind (pPair s) $ \case
|
| 67 |
(s', p) -> bind (pSkip s') $ \case
|
| 68 |
(s'', True) -> (p:) `over` pGroup s''
|
| 69 |
(s'', False) -> return (s'', [p])
|
94 | 70 |
|
95 | |
pNDBL :: Text -> Either String [[Pair]]
|
96 | |
pNDBL t = parseOnly pBlocks t
|
| 71 |
pDocument :: Parse Document
|
| 72 |
pDocument s = bind (pGroup s) $ \case
|
| 73 |
("", g) -> return ("", [g])
|
| 74 |
(xs, g) -> (g:) `over` pDocument xs
|
| 75 |
|
| 76 |
pNDBL :: String -> Either String Document
|
| 77 |
pNDBL s = case pDocument s of
|
| 78 |
Right (_, d) -> Right d
|
| 79 |
Left err -> Left err
|