gdritter repos ndbl / 9c1bc6a
Initial commit into git repo Getty Ritter 10 years ago
6 changed file(s) with 390 addition(s) and 0 deletion(s). Collapse all Expand all
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Data.NDBL.Parse (pNDBL) where
4
5 import Control.Applicative
6 import Data.Attoparsec.Text
7 import Data.Char (isSpace)
8 import Data.Text (Text)
9 import qualified Data.Text as T
10 import Data.Text.Encoding (decodeUtf8)
11 import Data.Word (Word8)
12 import Prelude hiding (takeWhile)
13
14 type Pair = (Text, Text)
15
16
17
18 isSep :: Char -> Bool
19 isSep ' ' = True
20 isSep '\t' = True
21 isSep '\r' = True
22 isSep '\n' = True
23 isSep '=' = True
24 isSep _ = False
25
26 hSpace :: Parser ()
27 hSpace = skipWhile isHorizontalSpace
28
29 pString :: Parser Text
30 pString = string "\"" *> (T.pack <$> sBody)
31 where sBody = do
32 c <- anyChar
33 case c of
34 '\\' -> (:) <$> anyChar <*> sBody
35 '"' -> return []
36 _ -> (c:) <$> sBody
37
38 pWord :: Parser Text
39 pWord = takeWhile (not . isSep)
40
41 pWord1 :: Parser Text
42 pWord1 = takeWhile1 (not . isSep)
43
44 pPair :: Parser Pair
45 pPair = (,) <$> pWord1 <*. "=" <*> (pString <|> pWord)
46
47 data NewBlock
48 = StartNewBlock
49 | ContinueBlock
50 | StopParsing
51 deriving (Eq,Show)
52
53 sNextMSet :: Parser NewBlock
54 sNextMSet = do
55 hSpace
56 n <- peekChar
57 go n
58 where go (Just '\n') = do
59 _ <- anyChar
60 n <- peekChar
61 go' n
62 go (Just '#') = do
63 skipWhile (not . (== '\n'))
64 sNextMSet
65 go (Just _) = return ContinueBlock
66 go Nothing = return StopParsing
67 go' (Just '\n') = do
68 _ <- anyChar
69 n <- peekChar
70 go' n
71 go' (Just '#') = do
72 skipWhile (not . (== '\n'))
73 n <- peekChar
74 go' n
75 go' (Just c) | isSpace c = sNextMSet
76 | otherwise = return StartNewBlock
77 go' Nothing = return StopParsing
78
79 pBlock :: Parser (NewBlock, [Pair])
80 pBlock = do
81 p <- pPair
82 b <- sNextMSet
83 case b of
84 StartNewBlock -> return (StartNewBlock, [p])
85 StopParsing -> return (StopParsing, [p])
86 ContinueBlock -> do
87 (b, ps) <- pBlock
88 return (b, p:ps)
89
90 pBlocks :: Parser [[Pair]]
91 pBlocks = do result <- pBlock
92 go result
93 where go (StartNewBlock, ps) = (ps:) <$> pBlocks
94 go (StopParsing, ps) = return [ps]
95 go _ = error "unreachable"
96
97 pNDBL :: Text -> Either String [[Pair]]
98 pNDBL t = parseOnly pBlocks t
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Data.NDBL.Print where
4
5 import Data.Char (isSpace)
6 import Data.Text (Text,unpack,pack,concatMap,singleton,any)
7 import Text.PrettyPrint
8 import Prelude hiding (concatMap,any)
9
10 pretty :: [[(Text, Text)]] -> Text
11 pretty = pack . render . dNDBL
12
13 dNDBL :: [[(Text, Text)]] -> Doc
14 dNDBL = vcat . map (vcat . indentTail . map dPair)
15
16 indentTail :: [Doc] -> [Doc]
17 indentTail (x:xs) = x:map (nest 2) xs
18 indentTail xs = xs
19
20 dPair :: (Text, Text) -> Doc
21 dPair (k,v) = text (unpack k) <> equals <> dVal v
22
23 dVal :: Text -> Doc
24 dVal t
25 | any isSpace t = doubleQuotes $ text $ unpack $ concatMap go t
26 | otherwise = text (unpack t)
27 where go '\\' = "\\\\"
28 go '"' = "\\\""
29 go c = singleton c
1 -- | NDBL is a simple configuration format designed after the @ndb(6)@ set of
2 -- utilities used in the Plan 9 operating system. An NDBL file is a sequence
3 -- of groups, where each group is a multiset of key-value pairs. This will
4 -- cover the basics of NDBL; for a more in-depth explanation, consule
5 -- [insert url here].
6 --
7 -- Grouping in NDBL is done by
8 -- indentation: a new group is started by listing a key-value pair without
9 -- indentation, while indentation (of any amount) means that the key-value
10 -- pairs on that line belong to the previous group. A key-value pair consists
11 -- of any non-space identifiers, followed by an '=', followed by another
12 -- sequence of non-space identifiers. For example, the following parses to
13 -- @[[(\"user\",\"alice\"),(\"city\",\"paris\")]]@:
14 --
15 -- @
16 -- user=alice
17 -- city=paris
18 -- @
19 --
20 -- Whereas this one parses to @[[(\"user\",\"alice\")],[(\"city\",\"paris\")]]@:
21 --
22 -- @
23 -- user=alice
24 -- city=paris
25 -- @
26 --
27 -- Empty values are allowed, but not empty keys, so the following is valid and
28 -- parses as @[[(\"database\",\"\"),(\"file\",\"file1.txt\"),(\"file\",\"file2.txt\")]]@
29 --
30 -- @
31 -- database=
32 -- file=file1.txt
33 -- file=file2.txt
34 -- @
35
36 module Data.NDBL ( -- * Convenience Types
37 NDBL
38 , Pair
39 -- * MultiMap Representation
40 -- $mm
41 , decode
42 , encode
43 -- * List-Of-List Representation
44 -- $ll
45 , decodeList
46 , encodeList
47 -- * Flat List Representation
48 -- $fl
49 , decodeFlat
50 , encodeFlat
51 ) where
52
53 import Data.MultiMap (MultiMap)
54 import qualified Data.MultiMap as M
55 import Data.Text (Text)
56
57 import Data.NDBL.Parse
58 import Data.NDBL.Print
59
60 type NDBL = [MultiMap Text Text]
61 type Pair = (Text, Text)
62
63 -- $mm
64 -- The most convenient way of parsing an NDBL file is as a
65 -- multimap. In this case, the set of groups is given in-order
66 -- as a list, and each individual group is represented as a
67 -- multimap with text keys and values.
68 --
69 -- This does mean that the empty string is a possible value for the multimap,
70 -- although all the keys will be at least one character long.
71
72 decode :: Text -> Maybe NDBL
73 decode t = case pNDBL t of
74 Right r -> Just (map M.fromList r)
75 Left _ -> Nothing
76
77 encode :: NDBL -> Text
78 encode = pretty . map M.toList
79
80 -- $ll
81 -- Not every application wants to bring in a dependency on the
82 -- multimap package, so functions that deal with lists of lists
83 -- of tuples are also provided.
84
85 decodeList :: Text -> Maybe [[Pair]]
86 decodeList t = case pNDBL t of
87 Right r -> Just r
88 Left _ -> Nothing
89
90 encodeList :: [[Pair]] -> Text
91 encodeList = pretty
92
93 -- $fl
94 -- Often a config file doesn't need grouping, so these are offered
95 -- for pure utility.
96
97 decodeFlat :: Text -> Maybe [Pair]
98 decodeFlat t = case pNDBL t of
99 Right r -> Just $ concat r
100 Left _ -> Nothing
101
102 encodeFlat :: [Pair] -> Text
103 encodeFlat = pretty . map (:[])
1 NDBL
2 ====
3
4 NDBL is a basic config file format based roughly on the config format of
5 Plan 9's [ndb](http://man.cat-v.org/plan_b/6/ndb).
6
7 Short Example
8 -------------
9
10 # A short exemplary snippet of NDBL
11 fileset=
12 file=/home/gdritter/english.txt
13 file=/home/gdritter/french.txt
14 file=/home/gdritter/italian.txt
15 default=eng
16
17 remote=main
18 ip=192.168.1.300
19 user=guest
20 key=public.pem
21
22 Rationale
23 ---------
24
25 NDBL is a very straighforward configuration format, which means it's not
26 always appropriate for situations in which large amounts of data are
27 needed. It cannot represent arbitrary hierarchical structures like
28 JSON, nor does it have the wealth of data types that YAML does. However,
29 it is a very simple format to both produce and implement (both by
30 hand and in code) and its simplicity is one of its major virtues. Other
31 options in this space, but with more structure and extant tooling,
32 include [YAML](http://en.wikipedia.org/wiki/YAML) and
33 [TOML](https://github.com/mojombo/toml), both of which are significantly
34 more complicated than NDBL.
35
36 Structure of an NDBL Document
37 -----------------------------
38
39 All NDBL documents consist of a sequence of multisets of key-value pairs.
40 All data is represented as text; it is the responsibility of the library
41 user to parse numeric data, boolean data, &c, during use.
42
43 A _comment_ is introduced by any whitespace (including newlines)
44 followed by a pound sign (`#`) and lasts until the end of a line. This
45 means that a key cannot begin with the `#` character, but that a `#`
46 character can occur as a constitutent of a key-value pair.
47
48 A _key-value pair_ consists of a string of at least length one, followed
49 by a equals sign (`=`) and subsequently by a string of at least zero.
50 The value may be quoted, in which case it is allowed to contain any
51 printable character, including the equals sign, whitespace, and newlines.
52 An unquoted value is allowed to contain any non-whitespace character
53 except the equals sign. The value can be zero length. No spaces are
54 allowed around the equals sign.
55
56 A _group_ is a multiset of key-value pairs. A group is introduced by a
57 non-indented key-value pair; all subsequent key-value pairs on the same
58 line, as well as any key-value pairs on subsequent indented lines, belong
59 to the same group.
60
61 A _document_ is a sequence of groups.
62
63 Examples With Explanation
64 -------------------------
65
66 In the examples below, I will use `{ key: value }` as shorthand to represent a
67 multiset.
68
69 host=machine1
70 host=machine2
71
72 This parses to the following structure:
73
74 [{host:machine1},{host:machine2}]
75
76 Adding indentation merges the two multisets, as the second line is now
77 considered a 'cotinuation' of the first group.
78
79 host=machine1
80 host=machine2
81
82 This becomes
83
84 [{host:machine1,host:machine2}]
85
86 A third non-indented line will then start a new group:
87
88 host=machine1
89 host=machine2
90 host=machine3
91
92 This becomes
93
94 [{host:machine1,host:machine2},{host:machine3}]
95
96 Empty values are permitted, and can be used as a tag of sorts.
97
98 database=
99 file=file1.txt
100 file=file2.txt
101 file=file3.txt
102
103 This becomes
104
105 [{database:},{file:file1.txt},{file:file2.txt},{file:file3.txt}]
106
107 Comments are allowed but must come after a whitespace character, which
108 means that the following document contains no comments:
109
110 key=value#hello
111
112 This becomes
113
114 [{key:value#hello}]
115
116 But this document does contain a comment:
117
118 key=value #hello
119
120 This becomes
121
122 [{key:value}]
123
124 Comments can begin a line, as well.
125
126 # WARNING: do not change
127 host=hg-remote
128 portforwarding=
129 hostname=hunter-gratzner.example.com
130 port=22
131 user=abu-al-walid
132 nicename="H-G Remote Server"
1 import Distribution.Simple
2 main = defaultMain
1 -- Initial ndb-like.cabal generated by cabal init. For further
2 -- documentation, see http://haskell.org/cabal/users-guide/
3
4 name: ndbl
5 version: 0.1.0.0
6 synopsis: A simple interface to the NDBL config format.
7 description: A simplistic interface to a config file format
8 meant to resemble the configuration file of
9 Plan 9's ndb(6) and associated utilities.
10 license: BSD3
11 author: Getty Ritter
12 maintainer: gettylefou@gmail.com
13 copyright: (c) 2014 Getty Ritter
14 category: Data
15 build-type: Simple
16 cabal-version: >=1.10
17
18 library
19 exposed-modules: Data.NDBL
20 other-modules: Data.NDBL.Parse, Data.NDBL.Print
21 build-depends: base >=4.6 && <4.7,
22 multimap,
23 attoparsec,
24 text,
25 pretty
26 default-language: Haskell2010