Initial commit into git repo
Getty Ritter
10 years ago
1 | {-# LANGUAGE OverloadedStrings #-} | |
2 | ||
3 | module Data.NDBL.Parse (pNDBL) where | |
4 | ||
5 | import Control.Applicative | |
6 | import Data.Attoparsec.Text | |
7 | import Data.Char (isSpace) | |
8 | import Data.Text (Text) | |
9 | import qualified Data.Text as T | |
10 | import Data.Text.Encoding (decodeUtf8) | |
11 | import Data.Word (Word8) | |
12 | import Prelude hiding (takeWhile) | |
13 | ||
14 | type Pair = (Text, Text) | |
15 | ||
16 | ||
17 | ||
18 | isSep :: Char -> Bool | |
19 | isSep ' ' = True | |
20 | isSep '\t' = True | |
21 | isSep '\r' = True | |
22 | isSep '\n' = True | |
23 | isSep '=' = True | |
24 | isSep _ = False | |
25 | ||
26 | hSpace :: Parser () | |
27 | hSpace = skipWhile isHorizontalSpace | |
28 | ||
29 | pString :: Parser Text | |
30 | pString = string "\"" *> (T.pack <$> sBody) | |
31 | where sBody = do | |
32 | c <- anyChar | |
33 | case c of | |
34 | '\\' -> (:) <$> anyChar <*> sBody | |
35 | '"' -> return [] | |
36 | _ -> (c:) <$> sBody | |
37 | ||
38 | pWord :: Parser Text | |
39 | pWord = takeWhile (not . isSep) | |
40 | ||
41 | pWord1 :: Parser Text | |
42 | pWord1 = takeWhile1 (not . isSep) | |
43 | ||
44 | pPair :: Parser Pair | |
45 | pPair = (,) <$> pWord1 <*. "=" <*> (pString <|> pWord) | |
46 | ||
47 | data NewBlock | |
48 | = StartNewBlock | |
49 | | ContinueBlock | |
50 | | StopParsing | |
51 | deriving (Eq,Show) | |
52 | ||
53 | sNextMSet :: Parser NewBlock | |
54 | sNextMSet = do | |
55 | hSpace | |
56 | n <- peekChar | |
57 | go n | |
58 | where go (Just '\n') = do | |
59 | _ <- anyChar | |
60 | n <- peekChar | |
61 | go' n | |
62 | go (Just '#') = do | |
63 | skipWhile (not . (== '\n')) | |
64 | sNextMSet | |
65 | go (Just _) = return ContinueBlock | |
66 | go Nothing = return StopParsing | |
67 | go' (Just '\n') = do | |
68 | _ <- anyChar | |
69 | n <- peekChar | |
70 | go' n | |
71 | go' (Just '#') = do | |
72 | skipWhile (not . (== '\n')) | |
73 | n <- peekChar | |
74 | go' n | |
75 | go' (Just c) | isSpace c = sNextMSet | |
76 | | otherwise = return StartNewBlock | |
77 | go' Nothing = return StopParsing | |
78 | ||
79 | pBlock :: Parser (NewBlock, [Pair]) | |
80 | pBlock = do | |
81 | p <- pPair | |
82 | b <- sNextMSet | |
83 | case b of | |
84 | StartNewBlock -> return (StartNewBlock, [p]) | |
85 | StopParsing -> return (StopParsing, [p]) | |
86 | ContinueBlock -> do | |
87 | (b, ps) <- pBlock | |
88 | return (b, p:ps) | |
89 | ||
90 | pBlocks :: Parser [[Pair]] | |
91 | pBlocks = do result <- pBlock | |
92 | go result | |
93 | where go (StartNewBlock, ps) = (ps:) <$> pBlocks | |
94 | go (StopParsing, ps) = return [ps] | |
95 | go _ = error "unreachable" | |
96 | ||
97 | pNDBL :: Text -> Either String [[Pair]] | |
98 | pNDBL t = parseOnly pBlocks t |
1 | {-# LANGUAGE OverloadedStrings #-} | |
2 | ||
3 | module Data.NDBL.Print where | |
4 | ||
5 | import Data.Char (isSpace) | |
6 | import Data.Text (Text,unpack,pack,concatMap,singleton,any) | |
7 | import Text.PrettyPrint | |
8 | import Prelude hiding (concatMap,any) | |
9 | ||
10 | pretty :: [[(Text, Text)]] -> Text | |
11 | pretty = pack . render . dNDBL | |
12 | ||
13 | dNDBL :: [[(Text, Text)]] -> Doc | |
14 | dNDBL = vcat . map (vcat . indentTail . map dPair) | |
15 | ||
16 | indentTail :: [Doc] -> [Doc] | |
17 | indentTail (x:xs) = x:map (nest 2) xs | |
18 | indentTail xs = xs | |
19 | ||
20 | dPair :: (Text, Text) -> Doc | |
21 | dPair (k,v) = text (unpack k) <> equals <> dVal v | |
22 | ||
23 | dVal :: Text -> Doc | |
24 | dVal t | |
25 | | any isSpace t = doubleQuotes $ text $ unpack $ concatMap go t | |
26 | | otherwise = text (unpack t) | |
27 | where go '\\' = "\\\\" | |
28 | go '"' = "\\\"" | |
29 | go c = singleton c |
1 | -- | NDBL is a simple configuration format designed after the @ndb(6)@ set of | |
2 | -- utilities used in the Plan 9 operating system. An NDBL file is a sequence | |
3 | -- of groups, where each group is a multiset of key-value pairs. This will | |
4 | -- cover the basics of NDBL; for a more in-depth explanation, consule | |
5 | -- [insert url here]. | |
6 | -- | |
7 | -- Grouping in NDBL is done by | |
8 | -- indentation: a new group is started by listing a key-value pair without | |
9 | -- indentation, while indentation (of any amount) means that the key-value | |
10 | -- pairs on that line belong to the previous group. A key-value pair consists | |
11 | -- of any non-space identifiers, followed by an '=', followed by another | |
12 | -- sequence of non-space identifiers. For example, the following parses to | |
13 | -- @[[(\"user\",\"alice\"),(\"city\",\"paris\")]]@: | |
14 | -- | |
15 | -- @ | |
16 | -- user=alice | |
17 | -- city=paris | |
18 | -- @ | |
19 | -- | |
20 | -- Whereas this one parses to @[[(\"user\",\"alice\")],[(\"city\",\"paris\")]]@: | |
21 | -- | |
22 | -- @ | |
23 | -- user=alice | |
24 | -- city=paris | |
25 | -- @ | |
26 | -- | |
27 | -- Empty values are allowed, but not empty keys, so the following is valid and | |
28 | -- parses as @[[(\"database\",\"\"),(\"file\",\"file1.txt\"),(\"file\",\"file2.txt\")]]@ | |
29 | -- | |
30 | -- @ | |
31 | -- database= | |
32 | -- file=file1.txt | |
33 | -- file=file2.txt | |
34 | -- @ | |
35 | ||
36 | module Data.NDBL ( -- * Convenience Types | |
37 | NDBL | |
38 | , Pair | |
39 | -- * MultiMap Representation | |
40 | -- $mm | |
41 | , decode | |
42 | , encode | |
43 | -- * List-Of-List Representation | |
44 | -- $ll | |
45 | , decodeList | |
46 | , encodeList | |
47 | -- * Flat List Representation | |
48 | -- $fl | |
49 | , decodeFlat | |
50 | , encodeFlat | |
51 | ) where | |
52 | ||
53 | import Data.MultiMap (MultiMap) | |
54 | import qualified Data.MultiMap as M | |
55 | import Data.Text (Text) | |
56 | ||
57 | import Data.NDBL.Parse | |
58 | import Data.NDBL.Print | |
59 | ||
60 | type NDBL = [MultiMap Text Text] | |
61 | type Pair = (Text, Text) | |
62 | ||
63 | -- $mm | |
64 | -- The most convenient way of parsing an NDBL file is as a | |
65 | -- multimap. In this case, the set of groups is given in-order | |
66 | -- as a list, and each individual group is represented as a | |
67 | -- multimap with text keys and values. | |
68 | -- | |
69 | -- This does mean that the empty string is a possible value for the multimap, | |
70 | -- although all the keys will be at least one character long. | |
71 | ||
72 | decode :: Text -> Maybe NDBL | |
73 | decode t = case pNDBL t of | |
74 | Right r -> Just (map M.fromList r) | |
75 | Left _ -> Nothing | |
76 | ||
77 | encode :: NDBL -> Text | |
78 | encode = pretty . map M.toList | |
79 | ||
80 | -- $ll | |
81 | -- Not every application wants to bring in a dependency on the | |
82 | -- multimap package, so functions that deal with lists of lists | |
83 | -- of tuples are also provided. | |
84 | ||
85 | decodeList :: Text -> Maybe [[Pair]] | |
86 | decodeList t = case pNDBL t of | |
87 | Right r -> Just r | |
88 | Left _ -> Nothing | |
89 | ||
90 | encodeList :: [[Pair]] -> Text | |
91 | encodeList = pretty | |
92 | ||
93 | -- $fl | |
94 | -- Often a config file doesn't need grouping, so these are offered | |
95 | -- for pure utility. | |
96 | ||
97 | decodeFlat :: Text -> Maybe [Pair] | |
98 | decodeFlat t = case pNDBL t of | |
99 | Right r -> Just $ concat r | |
100 | Left _ -> Nothing | |
101 | ||
102 | encodeFlat :: [Pair] -> Text | |
103 | encodeFlat = pretty . map (:[]) |
1 | NDBL | |
2 | ==== | |
3 | ||
4 | NDBL is a basic config file format based roughly on the config format of | |
5 | Plan 9's [ndb](http://man.cat-v.org/plan_b/6/ndb). | |
6 | ||
7 | Short Example | |
8 | ------------- | |
9 | ||
10 | # A short exemplary snippet of NDBL | |
11 | fileset= | |
12 | file=/home/gdritter/english.txt | |
13 | file=/home/gdritter/french.txt | |
14 | file=/home/gdritter/italian.txt | |
15 | default=eng | |
16 | ||
17 | remote=main | |
18 | ip=192.168.1.300 | |
19 | user=guest | |
20 | key=public.pem | |
21 | ||
22 | Rationale | |
23 | --------- | |
24 | ||
25 | NDBL is a very straighforward configuration format, which means it's not | |
26 | always appropriate for situations in which large amounts of data are | |
27 | needed. It cannot represent arbitrary hierarchical structures like | |
28 | JSON, nor does it have the wealth of data types that YAML does. However, | |
29 | it is a very simple format to both produce and implement (both by | |
30 | hand and in code) and its simplicity is one of its major virtues. Other | |
31 | options in this space, but with more structure and extant tooling, | |
32 | include [YAML](http://en.wikipedia.org/wiki/YAML) and | |
33 | [TOML](https://github.com/mojombo/toml), both of which are significantly | |
34 | more complicated than NDBL. | |
35 | ||
36 | Structure of an NDBL Document | |
37 | ----------------------------- | |
38 | ||
39 | All NDBL documents consist of a sequence of multisets of key-value pairs. | |
40 | All data is represented as text; it is the responsibility of the library | |
41 | user to parse numeric data, boolean data, &c, during use. | |
42 | ||
43 | A _comment_ is introduced by any whitespace (including newlines) | |
44 | followed by a pound sign (`#`) and lasts until the end of a line. This | |
45 | means that a key cannot begin with the `#` character, but that a `#` | |
46 | character can occur as a constitutent of a key-value pair. | |
47 | ||
48 | A _key-value pair_ consists of a string of at least length one, followed | |
49 | by a equals sign (`=`) and subsequently by a string of at least zero. | |
50 | The value may be quoted, in which case it is allowed to contain any | |
51 | printable character, including the equals sign, whitespace, and newlines. | |
52 | An unquoted value is allowed to contain any non-whitespace character | |
53 | except the equals sign. The value can be zero length. No spaces are | |
54 | allowed around the equals sign. | |
55 | ||
56 | A _group_ is a multiset of key-value pairs. A group is introduced by a | |
57 | non-indented key-value pair; all subsequent key-value pairs on the same | |
58 | line, as well as any key-value pairs on subsequent indented lines, belong | |
59 | to the same group. | |
60 | ||
61 | A _document_ is a sequence of groups. | |
62 | ||
63 | Examples With Explanation | |
64 | ------------------------- | |
65 | ||
66 | In the examples below, I will use `{ key: value }` as shorthand to represent a | |
67 | multiset. | |
68 | ||
69 | host=machine1 | |
70 | host=machine2 | |
71 | ||
72 | This parses to the following structure: | |
73 | ||
74 | [{host:machine1},{host:machine2}] | |
75 | ||
76 | Adding indentation merges the two multisets, as the second line is now | |
77 | considered a 'cotinuation' of the first group. | |
78 | ||
79 | host=machine1 | |
80 | host=machine2 | |
81 | ||
82 | This becomes | |
83 | ||
84 | [{host:machine1,host:machine2}] | |
85 | ||
86 | A third non-indented line will then start a new group: | |
87 | ||
88 | host=machine1 | |
89 | host=machine2 | |
90 | host=machine3 | |
91 | ||
92 | This becomes | |
93 | ||
94 | [{host:machine1,host:machine2},{host:machine3}] | |
95 | ||
96 | Empty values are permitted, and can be used as a tag of sorts. | |
97 | ||
98 | database= | |
99 | file=file1.txt | |
100 | file=file2.txt | |
101 | file=file3.txt | |
102 | ||
103 | This becomes | |
104 | ||
105 | [{database:},{file:file1.txt},{file:file2.txt},{file:file3.txt}] | |
106 | ||
107 | Comments are allowed but must come after a whitespace character, which | |
108 | means that the following document contains no comments: | |
109 | ||
110 | key=value#hello | |
111 | ||
112 | This becomes | |
113 | ||
114 | [{key:value#hello}] | |
115 | ||
116 | But this document does contain a comment: | |
117 | ||
118 | key=value #hello | |
119 | ||
120 | This becomes | |
121 | ||
122 | [{key:value}] | |
123 | ||
124 | Comments can begin a line, as well. | |
125 | ||
126 | # WARNING: do not change | |
127 | host=hg-remote | |
128 | portforwarding= | |
129 | hostname=hunter-gratzner.example.com | |
130 | port=22 | |
131 | user=abu-al-walid | |
132 | nicename="H-G Remote Server" |
1 | -- Initial ndb-like.cabal generated by cabal init. For further | |
2 | -- documentation, see http://haskell.org/cabal/users-guide/ | |
3 | ||
4 | name: ndbl | |
5 | version: 0.1.0.0 | |
6 | synopsis: A simple interface to the NDBL config format. | |
7 | description: A simplistic interface to a config file format | |
8 | meant to resemble the configuration file of | |
9 | Plan 9's ndb(6) and associated utilities. | |
10 | license: BSD3 | |
11 | author: Getty Ritter | |
12 | maintainer: gettylefou@gmail.com | |
13 | copyright: (c) 2014 Getty Ritter | |
14 | category: Data | |
15 | build-type: Simple | |
16 | cabal-version: >=1.10 | |
17 | ||
18 | library | |
19 | exposed-modules: Data.NDBL | |
20 | other-modules: Data.NDBL.Parse, Data.NDBL.Print | |
21 | build-depends: base >=4.6 && <4.7, | |
22 | multimap, | |
23 | attoparsec, | |
24 | text, | |
25 | pretty | |
26 | default-language: Haskell2010⏎ |