Finished comments; clarified text
Getty Ritter
9 years ago
2 | 2 | {-# LANGUAGE LambdaCase #-} |
3 | 3 | |
4 | 4 | module Data.NDBL.Parse (Document, Group, Pair, pNDBL) where |
5 | ||
6 | import Data.Char (isPrint, isSpace) | |
5 | 7 | |
-- | A parsed document: a list of groups.
6 | 8 | type Document = [Group] |
-- | A group: a list of pairs ('Pair' is declared outside this excerpt).
7 | 9 | type Group = [Pair] |
25 | 27 | |
-- | Parse the body of a double-quoted string up to its closing quote.
--
-- Behaviour of the local worker @go@, equation by equation:
--
--   * a backslash followed by a backslash or a double quote keeps the
--     escaped character; a backslash followed by anything else is rejected
--     with an "Unrecognized escape" error;
--   * an unescaped double quote ends the string and returns the remaining
--     input paired with the empty string;
--   * any other printable or whitespace character is kept; every other
--     character is rejected as non-printable;
--   * running out of input before the closing quote is an error.
--
-- NOTE(review): there is no equation for an opening quote, so presumably the
-- caller consumes it before calling this parser -- confirm at the call site.
-- NOTE(review): @over@ is defined outside this excerpt; it presumably applies
-- the given function to the value produced by the recursive parse -- confirm.
26 | 28 | pQString :: Parse String |
27 | 29 | pQString = go |
28 |
where go ('\\':x:xs) |
|
30 | where go ('\\':x:xs) | |
31 | | x == '\\' || | |
32 | x == '"' = (x:) `over` go xs | |
33 | | otherwise = throw $ "Unrecognized escape: \"\\" ++ [x] ++ "\"" | |
29 | 34 | go ('"':xs) = return (xs, "") |
30 |
go (x:xs) |
|
35 | go (x:xs) | |
36 | | isPrint x || isSpace x = (x:) `over` go xs | |
37 | | otherwise = throw $ "Non-printable character: " ++ show x | |
38 | go [] = throw $ "End of document while still inside string" | |
31 | 39 | |
-- | Parse a possibly empty bare word.
--
-- Printable characters that are not separators (per @isSep@, defined outside
-- this excerpt) are accumulated. On reaching a separator the parser stops
-- and returns the unconsumed input, separator included, paired with the
-- empty string. A non-printable character is an error. Empty input also
-- succeeds with the empty string.
32 | 40 | pWord :: Parse String |
33 | 41 | pWord s@(x:xs) |
34 |
|
|
42 | | isPrint x && not (isSep x) = (x:) `over` pWord xs | |
43 | | isSep x = return (s, "") | |
44 | | not (isPrint x) = throw $ "Non-printable character: " ++ show x | |
35 | 45 | pWord s = return (s, "") |
36 | 46 | |
-- | Parse a bare word of at least one character.
--
-- The first character must be printable and not a separator; the rest of
-- the word is then parsed with 'pWord'. A leading separator, a leading
-- non-printable character, or empty input is an error.
--
-- NOTE(review): the leading-separator error text mentions only @`=`@, but it
-- is raised for any character accepted by @isSep@ (defined outside this
-- excerpt) -- confirm whether @isSep@ covers more than the equals sign.
37 | 47 | pWord1 :: Parse String |
38 | 48 | pWord1 (x:xs) |
39 |
|
|
49 | | isPrint x && not (isSep x) = (x:) `over` pWord xs | |
50 | | isSep x = throw $ "`=` without previous key" | |
51 | | not (isPrint x) = throw $ "Non-printable character: " ++ show x | |
40 | 52 | pWord1 s = throw $ "Expected word; found " ++ show s |
41 | 53 | |
42 | 54 | |
-- | True for the vertical-whitespace characters: line feed and carriage
-- return.
52 | 64 | isVSpace :: Char -> Bool |
53 | 65 | isVSpace c = c == '\n' || c == '\r' |
54 | 66 | |
-- | Discard the remainder of a comment.
--
-- Characters are dropped one at a time until a line feed or carriage return
-- is reached; parsing then continues with 'pSkip' on the input starting at
-- that line-break character.
--
-- NOTE(review): only the non-empty pattern is visible here. If no equation
-- for empty input exists elsewhere, a comment that runs to the end of the
-- input without a trailing newline would fail to match -- confirm.
67 | pComment :: Parse Bool | |
68 | pComment s@(x:xs) | |
69 | | isVSpace x = pSkip s | |
70 | | otherwise = pComment xs | |
71 | ||
55 | 72 | pSkip :: Parse Bool |
56 |
pSkip |
|
73 | pSkip "" = return ("", False) | |
74 | pSkip (y:"") = return ("", False) | |
75 | pSkip ('#':xs) = pComment xs | |
57 | 76 | pSkip (y:s@(x:xs)) |
58 | 77 | | isVSpace y && isHSpace x = pSkip xs |
59 | 78 | | isVSpace y = return (s, False) |
60 | 60 | A _comment_ is introduced by any whitespace (including newlines) |
61 | 61 | followed by a pound sign (`#`) and lasts until the end of a line. This |
62 | 62 | means that a key cannot begin with the `#` character, but that a `#` |
63 |
character can occur as a constituent of a key-value pair
|
63 | character can occur as a constituent of a key-value pair, including |
64 | as a trailing character. | |
64 | 65 | |
65 | 66 | A _key-value pair_ consists of a string of length at least one, followed |
66 | 67 | by an equals sign (`=`) and subsequently by a string of length at least zero. |
67 |
The |
|
68 | The key must be a bare string, and can contain any printable non-whitespace | |
69 | character except the equals sign (`=`) and additionally must not begin | |
70 | with a pound sign (`#`). It is acceptable for a key to contain a |
71 | pound sign if it is not the first character of the key. | |
72 | The value may be quoted with double quotes (`"`), in which case it is allowed to contain any | |
68 | 73 | printable character, including the equals sign, whitespace, and newlines. |
69 | An unquoted value is allowed to contain any non-whitespace character | |
70 | except the equals sign. The value can be zero length. No spaces are | |
74 | Quoted values understand the escape sequences `\\` for a backslash and | |
75 | `\"` for a double quote; no other escape sequences are provided. | |
76 | An unquoted value is allowed to contain any printable non-whitespace | |
77 | character except the equals sign. The value can be zero length. No spaces are | |
71 | 78 | allowed around the equals sign. |
72 | 79 | |
73 |
A _group_ is a |
|
80 | A _group_ is a sequence of key-value pairs. A group is introduced by a | |
74 | 81 | non-indented key-value pair; all subsequent key-value pairs on the same |
75 | 82 | line, as well as any key-value pairs on subsequent indented lines, belong |
76 | 83 | to the same group. |