Merge branch 'master' of rosencrantz:/srv/git/adnot
Getty Ritter
6 years ago
15 | 15 | import Data.Adnot.Parse |
16 | 16 | import Data.Adnot.Type |
17 | 17 | import Data.Adnot.Emit |
18 | import Data.Adnot.Parse | |
18 | 19 | import Data.Int |
19 | 20 | import Data.Word |
20 | 21 | import qualified Data.ByteString as BS |
172 | 173 | instance ToAdnot a => ToAdnot (MS.Map T.Text a) where |
173 | 174 | toAdnot ls = Product (fmap toAdnot ls) |
174 | 175 | |
175 | -- Tuples | |
176 | product :: [(T.Text, Value)] -> Value | |
177 | product = Product . MS.fromList | |
178 | ||
179 | (.=) :: ToAdnot t => T.Text -> t -> (T.Text, Value) | |
180 | key .= val = (key, toAdnot val) | |
181 | ||
182 | -- * Tuples | |
176 | 183 | instance ToAdnot () where |
177 | 184 | toAdnot () = List [] |
178 | 185 | |
196 | 203 | toAdnot (Right y) = Sum "Right" [toAdnot y] |
197 | 204 | |
198 | 205 | instance ToAdnot Bool where |
199 | toAdnot True = Symbol "True" | |
200 | toAdnot False = Symbol "False" | |
206 | toAdnot True = String "True" | |
207 | toAdnot False = String "False" | |
201 | 208 | |
202 | 209 | -- Parsing |
203 | 210 | |
206 | 213 | |
207 | 214 | type ParseError = String |
208 | 215 | type Parser a = Either ParseError a |
216 | ||
217 | niceType :: Value -> String | |
218 | niceType Sum {} = "sum" | |
219 | niceType Product {} = "product" | |
220 | niceType List {} = "list" | |
221 | niceType Integer {} = "integer" | |
222 | niceType Double {} = "double" | |
223 | niceType String {} = "string" | |
209 | 224 | |
210 | 225 | withSum :: String -> (T.Text -> Array -> Parser a) -> Value -> Parser a |
211 | 226 | withSum n k val = case val of |
9 | 9 | import qualified Data.Map.Strict as M |
10 | 10 | import Data.Monoid ((<>)) |
11 | 11 | import Data.Text (Text) |
12 |
import |
|
12 | import qualified Data.Text as T | |
13 | import Data.Text.Encoding (encodeUtf8Builder) | |
13 | 14 | import qualified Data.Vector as V |
14 | 15 | |
15 | 16 | import Data.Adnot.Type |
19 | 20 | |
20 | 21 | buildValue :: Value -> Builder |
21 | 22 | buildValue (Sum n vs) |
22 |
| V.null vs = char7 '(' <> |
|
23 | | V.null vs = char7 '(' <> buildString n <> char7 ')' | |
23 | 24 | | otherwise = |
24 |
char7 '(' <> |
|
25 | char7 '(' <> buildString n <> char7 ' ' <> spaceSepArr vs <> char7 ')' | |
25 | 26 | buildValue (Product ps) = |
26 | 27 | char7 '{' <> buildPairs ps <> char7 '}' |
27 | 28 | buildValue (List vs) = |
28 | 29 | char7 '[' <> spaceSepArr vs <> char7 ']' |
29 | 30 | buildValue (Integer i) = integerDec i |
30 | 31 | buildValue (Double d) = doubleDec d |
31 | buildValue (Symbol t) = ident t | |
32 | buildValue (String t) = | |
33 | char7 '"' <> byteString (encodeUtf8 t) <> char7 '"' | |
32 | buildValue (String t) = buildString t | |
33 | ||
34 | buildString t | |
35 | | isValidSymbol t = encodeUtf8Builder t | |
36 | | otherwise = char7 '"' <> escape t <> char7 '"' | |
37 | ||
38 | escape :: T.Text -> Builder | |
39 | escape = T.foldr go mempty | |
40 | where go '"' r = byteString "\\\"" <> r | |
41 | go '\n' r = byteString "\\n" <> r | |
42 | go '\\' r = byteString "\\\\" <> r | |
43 | go c r = char7 c <> r | |
34 | 44 | |
35 | 45 | spaceSep :: [Builder] -> Builder |
36 | 46 | spaceSep = mconcat . intersperse (char7 ' ') |
38 | 48 | spaceSepArr :: Array -> Builder |
39 | 49 | spaceSepArr = spaceSep . map buildValue . V.toList |
40 | 50 | |
41 | ident :: Text -> Builder | |
42 | ident = byteString . encodeUtf8 | |
43 | ||
44 | 51 | buildPairs :: Product -> Builder |
45 | 52 | buildPairs ps = spaceSep [ go k v | (k, v) <- M.toList ps ] |
46 |
where go k v = |
|
53 | where go k v = buildString k <> char7 ' ' <> buildValue v |
16 | 16 | decodeValue :: ByteString -> Either String Value |
17 | 17 | decodeValue = parseOnly pVal |
18 | 18 | where pVal = ws *> (pSum <|> pProd <|> pList <|> pLit) |
19 | pSum = Sum <$> (char '(' *> ws *> pIdent) | |
20 | <*> (pValueList <* ws <* char ')') | |
19 | pSum = Sum <$> (char '(' *> ws *> (pIdent <|> pString)) | |
20 | <*> (pValueList <* (ws *> char ')')) | |
21 | 21 | pProd = Product . M.fromList |
22 | 22 | <$> (char '{' *> pProdBody <* ws <* char '}') |
23 | 23 | pProdBody = many' pPair |
24 |
pPair = (,) <$> (ws *> |
|
24 | pPair = (,) <$> (ws *> (pIdent <|> pString)) <*> pVal | |
25 | 25 | pList = List <$> (char '[' *> pValueList <* ws <* char ']') |
26 |
pLit = S |
|
26 | pLit = String <$> pIdent | |
27 | 27 | <|> String <$> pString |
28 | 28 | <|> Double <$> double |
29 | 29 | <|> Integer <$> decimal |
30 | pStr = String <$> (pIdent <|> pString) | |
30 | 31 | pValueList = V.fromList <$> many' pVal |
31 | 32 | pIdent = T.pack <$> |
32 | 33 | ((:) <$> (letter_ascii <|> char '_') |
1 | 1 | {-# LANGUAGE DeriveDataTypeable #-} |
2 | 2 | {-# LANGUAGE BangPatterns #-} |
3 | 3 | |
4 |
module Data.Adnot.Type (Value(..), Array, Product |
|
4 | module Data.Adnot.Type (Value(..), Array, Product, isValidSymbol) where | |
5 | 5 | |
6 | 6 | import Control.DeepSeq (NFData(..)) |
7 | import qualified Data.Char as C | |
7 | 8 | import Data.Data (Data) |
8 | 9 | import Data.Typeable (Typeable) |
9 | 10 | import Data.Map.Strict (Map) |
10 | 11 | import qualified Data.Map as M |
11 | 12 | import Data.Text (Text) |
13 | import qualified Data.Text as T | |
12 | 14 | import Data.Vector (Vector) |
13 | 15 | import GHC.Exts (IsString(..)) |
14 | 16 | |
19 | 21 | | List !Array |
20 | 22 | | Integer !Integer |
21 | 23 | | Double !Double |
22 | | Symbol !Text | |
23 | 24 | | String !Text |
24 | 25 | deriving (Eq, Show, Read, Typeable, Data) |
25 | 26 | |
29 | 30 | rnf (List as) = rnf as |
30 | 31 | rnf (Integer i) = rnf i |
31 | 32 | rnf (Double d) = rnf d |
32 | rnf (Symbol t) = rnf t | |
33 | 33 | rnf (String t) = rnf t |
34 | 34 | |
35 | 35 | instance IsString Value where |
37 | 37 | |
38 | 38 | type Array = Vector Value |
39 | 39 | type Product = Map Text Value |
40 | ||
41 | isValidSymbol :: Text -> Bool | |
42 | isValidSymbol t = case T.uncons t of | |
43 | Nothing -> False | |
44 | Just (x, xs) -> C.isAlpha x && T.all C.isAlphaNum xs |
6 | 6 | , module Data.Adnot.Class |
7 | 7 | ) where |
8 | 8 | |
9 | import Data.Adnot.Class | |
9 | 10 | import Data.Adnot.Emit |
10 | 11 | import Data.Adnot.Parse |
11 | 12 | import Data.Adnot.Type |
1 | # Adnot | |
2 | ||
3 | **WARNING**: this repo contains unrepentant bikeshedding and wheel-reinvention. You almost definitely shouldn't use it, and it's probably best to disregard the entire thing! | |
4 | ||
5 | The *Adnot* format is a simple data and configuration format intended to have a slightly enriched data model when compared to JSON or s-expressions but still retain the comparative simplicity of those formats. Unlike JSON, Adnot chooses to avoid redundant structural information like punctuation; unlike s-expressions, Adnot values natively express a wider range of basic data types. | |
6 | ||
7 | *Adnot* is not intended to be a data interchange format, but rather to be a richer and more convenient syntax for certain kinds of data description that might otherwise be done in more unwieldy, complicated formats like YAML. As a first approximation, Adnot may be treated as a more human- and version-control-friendly version of JSON whose data model is intended to resemble the data model of statically typed functional programming languages. | |
8 | ||
9 | A given Adnot value is either one of three basic types—an integer, a double, a string—or one of three composite types: a sequence of values, a mapping of symbols to values, or a tagged sequence of values which begins with a symbol: | |
10 | ||
11 | ``` | |
12 | expr ::= "{" (string expr) * "}" | |
13 | | "(" string expr* ")" | |
14 | | "[" expr* "]" | |
15 | | string | |
16 | | integer | |
17 | | double | |
18 | ``` | |
19 | ||
20 | Strings can be expressed in two different ways: one is quoted strings, which are formatted like JSON strings with the same encoding and the same set of escape sequences; the other is as bare words, in which strings that begin with a character of unicode class `XID_Start` and consist subsequently of zero or more `XID_Continue` characters can be written without quotation marks. | |
21 | ||
22 | The three kinds of composite types are meant to resemble records, sum or variant types, and lists, respectively. Zero or more symbol-expression pairs inside curly brackets form a _mapping_: | |
23 | ||
24 | ``` | |
25 | # a basic mapping | |
26 | { | |
27 | x 2 | |
28 | y 3 | |
29 | "and z" 4 | |
30 | } | |
31 | ``` | |
32 | ||
33 | Pairs do not include colons and are not separated by commas. A mapping _must_ contain an even number of sub-expressions, and every odd subexpression _must_ be a string. Whitespace is ignored except as a separator between tokens, so the above map is identical to | |
34 | ||
35 | ``` | |
36 | {x 2 y 3 "and z" 4} | |
37 | ``` | |
38 | ||
39 | A _list_ is represented by square brackets with zero or more possibly-heterogeneous expressions: | |
40 | ||
41 | ``` | |
42 | # a basic list | |
43 | [ 2 "foo" bar ] | |
44 | ``` | |
45 | ||
46 | A _tagged expression_ is represented by parentheses with a single string followed by zero or more possibly-heterogeneous expressions: | |
47 | ||
48 | ``` | |
49 | # a basic tagged expression | |
50 | (some_tag blah 7.8 "??") | |
51 | ``` | |
52 | ||
53 | These are how tagged data-types are represented: because the thing inside the parens _must_ be a string, it can correspond to a data type in an ML-like language. | |
54 | ||
55 | Adnot values can contain comments, which are line-oriented and begin with a `#` character. |