gdritter repos adnot / f6024b3
Merge branch 'master' of rosencrantz:/srv/git/adnot Getty Ritter 5 years ago
6 changed file(s) with 104 addition(s) and 20 deletion(s). Collapse all Expand all
1515 import Data.Adnot.Parse
1616 import Data.Adnot.Type
1717 import Data.Adnot.Emit
18 import Data.Adnot.Parse
1819 import Data.Int
1920 import Data.Word
2021 import qualified Data.ByteString as BS
172173 instance ToAdnot a => ToAdnot (MS.Map T.Text a) where
173174 toAdnot ls = Product (fmap toAdnot ls)
174175
175 -- Tuples
-- | Build a 'Product' value from a list of key/value pairs.
--
-- The pairs are collected into a map with @MS.fromList@ before being wrapped
-- in the 'Product' constructor.
product :: [(T.Text, Value)] -> Value
product pairs = Product (MS.fromList pairs)
178
-- | Pair a key with the 'toAdnot' encoding of a value.
--
-- The resulting tuple has the element type of the list accepted by 'product'.
(.=) :: ToAdnot t => T.Text -> t -> (T.Text, Value)
(.=) key val = (key, toAdnot val)
181
182 -- * Tuples
176183 instance ToAdnot () where
177184 toAdnot () = List []
178185
196203 toAdnot (Right y) = Sum "Right" [toAdnot y]
197204
198205 instance ToAdnot Bool where
199 toAdnot True = Symbol "True"
200 toAdnot False = Symbol "False"
206 toAdnot True = String "True"
207 toAdnot False = String "False"
201208
202209 -- Parsing
203210
206213
207214 type ParseError = String
208215 type Parser a = Either ParseError a
216
-- | Name the constructor of a 'Value' as a short lowercase word.
niceType :: Value -> String
niceType val = case val of
  Sum {}     -> "sum"
  Product {} -> "product"
  List {}    -> "list"
  Integer {} -> "integer"
  Double {}  -> "double"
  String {}  -> "string"
209224
210225 withSum :: String -> (T.Text -> Array -> Parser a) -> Value -> Parser a
211226 withSum n k val = case val of
99 import qualified Data.Map.Strict as M
1010 import Data.Monoid ((<>))
1111 import Data.Text (Text)
12 import Data.Text.Encoding (encodeUtf8)
12 import qualified Data.Text as T
13 import Data.Text.Encoding (encodeUtf8Builder)
1314 import qualified Data.Vector as V
1415
1516 import Data.Adnot.Type
1920
2021 buildValue :: Value -> Builder
2122 buildValue (Sum n vs)
22 | V.null vs = char7 '(' <> ident n <> char7 ')'
23 | V.null vs = char7 '(' <> buildString n <> char7 ')'
2324 | otherwise =
24 char7 '(' <> ident n <> char7 ' ' <> spaceSepArr vs <> char7 ')'
25 char7 '(' <> buildString n <> char7 ' ' <> spaceSepArr vs <> char7 ')'
2526 buildValue (Product ps) =
2627 char7 '{' <> buildPairs ps <> char7 '}'
2728 buildValue (List vs) =
2829 char7 '[' <> spaceSepArr vs <> char7 ']'
2930 buildValue (Integer i) = integerDec i
3031 buildValue (Double d) = doubleDec d
31 buildValue (Symbol t) = ident t
32 buildValue (String t) =
33 char7 '"' <> byteString (encodeUtf8 t) <> char7 '"'
32 buildValue (String t) = buildString t
33
-- | Emit a piece of text either as a bare word or as a quoted string.
--
-- Text accepted by 'isValidSymbol' is written out verbatim, UTF-8 encoded.
-- Anything else is wrapped in double quotes with its contents passed through
-- 'escape'.
buildString :: T.Text -> Builder
buildString t
  | isValidSymbol t = encodeUtf8Builder t
  | otherwise       = char7 '"' <> escape t <> char7 '"'
37
-- | Escape the contents of a quoted string and encode them as UTF-8.
--
-- A double quote becomes @\\\"@, a newline becomes @\\n@ and a backslash
-- becomes @\\\\@; every other character is passed through unchanged.
--
-- The escaping is done on the 'T.Text' and the result is UTF-8 encoded in one
-- step. Emitting each remaining character with @char7@ would keep only the
-- low 7 bits of its code point and corrupt any non-ASCII text.
escape :: T.Text -> Builder
escape = encodeUtf8Builder . T.concatMap go
  where
    go '"'  = T.pack "\\\""
    go '\n' = T.pack "\\n"
    go '\\' = T.pack "\\\\"
    go c    = T.singleton c
3444
3545 spaceSep :: [Builder] -> Builder
3646 spaceSep = mconcat . intersperse (char7 ' ')
3848 spaceSepArr :: Array -> Builder
3949 spaceSepArr = spaceSep . map buildValue . V.toList
4050
41 ident :: Text -> Builder
42 ident = byteString . encodeUtf8
43
4451 buildPairs :: Product -> Builder
4552 buildPairs ps = spaceSep [ go k v | (k, v) <- M.toList ps ]
46 where go k v = ident k <> char7 ' ' <> buildValue v
53 where go k v = buildString k <> char7 ' ' <> buildValue v
1616 decodeValue :: ByteString -> Either String Value
1717 decodeValue = parseOnly pVal
1818 where pVal = ws *> (pSum <|> pProd <|> pList <|> pLit)
19 pSum = Sum <$> (char '(' *> ws *> pIdent)
20 <*> (pValueList <* ws <* char ')')
19 pSum = Sum <$> (char '(' *> ws *> (pIdent <|> pString))
20 <*> (pValueList <* (ws *> char ')'))
2121 pProd = Product . M.fromList
2222 <$> (char '{' *> pProdBody <* ws <* char '}')
2323 pProdBody = many' pPair
24 pPair = (,) <$> (ws *> pIdent) <*> pVal
24 pPair = (,) <$> (ws *> (pIdent <|> pString)) <*> pVal
2525 pList = List <$> (char '[' *> pValueList <* ws <* char ']')
26 pLit = Symbol <$> pIdent
26 pLit = String <$> pIdent
2727 <|> String <$> pString
2828 <|> Double <$> double
2929 <|> Integer <$> decimal
30 pStr = String <$> (pIdent <|> pString)
3031 pValueList = V.fromList <$> many' pVal
3132 pIdent = T.pack <$>
3233 ((:) <$> (letter_ascii <|> char '_')
11 {-# LANGUAGE DeriveDataTypeable #-}
22 {-# LANGUAGE BangPatterns #-}
33
4 module Data.Adnot.Type (Value(..), Array, Product) where
4 module Data.Adnot.Type (Value(..), Array, Product, isValidSymbol) where
55
66 import Control.DeepSeq (NFData(..))
7 import qualified Data.Char as C
78 import Data.Data (Data)
89 import Data.Typeable (Typeable)
910 import Data.Map.Strict (Map)
1011 import qualified Data.Map as M
1112 import Data.Text (Text)
13 import qualified Data.Text as T
1214 import Data.Vector (Vector)
1315 import GHC.Exts (IsString(..))
1416
1921 | List !Array
2022 | Integer !Integer
2123 | Double !Double
22 | Symbol !Text
2324 | String !Text
2425 deriving (Eq, Show, Read, Typeable, Data)
2526
2930 rnf (List as) = rnf as
3031 rnf (Integer i) = rnf i
3132 rnf (Double d) = rnf d
32 rnf (Symbol t) = rnf t
3333 rnf (String t) = rnf t
3434
3535 instance IsString Value where
3737
3838 type Array = Vector Value
3939 type Product = Map Text Value
40
-- | Decide whether a piece of text may be written as a bare word.
--
-- The text must be non-empty, its first character must satisfy 'C.isAlpha',
-- and every remaining character must satisfy 'C.isAlphaNum'.
--
-- NOTE(review): both predicates are Unicode-aware, so non-ASCII letters and
-- digits are accepted here; confirm that the parser's identifier rule accepts
-- the same set of characters.
isValidSymbol :: Text -> Bool
isValidSymbol t = maybe False startsAndContinues (T.uncons t)
  where
    startsAndContinues (c, rest) = C.isAlpha c && T.all C.isAlphaNum rest
66 , module Data.Adnot.Class
77 ) where
88
9 import Data.Adnot.Class
910 import Data.Adnot.Emit
1011 import Data.Adnot.Parse
1112 import Data.Adnot.Type
1 # Adnot
2
3 **WARNING**: this repo contains unrepentant bikeshedding and wheel-reinvention. You almost definitely shouldn't use it, and it's probably best to disregard the entire thing!
4
5 The *Adnot* format is a simple data and configuration format intended to have a slightly enriched data model when compared to JSON or s-expressions but still retain the comparative simplicity of those formats. Unlike JSON, Adnot chooses to avoid redundant structural information like punctuation; unlike s-expressions, Adnot values natively express a wider range of basic data types.
6
7 *Adnot* is not intended to be a data interchange format, but rather to be a richer and more convenient syntax for certain kinds of data description that might otherwise be done in more unwieldy, complicated formats like YAML. As a first approximation, Adnot may be treated as a more human- and version-control-friendly version of JSON whose data model is intended to resemble the data model of statically typed functional programming languages.
8
9 A given Adnot value is either one of three basic types—an integer, a double, or a string—or one of three composite types: a mapping of strings to values, a tagged sequence of values which begins with a string, or a sequence of values:
10
11 ```
12 expr ::= "{" (string expr) * "}"
13 | "(" string expr* ")"
14 | "[" expr* "]"
15 | string
16 | integer
17 | double
18 ```
19
20 Strings can be expressed in two different ways. The first is as a quoted string, which is formatted like a JSON string with the same encoding and the same set of escape sequences. The second is as a bare word: a string that begins with a character of Unicode class `XID_Start`, followed by zero or more `XID_Continue` characters, can be written without quotation marks.
21
22 The three kinds of composite types are meant to resemble records, sum or variant types, and lists, respectively. Zero or more string-expression pairs inside curly brackets form a _mapping_:
23
24 ```
25 # a basic mapping
26 {
27 x 2
28 y 3
29 "and z" 4
30 }
31 ```
32
33 Pairs do not include colons and are not separated by commas. A mapping _must_ contain an even number of sub-expressions, and every odd-numbered sub-expression (the first, third, and so on) _must_ be a string. Whitespace is ignored except as a separator between tokens, so the above map is identical to
34
35 ```
36 {x 2 y 3 "and z" 4}
37 ```
38
39 A _list_ is represented by square brackets with zero or more possibly-heterogeneous expressions:
40
41 ```
42 # a basic list
43 [ 2 "foo" bar ]
44 ```
45
46 A _tagged expression_ is represented by parentheses with a single string followed by zero or more possibly-heterogeneous expressions:
47
48 ```
49 # a basic tagged expression
50 (some_tag blah 7.8 "??")
51 ```
52
53 This is how tagged data types are represented: because the first thing inside the parentheses _must_ be a string, it can correspond to a constructor of a data type in an ML-like language.
54
55 Adnot values can contain comments, which are line-oriented and begin with a `#` character.