gdritter repos adnot / f144540
Change Adnot language slightly + add FromAdnot parsing Getty Ritter 6 years ago
6 changed file(s) with 256 addition(s) and 62 deletion(s). Collapse all Expand all
1212
1313 import Data.Adnot.Type
1414 import Data.Adnot.Emit
15 import Data.Adnot.Parse
1516 import Data.Int
1617 import Data.Word
18 import qualified Data.ByteString as BS
1719 import qualified Data.ByteString.Lazy as BSL
1820 import qualified Data.Foldable as F
1921 import qualified Data.List.NonEmpty as NE
168170 instance ToAdnot a => ToAdnot (MS.Map T.Text a) where
169171 toAdnot ls = Product (fmap toAdnot ls)
170172
173 product :: [(T.Text, Value)] -> Value
174 product = Product . MS.fromList
175
176 (.=) :: ToAdnot t => T.Text -> t -> (T.Text, Value)
177 key .= val = (key, toAdnot val)
178
171179 -- * Tuples
172180 instance ToAdnot () where
173181 toAdnot () = List []
192200 toAdnot (Right y) = Sum "Right" [toAdnot y]
193201
194202 instance ToAdnot Bool where
195 toAdnot True = Symbol "True"
196 toAdnot False = Symbol "False"
203 toAdnot True = String "True"
204 toAdnot False = String "False"
197205
198206 -- * Parsing
199207
200208 type ParseError = String
201209 type Parser a = Either ParseError a
210
211 niceType :: Value -> String
212 niceType Sum {} = "sum"
213 niceType Product {} = "product"
214 niceType List {} = "list"
215 niceType Integer {} = "integer"
216 niceType Double {} = "double"
217 niceType String {} = "string"
202218
203219 withSum :: String -> (T.Text -> Array -> Parser a) -> Value -> Parser a
204220 withSum n k val = case val of
205221 Sum t as -> k t as
206222 _ -> Left ("Expected sum in " ++ n)
207223
224 withProd :: String -> (Product -> Parser a) -> Value -> Parser a
225 withProd n k val = case val of
226 Product as -> k as
227 _ -> Left ("Expected product in " ++ n)
228
229 withList :: String -> (Array -> Parser a) -> Value -> Parser a
230 withList n k val = case val of
231 List as -> k as
232 _ -> Left ("Expected list in " ++ n)
233
234 withString :: String -> (T.Text -> Parser a) -> Value -> Parser a
235 withString n k val = case val of
236 String t -> case k t of
237 Right x -> Right x
238 Left msg -> Left (msg ++ " in " ++ n)
239 _ -> Left ("Expected string in " ++ n)
240
241 decode :: FromAdnot a => BS.ByteString -> Maybe a
242 decode x = case decodeValue x of
243 Left _ -> Nothing
244 Right y -> case parseAdnot y of
245 Left _ -> Nothing
246 Right z -> Just z
247
248 decodeEither :: FromAdnot a => BS.ByteString -> Either String a
249 decodeEither x = do
250 y <- decodeValue x
251 parseAdnot y
252
208253 class FromAdnot a where
209254 parseAdnot :: Value -> Parser a
255
256 instance FromAdnot Value where
257 parseAdnot v = return v
258
259 instance FromAdnot Int where
260 parseAdnot (Integer n) = return (fromIntegral n)
261 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
262
263 instance FromAdnot Integer where
264 parseAdnot (Integer n) = return n
265 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
266
267 instance FromAdnot Int8 where
268 parseAdnot (Integer n) = return (fromIntegral n)
269 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
270
271 instance FromAdnot Int16 where
272 parseAdnot (Integer n) = return (fromIntegral n)
273 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
274
275 instance FromAdnot Int32 where
276 parseAdnot (Integer n) = return (fromIntegral n)
277 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
278
279 instance FromAdnot Int64 where
280 parseAdnot (Integer n) = return (fromIntegral n)
281 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
282
283 instance FromAdnot Word8 where
284 parseAdnot (Integer n) = return (fromIntegral n)
285 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
286
287 instance FromAdnot Word16 where
288 parseAdnot (Integer n) = return (fromIntegral n)
289 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
290
291 instance FromAdnot Word32 where
292 parseAdnot (Integer n) = return (fromIntegral n)
293 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
294
295 instance FromAdnot Word64 where
296 parseAdnot (Integer n) = return (fromIntegral n)
297 parseAdnot n = Left ("Expected integer, found " ++ niceType n)
298
299 -- Rational/Floating types
300
301 instance FromAdnot Double where
302 parseAdnot (Double d) = return d
303 parseAdnot n = Left ("Expected double, found " ++ niceType n)
304
305 instance FromAdnot Float where
306 parseAdnot (Double d) = return (fromRational (toRational d))
307 parseAdnot n = Left ("Expected double, found " ++ niceType n)
308
309 -- String types
310
311 instance FromAdnot T.Text where
312 parseAdnot (String t) = return t
313 parseAdnot n = Left ("Expected string, found " ++ niceType n)
314
315 instance FromAdnot TL.Text where
316 parseAdnot (String t) = return (TL.fromStrict t)
317 parseAdnot n = Left ("Expected string, found " ++ niceType n)
318
319 instance {-# INCOHERENT #-} FromAdnot String where
320 parseAdnot (String t) = return (T.unpack t)
321 parseAdnot n = Left ("Expected string, found " ++ niceType n)
322
323 -- sequence types
324
325 instance FromAdnot t => FromAdnot [t] where
326 parseAdnot (List ts) =
327 fmap (V.toList) (traverse parseAdnot ts)
328 parseAdnot n = Left ("Expected list, found " ++ niceType n)
329
330 instance FromAdnot t => FromAdnot (V.Vector t) where
331 parseAdnot (List ts) = traverse parseAdnot ts
332 parseAdnot n = Left ("Expected list, found " ++ niceType n)
333
334 instance FromAdnot t => FromAdnot (Seq.Seq t) where
335 parseAdnot (List ts) =
336 fmap (Seq.fromList . V.toList) (traverse parseAdnot ts)
337 parseAdnot n = Left ("Expected list, found " ++ niceType n)
338
339 instance FromAdnot t => FromAdnot (NE.NonEmpty t) where
340 parseAdnot (List ts) =
341 fmap (NE.fromList . V.toList) (traverse parseAdnot ts)
342 parseAdnot n = Left ("Expected list, found " ++ niceType n)
343
344 -- tuples
345
346 instance FromAdnot () where
347 parseAdnot (List []) = return ()
348 parseAdnot n = Left ("Expected list of length 0, found " ++ niceType n)
349
350 instance (FromAdnot a, FromAdnot b) => FromAdnot (a, b) where
351 parseAdnot (List [a, b]) = (,) <$> parseAdnot a <*> parseAdnot b
352 parseAdnot n = Left ("Expected list of length 2, found " ++ niceType n)
353
354 instance (FromAdnot a, FromAdnot b, FromAdnot c) => FromAdnot (a, b, c) where
355 parseAdnot (List [a, b, c]) =
356 (,,) <$> parseAdnot a
357 <*> parseAdnot b
358 <*> parseAdnot c
359 parseAdnot n = Left ("Expected list of length 2, found " ++ niceType n)
360
361 instance (FromAdnot a, FromAdnot b, FromAdnot c, FromAdnot d)
362 => FromAdnot (a, b, c, d) where
363 parseAdnot (List [a, b, c, d]) =
364 (,,,) <$> parseAdnot a
365 <*> parseAdnot b
366 <*> parseAdnot c
367 <*> parseAdnot d
368 parseAdnot n = Left ("Expected list of length 2, found " ++ niceType n)
369
370 instance FromAdnot a => FromAdnot (Maybe a) where
371 parseAdnot (Sum "Nothing" []) = return Nothing
372 parseAdnot (Sum "Just" [x]) = Just <$> parseAdnot x
373 parseAdnot (Sum "Nothing" xs) =
374 Left ("Expected 0 arguments to Maybe, but found " ++ show (F.length xs))
375 parseAdnot (Sum "Just" xs) =
376 Left ("Expected 1 argument to Just, but found " ++ show (F.length xs))
377 parseAdnot (Sum t _) =
378 Left ("Expected tag \"Nothing\" or \"Just\", but found " ++ show t)
379 parseAdnot n =
380 Left ("Expected tagged value, but found " ++ niceType n)
381
382 instance (FromAdnot a, FromAdnot b) => FromAdnot (Either a b) where
383 parseAdnot (Sum "Left" [l]) = Left <$> parseAdnot l
384 parseAdnot (Sum "Right" [r]) = Right <$> parseAdnot r
385 parseAdnot (Sum "Left" xs) =
386 Left ("Expected 1 arguments to Maybe, but found " ++ show (F.length xs))
387 parseAdnot (Sum "Right" xs) =
388 Left ("Expected 1 argument to Just, but found " ++ show (F.length xs))
389 parseAdnot (Sum t _) =
390 Left ("Expected tag \"Left\" or \"Right\", but found " ++ show t)
391 parseAdnot n =
392 Left ("Expected tagged value, but found " ++ niceType n)
393
394 instance FromAdnot Bool where
395 parseAdnot (String "True") = return True
396 parseAdnot (String "False") = return False
397 parseAdnot (String t) =
398 Left ("Expected \"True\" or \"False\", but found " ++ show t)
399 parseAdnot n =
400 Left ("Expected string, but found " ++ niceType n)
401
402 -- mapping types
403
404 instance FromAdnot t => FromAdnot (MS.Map T.Text t) where
405 parseAdnot (Product as) =
406 traverse parseAdnot as
407 parseAdnot n = Left ("Expected product, found " ++ niceType n)
408
409
410 (.:) :: FromAdnot a => Product -> T.Text -> Parser a
411 p .: name
412 | Just x <- MS.lookup name p = parseAdnot x
413 | otherwise = Left ("Unable to look up " ++ show name ++ " in product")
99 import qualified Data.Map.Strict as M
1010 import Data.Monoid ((<>))
1111 import Data.Text (Text)
12 import Data.Text.Encoding (encodeUtf8)
12 import qualified Data.Text as T
13 import Data.Text.Encoding (encodeUtf8Builder)
1314 import qualified Data.Vector as V
1415
1516 import Data.Adnot.Type
1920
2021 buildValue :: Value -> Builder
2122 buildValue (Sum n vs)
22 | V.null vs = char7 '(' <> ident n <> char7 ')'
23 | V.null vs = char7 '(' <> buildString n <> char7 ')'
2324 | otherwise =
24 char7 '(' <> ident n <> char7 ' ' <> spaceSepArr vs <> char7 ')'
25 char7 '(' <> buildString n <> char7 ' ' <> spaceSepArr vs <> char7 ')'
2526 buildValue (Product ps) =
2627 char7 '{' <> buildPairs ps <> char7 '}'
2728 buildValue (List vs) =
2829 char7 '[' <> spaceSepArr vs <> char7 ']'
2930 buildValue (Integer i) = integerDec i
3031 buildValue (Double d) = doubleDec d
31 buildValue (Symbol t) = ident t
32 buildValue (String t) =
33 char7 '"' <> byteString (encodeUtf8 t) <> char7 '"'
32 buildValue (String t) = buildString t
33
34 buildString t
35 | isValidSymbol t = encodeUtf8Builder t
36 | otherwise = char7 '"' <> escape t <> char7 '"'
37
38 escape :: T.Text -> Builder
39 escape = T.foldr go mempty
40 where go '"' r = byteString "\\\"" <> r
41 go '\n' r = byteString "\\n" <> r
42 go '\\' r = byteString "\\\\" <> r
43 go c r = char7 c <> r
3444
3545 spaceSep :: [Builder] -> Builder
3646 spaceSep = mconcat . intersperse (char7 ' ')
3848 spaceSepArr :: Array -> Builder
3949 spaceSepArr = spaceSep . map buildValue . V.toList
4050
41 ident :: Text -> Builder
42 ident = byteString . encodeUtf8
43
4451 buildPairs :: Product -> Builder
4552 buildPairs ps = spaceSep [ go k v | (k, v) <- M.toList ps ]
46 where go k v = ident k <> char7 ' ' <> buildValue v
53 where go k v = buildString k <> char7 ' ' <> buildValue v
1414 decodeValue :: ByteString -> Either String Value
1515 decodeValue = parseOnly pVal
1616 where pVal = ws *> (pSum <|> pProd <|> pList <|> pLit)
17 pSum = Sum <$> (char '(' *> ws *> pIdent)
18 <*> (pValueList <* char ')')
17 pSum = Sum <$> (char '(' *> ws *> (pIdent <|> pString))
18 <*> (pValueList <* (ws *> char ')'))
1919 pProd = Product . M.fromList
2020 <$> (char '{' *> pProdBody <* ws <* char '}')
2121 pProdBody = many' pPair
22 pPair = (,) <$> (ws *> pIdent) <*> pVal
22 pPair = (,) <$> (ws *> (pIdent <|> pString)) <*> pVal
2323 pList = List <$> (char '[' *> pValueList <* ws <* char ']')
24 pLit = Symbol <$> pIdent
24 pLit = String <$> pIdent
2525 <|> String <$> pString
2626 <|> Integer <$> decimal
27 pStr = String <$> (pIdent <|> pString)
2728 pValueList = V.fromList <$> many' pVal
2829 pIdent = T.pack <$>
2930 ((:) <$> (letter_ascii <|> char '_')
11 {-# LANGUAGE DeriveDataTypeable #-}
22 {-# LANGUAGE BangPatterns #-}
33
4 module Data.Adnot.Type (Value(..), Array, Product) where
4 module Data.Adnot.Type (Value(..), Array, Product, isValidSymbol) where
55
66 import Control.DeepSeq (NFData(..))
7 import qualified Data.Char as C
78 import Data.Data (Data)
89 import Data.Typeable (Typeable)
910 import Data.Map.Strict (Map)
1011 import qualified Data.Map as M
1112 import Data.Text (Text)
13 import qualified Data.Text as T
1214 import Data.Vector (Vector)
1315 import GHC.Exts (IsString(..))
1416
1921 | List !Array
2022 | Integer !Integer
2123 | Double !Double
22 | Symbol !Text
2324 | String !Text
2425 deriving (Eq, Show, Read, Typeable, Data)
2526
2930 rnf (List as) = rnf as
3031 rnf (Integer i) = rnf i
3132 rnf (Double d) = rnf d
32 rnf (Symbol t) = rnf t
3333 rnf (String t) = rnf t
3434
3535 instance IsString Value where
3737
3838 type Array = Vector Value
3939 type Product = Map Text Value
40
41 isValidSymbol :: Text -> Bool
42 isValidSymbol t = case T.uncons t of
43 Nothing -> False
44 Just (x, xs) -> C.isAlpha x && T.all C.isAlphaNum xs
33 , Product
44 , decodeValue
55 , encodeValue
6 , module Data.Adnot.Class
67 ) where
78
9 import Data.Adnot.Class
810 import Data.Adnot.Emit
911 import Data.Adnot.Parse
1012 import Data.Adnot.Type
11 # Adnot
22
3 The *Adnot* format is a simple data and configuration format intended
4 to have a slightly enriched data model when compared to JSON or
5 s-expressions but still retain the comparative simplicity of those
6 formats. Unlike JSON, Adnot chooses to avoid redundant structural
7 information like punctuation; unlike s-expressions, Adnot values
8 natively express a wider range of basic data types.
3 **WARNING**: this repo contains unrepentant bikeshedding and wheel-reinvention. You almost definitely shouldn't use it, and it's probably best to disregard the entire thing!
94
10 *Adnot* is not intended to be a data interchange format, but rather to
11 be a richer and more convenient syntax for certain kinds of data
12 description that might otherwise be done in more unwieldy formats like
13 YAML. As a first approximation, Adnot may be treated as a more human-
14 and version-control-friendly version of JSON whose data model is
15 intended to resemble the data model of statically typed functional
16 programming languages.
5 The *Adnot* format is a simple data and configuration format intended to have a slightly enriched data model when compared to JSON or s-expressions but still retain the comparative simplicity of those formats. Unlike JSON, Adnot chooses to avoid redundant structural information like punctuation; unlike s-expressions, Adnot values natively express a wider range of basic data types.
176
18 A given Adnot value is either one of four basic types—an integer, a
19 double, a string, or an identifier—or one of three composite types: a
20 sequence of values, a mapping of symbols to values, or a tagged
21 sequence of values which begins with a symbol:
7 *Adnot* is not intended to be a data interchange format, but rather to be a richer and more convenient syntax for certain kinds of data description that might otherwise be done in more unwieldy, complicated formats like YAML. As a first approximation, Adnot may be treated as a more human- and version-control-friendly version of JSON whose data model is intended to resemble the data model of statically typed functional programming languages.
8
9 A given Adnot value is either one of three basic types—an integer, a double, a string—or one of three composite types: a sequence of values, a mapping of symbols to values, or a tagged sequence of values which begins with a symbol:
2210
2311 ```
24 expr ::= "{" (symbol expr) * "}"
25 | "(" symbol expr* ")"
12 expr ::= "{" (string expr) * "}"
13 | "(" string expr* ")"
2614 | "[" expr* "]"
2715 | string
28 | symbol
2916 | integer
3017 | double
3118 ```
3219
33 Strings are understood in the same way as JSON strings, with the same
34 encoding and the same set of escapes. Symbols are unquoted strings
35 that start with a Unicode character with the `XID_Start` and continue
36 with the `XID_Continue` characters, and thus should resemble the
37 identifier syntax for a large number of C-like languages.
20 Strings can be expressed in two different ways: one is quoted strings, which are formatted like JSON strings with the same encoding and the same set of escape sequences; the other is as bare words, in which strings that begin with a character of unicode class `XID_Start` and consist subsequently of zero or more `XID_Continue` characters can be written without quotation marks.
3821
39 The three kinds of composite types are meant to resemble records, sum
40 or variant types, and lists, respectively. Zero or more
41 symbol-expression pairs inside curly brackets form a _map_:
22 The three kinds of composite types are meant to resemble records, sum or variant types, and lists, respectively. Zero or more symbol-expression pairs inside curly brackets form a _mapping_:
4223
4324 ```
44 # a basic map
25 # a basic mapping
4526 {
4627 x 2
4728 y 3
48 z 4
29 "and z" 4
4930 }
5031 ```
5132
52 Pairs do not include colons and are not separated by commas. A map
53 _must_ contain an even number of sub-expressions, and every odd
54 subexpression _must_ be a symbol. (This restriction might be lifted in
55 the future?) Whitespace is ignored except as a separator between
56 tokens, so the above map is identical to
33 Pairs do not include colons and are not separated by commas. A mapping _must_ contain an even number of sub-expressions, and every odd subexpression _must_ be a string. Whitespace is ignored except as a separator between tokens, so the above map is identical to
5734
5835 ```
59 {x 2 y 3 z 4}
36 {x 2 y 3 "and z" 4}
6037 ```
6138
62 A _list_ is represented by square brackets with zero or more
63 possibly-heterogeneous expressions:
39 A _list_ is represented by square brackets with zero or more possibly-heterogeneous expressions:
6440
6541 ```
6642 # a basic list
6743 [ 2 "foo" bar ]
6844 ```
6945
70 A _tagged expression_ is represented by parentheses with a single
71 symbol followed by zero or more possibly-heterogeneous expressions:
46 A _tagged expression_ is represented by parentheses with a single string followed by zero or more possibly-heterogeneous expressions:
7247
7348 ```
7449 # a basic tagged expression
7550 (some_tag blah 7.8 "??")
7651 ```
7752
78 These are how tagged data-types are traditionally represented: because
79 the thing inside the parens _must_ be a symbol, it can correspond to a
80 data type in an ML-like language.
53 These are how tagged data-types are represented: because the thing inside the parens _must_ be a string, it can correspond to a data type in an ML-like language.
54
55 Adnot values can contain comments, which are line-oriented and begin with a `#` character.