Change Adnot language slightly + add FromAdnot parsing
Getty Ritter
7 years ago
12 | 12 | |
13 | 13 | import Data.Adnot.Type |
14 | 14 | import Data.Adnot.Emit |
15 | import Data.Adnot.Parse | |
15 | 16 | import Data.Int |
16 | 17 | import Data.Word |
18 | import qualified Data.ByteString as BS | |
17 | 19 | import qualified Data.ByteString.Lazy as BSL |
18 | 20 | import qualified Data.Foldable as F |
19 | 21 | import qualified Data.List.NonEmpty as NE |
168 | 170 | instance ToAdnot a => ToAdnot (MS.Map T.Text a) where |
169 | 171 | toAdnot ls = Product (fmap toAdnot ls) |
170 | 172 | |
173 | product :: [(T.Text, Value)] -> Value | |
174 | product = Product . MS.fromList | |
175 | ||
176 | (.=) :: ToAdnot t => T.Text -> t -> (T.Text, Value) | |
177 | key .= val = (key, toAdnot val) | |
178 | ||
171 | 179 | -- * Tuples |
172 | 180 | instance ToAdnot () where |
173 | 181 | toAdnot () = List [] |
192 | 200 | toAdnot (Right y) = Sum "Right" [toAdnot y] |
193 | 201 | |
194 | 202 | instance ToAdnot Bool where |
195 | toAdnot True = Symbol "True" | |
196 | toAdnot False = Symbol "False" | |
203 | toAdnot True = String "True" | |
204 | toAdnot False = String "False" | |
197 | 205 | |
198 | 206 | -- * Parsing |
199 | 207 | |
200 | 208 | type ParseError = String |
201 | 209 | type Parser a = Either ParseError a |
210 | ||
211 | niceType :: Value -> String | |
212 | niceType Sum {} = "sum" | |
213 | niceType Product {} = "product" | |
214 | niceType List {} = "list" | |
215 | niceType Integer {} = "integer" | |
216 | niceType Double {} = "double" | |
217 | niceType String {} = "string" | |
202 | 218 | |
203 | 219 | withSum :: String -> (T.Text -> Array -> Parser a) -> Value -> Parser a |
204 | 220 | withSum n k val = case val of |
205 | 221 | Sum t as -> k t as |
206 | 222 | _ -> Left ("Expected sum in " ++ n) |
207 | 223 | |
224 | withProd :: String -> (Product -> Parser a) -> Value -> Parser a | |
225 | withProd n k val = case val of | |
226 | Product as -> k as | |
227 | _ -> Left ("Expected product in " ++ n) | |
228 | ||
229 | withList :: String -> (Array -> Parser a) -> Value -> Parser a | |
230 | withList n k val = case val of | |
231 | List as -> k as | |
232 | _ -> Left ("Expected list in " ++ n) | |
233 | ||
234 | withString :: String -> (T.Text -> Parser a) -> Value -> Parser a | |
235 | withString n k val = case val of | |
236 | String t -> case k t of | |
237 | Right x -> Right x | |
238 | Left msg -> Left (msg ++ " in " ++ n) | |
239 | _ -> Left ("Expected string in " ++ n) | |
240 | ||
241 | decode :: FromAdnot a => BS.ByteString -> Maybe a | |
242 | decode x = case decodeValue x of | |
243 | Left _ -> Nothing | |
244 | Right y -> case parseAdnot y of | |
245 | Left _ -> Nothing | |
246 | Right z -> Just z | |
247 | ||
248 | decodeEither :: FromAdnot a => BS.ByteString -> Either String a | |
249 | decodeEither x = do | |
250 | y <- decodeValue x | |
251 | parseAdnot y | |
252 | ||
208 | 253 | class FromAdnot a where |
209 | 254 | parseAdnot :: Value -> Parser a |
255 | ||
256 | instance FromAdnot Value where | |
257 | parseAdnot v = return v | |
258 | ||
259 | instance FromAdnot Int where | |
260 | parseAdnot (Integer n) = return (fromIntegral n) | |
261 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
262 | ||
263 | instance FromAdnot Integer where | |
264 | parseAdnot (Integer n) = return n | |
265 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
266 | ||
267 | instance FromAdnot Int8 where | |
268 | parseAdnot (Integer n) = return (fromIntegral n) | |
269 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
270 | ||
271 | instance FromAdnot Int16 where | |
272 | parseAdnot (Integer n) = return (fromIntegral n) | |
273 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
274 | ||
275 | instance FromAdnot Int32 where | |
276 | parseAdnot (Integer n) = return (fromIntegral n) | |
277 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
278 | ||
279 | instance FromAdnot Int64 where | |
280 | parseAdnot (Integer n) = return (fromIntegral n) | |
281 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
282 | ||
283 | instance FromAdnot Word8 where | |
284 | parseAdnot (Integer n) = return (fromIntegral n) | |
285 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
286 | ||
287 | instance FromAdnot Word16 where | |
288 | parseAdnot (Integer n) = return (fromIntegral n) | |
289 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
290 | ||
291 | instance FromAdnot Word32 where | |
292 | parseAdnot (Integer n) = return (fromIntegral n) | |
293 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
294 | ||
295 | instance FromAdnot Word64 where | |
296 | parseAdnot (Integer n) = return (fromIntegral n) | |
297 | parseAdnot n = Left ("Expected integer, found " ++ niceType n) | |
298 | ||
299 | -- Rational/Floating types | |
300 | ||
301 | instance FromAdnot Double where | |
302 | parseAdnot (Double d) = return d | |
303 | parseAdnot n = Left ("Expected double, found " ++ niceType n) | |
304 | ||
305 | instance FromAdnot Float where | |
306 | parseAdnot (Double d) = return (fromRational (toRational d)) | |
307 | parseAdnot n = Left ("Expected double, found " ++ niceType n) | |
308 | ||
309 | -- String types | |
310 | ||
311 | instance FromAdnot T.Text where | |
312 | parseAdnot (String t) = return t | |
313 | parseAdnot n = Left ("Expected string, found " ++ niceType n) | |
314 | ||
315 | instance FromAdnot TL.Text where | |
316 | parseAdnot (String t) = return (TL.fromStrict t) | |
317 | parseAdnot n = Left ("Expected string, found " ++ niceType n) | |
318 | ||
319 | instance {-# INCOHERENT #-} FromAdnot String where | |
320 | parseAdnot (String t) = return (T.unpack t) | |
321 | parseAdnot n = Left ("Expected string, found " ++ niceType n) | |
322 | ||
323 | -- sequence types | |
324 | ||
325 | instance FromAdnot t => FromAdnot [t] where | |
326 | parseAdnot (List ts) = | |
327 | fmap (V.toList) (traverse parseAdnot ts) | |
328 | parseAdnot n = Left ("Expected list, found " ++ niceType n) | |
329 | ||
330 | instance FromAdnot t => FromAdnot (V.Vector t) where | |
331 | parseAdnot (List ts) = traverse parseAdnot ts | |
332 | parseAdnot n = Left ("Expected list, found " ++ niceType n) | |
333 | ||
334 | instance FromAdnot t => FromAdnot (Seq.Seq t) where | |
335 | parseAdnot (List ts) = | |
336 | fmap (Seq.fromList . V.toList) (traverse parseAdnot ts) | |
337 | parseAdnot n = Left ("Expected list, found " ++ niceType n) | |
338 | ||
339 | instance FromAdnot t => FromAdnot (NE.NonEmpty t) where | |
340 | parseAdnot (List ts) = | |
341 | fmap (NE.fromList . V.toList) (traverse parseAdnot ts) | |
342 | parseAdnot n = Left ("Expected list, found " ++ niceType n) | |
343 | ||
344 | -- tuples | |
345 | ||
346 | instance FromAdnot () where | |
347 | parseAdnot (List []) = return () | |
348 | parseAdnot n = Left ("Expected list of length 0, found " ++ niceType n) | |
349 | ||
350 | instance (FromAdnot a, FromAdnot b) => FromAdnot (a, b) where | |
351 | parseAdnot (List [a, b]) = (,) <$> parseAdnot a <*> parseAdnot b | |
352 | parseAdnot n = Left ("Expected list of length 2, found " ++ niceType n) | |
353 | ||
354 | instance (FromAdnot a, FromAdnot b, FromAdnot c) => FromAdnot (a, b, c) where | |
355 | parseAdnot (List [a, b, c]) = | |
356 | (,,) <$> parseAdnot a | |
357 | <*> parseAdnot b | |
358 | <*> parseAdnot c | |
359 | parseAdnot n = Left ("Expected list of length 2, found " ++ niceType n) | |
360 | ||
361 | instance (FromAdnot a, FromAdnot b, FromAdnot c, FromAdnot d) | |
362 | => FromAdnot (a, b, c, d) where | |
363 | parseAdnot (List [a, b, c, d]) = | |
364 | (,,,) <$> parseAdnot a | |
365 | <*> parseAdnot b | |
366 | <*> parseAdnot c | |
367 | <*> parseAdnot d | |
368 | parseAdnot n = Left ("Expected list of length 2, found " ++ niceType n) | |
369 | ||
370 | instance FromAdnot a => FromAdnot (Maybe a) where | |
371 | parseAdnot (Sum "Nothing" []) = return Nothing | |
372 | parseAdnot (Sum "Just" [x]) = Just <$> parseAdnot x | |
373 | parseAdnot (Sum "Nothing" xs) = | |
374 | Left ("Expected 0 arguments to Maybe, but found " ++ show (F.length xs)) | |
375 | parseAdnot (Sum "Just" xs) = | |
376 | Left ("Expected 1 argument to Just, but found " ++ show (F.length xs)) | |
377 | parseAdnot (Sum t _) = | |
378 | Left ("Expected tag \"Nothing\" or \"Just\", but found " ++ show t) | |
379 | parseAdnot n = | |
380 | Left ("Expected tagged value, but found " ++ niceType n) | |
381 | ||
382 | instance (FromAdnot a, FromAdnot b) => FromAdnot (Either a b) where | |
383 | parseAdnot (Sum "Left" [l]) = Left <$> parseAdnot l | |
384 | parseAdnot (Sum "Right" [r]) = Right <$> parseAdnot r | |
385 | parseAdnot (Sum "Left" xs) = | |
386 | Left ("Expected 1 arguments to Maybe, but found " ++ show (F.length xs)) | |
387 | parseAdnot (Sum "Right" xs) = | |
388 | Left ("Expected 1 argument to Just, but found " ++ show (F.length xs)) | |
389 | parseAdnot (Sum t _) = | |
390 | Left ("Expected tag \"Left\" or \"Right\", but found " ++ show t) | |
391 | parseAdnot n = | |
392 | Left ("Expected tagged value, but found " ++ niceType n) | |
393 | ||
394 | instance FromAdnot Bool where | |
395 | parseAdnot (String "True") = return True | |
396 | parseAdnot (String "False") = return False | |
397 | parseAdnot (String t) = | |
398 | Left ("Expected \"True\" or \"False\", but found " ++ show t) | |
399 | parseAdnot n = | |
400 | Left ("Expected string, but found " ++ niceType n) | |
401 | ||
402 | -- mapping types | |
403 | ||
404 | instance FromAdnot t => FromAdnot (MS.Map T.Text t) where | |
405 | parseAdnot (Product as) = | |
406 | traverse parseAdnot as | |
407 | parseAdnot n = Left ("Expected product, found " ++ niceType n) | |
408 | ||
409 | ||
410 | (.:) :: FromAdnot a => Product -> T.Text -> Parser a | |
411 | p .: name | |
412 | | Just x <- MS.lookup name p = parseAdnot x | |
413 | | otherwise = Left ("Unable to look up " ++ show name ++ " in product") |
9 | 9 | import qualified Data.Map.Strict as M |
10 | 10 | import Data.Monoid ((<>)) |
11 | 11 | import Data.Text (Text) |
12 |
import |
|
12 | import qualified Data.Text as T | |
13 | import Data.Text.Encoding (encodeUtf8Builder) | |
13 | 14 | import qualified Data.Vector as V |
14 | 15 | |
15 | 16 | import Data.Adnot.Type |
19 | 20 | |
20 | 21 | buildValue :: Value -> Builder |
21 | 22 | buildValue (Sum n vs) |
22 |
| V.null vs = char7 '(' <> |
|
23 | | V.null vs = char7 '(' <> buildString n <> char7 ')' | |
23 | 24 | | otherwise = |
24 |
char7 '(' <> |
|
25 | char7 '(' <> buildString n <> char7 ' ' <> spaceSepArr vs <> char7 ')' | |
25 | 26 | buildValue (Product ps) = |
26 | 27 | char7 '{' <> buildPairs ps <> char7 '}' |
27 | 28 | buildValue (List vs) = |
28 | 29 | char7 '[' <> spaceSepArr vs <> char7 ']' |
29 | 30 | buildValue (Integer i) = integerDec i |
30 | 31 | buildValue (Double d) = doubleDec d |
31 | buildValue (Symbol t) = ident t | |
32 | buildValue (String t) = | |
33 | char7 '"' <> byteString (encodeUtf8 t) <> char7 '"' | |
32 | buildValue (String t) = buildString t | |
33 | ||
34 | buildString t | |
35 | | isValidSymbol t = encodeUtf8Builder t | |
36 | | otherwise = char7 '"' <> escape t <> char7 '"' | |
37 | ||
38 | escape :: T.Text -> Builder | |
39 | escape = T.foldr go mempty | |
40 | where go '"' r = byteString "\\\"" <> r | |
41 | go '\n' r = byteString "\\n" <> r | |
42 | go '\\' r = byteString "\\\\" <> r | |
43 | go c r = char7 c <> r | |
34 | 44 | |
35 | 45 | spaceSep :: [Builder] -> Builder |
36 | 46 | spaceSep = mconcat . intersperse (char7 ' ') |
38 | 48 | spaceSepArr :: Array -> Builder |
39 | 49 | spaceSepArr = spaceSep . map buildValue . V.toList |
40 | 50 | |
41 | ident :: Text -> Builder | |
42 | ident = byteString . encodeUtf8 | |
43 | ||
44 | 51 | buildPairs :: Product -> Builder |
45 | 52 | buildPairs ps = spaceSep [ go k v | (k, v) <- M.toList ps ] |
46 |
where go k v = |
|
53 | where go k v = buildString k <> char7 ' ' <> buildValue v |
14 | 14 | decodeValue :: ByteString -> Either String Value |
15 | 15 | decodeValue = parseOnly pVal |
16 | 16 | where pVal = ws *> (pSum <|> pProd <|> pList <|> pLit) |
17 | pSum = Sum <$> (char '(' *> ws *> pIdent) | |
18 | <*> (pValueList <* char ')') | |
17 | pSum = Sum <$> (char '(' *> ws *> (pIdent <|> pString)) | |
18 | <*> (pValueList <* (ws *> char ')')) | |
19 | 19 | pProd = Product . M.fromList |
20 | 20 | <$> (char '{' *> pProdBody <* ws <* char '}') |
21 | 21 | pProdBody = many' pPair |
22 |
pPair = (,) <$> (ws *> |
|
22 | pPair = (,) <$> (ws *> (pIdent <|> pString)) <*> pVal | |
23 | 23 | pList = List <$> (char '[' *> pValueList <* ws <* char ']') |
24 |
pLit = S |
|
24 | pLit = String <$> pIdent | |
25 | 25 | <|> String <$> pString |
26 | 26 | <|> Integer <$> decimal |
27 | pStr = String <$> (pIdent <|> pString) | |
27 | 28 | pValueList = V.fromList <$> many' pVal |
28 | 29 | pIdent = T.pack <$> |
29 | 30 | ((:) <$> (letter_ascii <|> char '_') |
1 | 1 | {-# LANGUAGE DeriveDataTypeable #-} |
2 | 2 | {-# LANGUAGE BangPatterns #-} |
3 | 3 | |
4 |
module Data.Adnot.Type (Value(..), Array, Product |
|
4 | module Data.Adnot.Type (Value(..), Array, Product, isValidSymbol) where | |
5 | 5 | |
6 | 6 | import Control.DeepSeq (NFData(..)) |
7 | import qualified Data.Char as C | |
7 | 8 | import Data.Data (Data) |
8 | 9 | import Data.Typeable (Typeable) |
9 | 10 | import Data.Map.Strict (Map) |
10 | 11 | import qualified Data.Map as M |
11 | 12 | import Data.Text (Text) |
13 | import qualified Data.Text as T | |
12 | 14 | import Data.Vector (Vector) |
13 | 15 | import GHC.Exts (IsString(..)) |
14 | 16 | |
19 | 21 | | List !Array |
20 | 22 | | Integer !Integer |
21 | 23 | | Double !Double |
22 | | Symbol !Text | |
23 | 24 | | String !Text |
24 | 25 | deriving (Eq, Show, Read, Typeable, Data) |
25 | 26 | |
29 | 30 | rnf (List as) = rnf as |
30 | 31 | rnf (Integer i) = rnf i |
31 | 32 | rnf (Double d) = rnf d |
32 | rnf (Symbol t) = rnf t | |
33 | 33 | rnf (String t) = rnf t |
34 | 34 | |
35 | 35 | instance IsString Value where |
37 | 37 | |
38 | 38 | type Array = Vector Value |
39 | 39 | type Product = Map Text Value |
40 | ||
41 | isValidSymbol :: Text -> Bool | |
42 | isValidSymbol t = case T.uncons t of | |
43 | Nothing -> False | |
44 | Just (x, xs) -> C.isAlpha x && T.all C.isAlphaNum xs |
3 | 3 | , Product |
4 | 4 | , decodeValue |
5 | 5 | , encodeValue |
6 | , module Data.Adnot.Class | |
6 | 7 | ) where |
7 | 8 | |
9 | import Data.Adnot.Class | |
8 | 10 | import Data.Adnot.Emit |
9 | 11 | import Data.Adnot.Parse |
10 | 12 | import Data.Adnot.Type |
1 | 1 | # Adnot |
2 | 2 | |
3 | The *Adnot* format is a simple data and configuration format intended | |
4 | to have a slightly enriched data model when compared to JSON or | |
5 | s-expressions but still retain the comparative simplicity of those | |
6 | formats. Unlike JSON, Adnot chooses to avoid redundant structural | |
7 | information like punctuation; unlike s-expressions, Adnot values | |
8 | natively express a wider range of basic data types. | |
3 | **WARNING**: this repo contains unrepentant bikeshedding and wheel-reinvention. You almost definitely shouldn't use it, and it's probably best to disregard the entire thing! | |
9 | 4 | |
10 | *Adnot* is not intended to be a data interchange format, but rather to | |
11 | be a richer and more convenient syntax for certain kinds of data | |
12 | description that might otherwise be done in more unwieldy formats like | |
13 | YAML. As a first approximation, Adnot may be treated as a more human- | |
14 | and version-control-friendly version of JSON whose data model is | |
15 | intended to resemble the data model of statically typed functional | |
16 |
|
|
5 | The *Adnot* format is a simple data and configuration format intended to have a slightly enriched data model when compared to JSON or s-expressions but still retain the comparative simplicity of those formats. Unlike JSON, Adnot chooses to avoid redundant structural information like punctuation; unlike s-expressions, Adnot values natively express a wider range of basic data types. | |
17 | 6 | |
18 | A given Adnot value is either one of four basic types—an integer, a | |
19 | double, a string, or an identifier—or one of three composite types: a | |
20 | sequence of values, a mapping of symbols to values, or a tagged | |
21 | sequence of values which begins with a symbol: | |
7 | *Adnot* is not intended to be a data interchange format, but rather to be a richer and more convenient syntax for certain kinds of data description that might otherwise be done in more unwieldy, complicated formats like YAML. As a first approximation, Adnot may be treated as a more human- and version-control-friendly version of JSON whose data model is intended to resemble the data model of statically typed functional programming languages. | |
8 | ||
9 | A given Adnot value is either one of three basic types—an integer, a double, a string—or one of three composite types: a sequence of values, a mapping of symbols to values, or a tagged sequence of values which begins with a symbol: | |
22 | 10 | |
23 | 11 | ``` |
24 | expr ::= "{" (symbol expr) * "}" | |
25 | | "(" symbol expr* ")" | |
12 | expr ::= "{" (string expr) * "}" | |
13 | | "(" string expr* ")" | |
26 | 14 | | "[" expr* "]" |
27 | 15 | | string |
28 | | symbol | |
29 | 16 | | integer |
30 | 17 | | double |
31 | 18 | ``` |
32 | 19 | |
33 | Strings are understood in the same way as JSON strings, with the same | |
34 | encoding and the same set of escapes. Symbols are unquoted strings | |
35 | that start with a Unicode character with the `XID_Start` and continue | |
36 | with the `XID_Continue` characters, and thus should resemble the | |
37 |
|
|
20 | Strings can be expressed in two different ways: one is quoted strings, which are formatted like JSON strings with the same encoding and the same set of escape sequences; the other is as bare words, in which strings that begin with a character of unicode class `XID_Start` and consist subsequently of zero or more `XID_Continue` characters can be written without quotation marks. | |
38 | 21 | |
39 | The three kinds of composite types are meant to resemble records, sum | |
40 | or variant types, and lists, respectively. Zero or more | |
41 |
|
|
22 | The three kinds of composite types are meant to resemble records, sum or variant types, and lists, respectively. Zero or more symbol-expression pairs inside curly brackets form a _mapping_: | |
42 | 23 | |
43 | 24 | ``` |
44 |
# a basic map |
|
25 | # a basic mapping | |
45 | 26 | { |
46 | 27 | x 2 |
47 | 28 | y 3 |
48 |
|
|
29 | "and z" 4 | |
49 | 30 | } |
50 | 31 | ``` |
51 | 32 | |
52 | Pairs do not include colons and are not separated by commas. A map | |
53 | _must_ contain an even number of sub-expressions, and every odd | |
54 | subexpression _must_ be a symbol. (This restriction might be lifted in | |
55 | the future?) Whitespace is ignored except as a separator between | |
56 |
|
|
33 | Pairs do not include colons and are not separated by commas. A mapping _must_ contain an even number of sub-expressions, and every odd subexpression _must_ be a string. Whitespace is ignored except as a separator between tokens, so the above map is identical to | |
57 | 34 | |
58 | 35 | ``` |
59 |
{x 2 y 3 |
|
36 | {x 2 y 3 "and z" 4} | |
60 | 37 | ``` |
61 | 38 | |
62 | A _list_ is represented by square brackets with zero or more | |
63 | possibly-heterogeneous expressions: | |
39 | A _list_ is represented by square brackets with zero or more possibly-heterogeneous expressions: | |
64 | 40 | |
65 | 41 | ``` |
66 | 42 | # a basic list |
67 | 43 | [ 2 "foo" bar ] |
68 | 44 | ``` |
69 | 45 | |
70 | A _tagged expression_ is represented by parentheses with a single | |
71 | symbol followed by zero or more possibly-heterogeneous expressions: | |
46 | A _tagged expression_ is represented by parentheses with a single string followed by zero or more possibly-heterogeneous expressions: | |
72 | 47 | |
73 | 48 | ``` |
74 | 49 | # a basic tagged expression |
75 | 50 | (some_tag blah 7.8 "??") |
76 | 51 | ``` |
77 | 52 | |
78 | These are how tagged data-types are traditionally represented: because | |
79 | the thing inside the parens _must_ be a symbol, it can correspond to a | |
80 |
|
|
53 | These are how tagged data-types are represented: because the thing inside the parens _must_ be a string, it can correspond to a data type in an ML-like language. | |
54 | ||
55 | Adnot values can contain comments, which are line-oriented and begin with a `#` character. |