Working parser for the non-executable subset of Virgil; unfinished evaluator, basically no type-checking to speak of
Getty Ritter
8 years ago
3 | 3 | module Language.Vergilius.AST where |
4 | 4 | |
5 | 5 | import Data.ByteString.Lazy (ByteString) |
6 |
import |
|
6 | import Data.HashMap.Strict (HashMap, fromList) | |
7 | import qualified Data.HashMap.Strict as HM | |
7 | 8 | import Data.Scientific (Scientific) |
8 | 9 | import Data.Text (Text) |
9 |
import |
|
10 | import Data.Vector (Vector) | |
11 | import qualified Data.Vector as V | |
10 | 12 | |
11 | 13 | type Ident = ByteString |
12 | 14 | |
15 | -- | A top-level declaration. | |
13 | 16 | data Decl |
14 | 17 | = Fragment ChValue |
15 | 18 | | DFun FunDec |
18 | 21 | |
19 | 22 | type ChObject = HashMap Text ChValue |
20 | 23 | type ChArray = Vector ChValue |
24 | ||
25 | mkArray :: [ChValue] -> ChValue | |
26 | mkArray = ChArray . V.fromList | |
27 | ||
28 | mkObject :: [(Text, ChValue)] -> ChValue | |
29 | mkObject = ChObject . HM.fromList | |
21 | 30 | |
22 | 31 | data ChValue |
23 | 32 | = ChObject ChObject |
83 | 92 | | EVar Ident |
84 | 93 | | ETup [Expr] |
85 | 94 | | EList [Expr] |
86 |
| ERecord [(Ident, |
|
95 | | ERecord [(Ident, Expr)] | |
87 | 96 | | ELam [FnCase] |
88 | 97 | | ETyped Expr Typ |
89 | 98 | | EAccess Expr Ident |
3 | 3 | |
4 | 4 | module Language.Vergilius.Eval where |
5 | 5 | |
6 | import qualified Data.Aeson as Ae | |
6 | 7 | import Data.HashMap.Strict (HashMap) |
7 | 8 | import qualified Data.HashMap.Strict as HM |
8 | 9 | |
9 |
import Language. |
|
10 | import Language.Vergilius.AST | |
10 | 11 | |
12 | evalCh :: ChValue -> Either String Ae.Value | |
13 | evalCh (ChObject obj) = Ae.Object `fmap` mapM evalCh obj | |
14 | evalCh (ChArray arr) = Ae.Array `fmap` mapM evalCh arr | |
15 | evalCh (ChString t) = pure (Ae.String t) | |
16 | evalCh (ChNumber n) = pure (Ae.Number n) | |
17 | evalCh (ChBool b) = pure (Ae.Bool b) | |
18 | evalCh ChNull = pure Ae.Null | |
19 | evalCh (ChQuote _) = Left "no evaluation yet" | |
20 | ||
21 | -- | Match a pattern against a value | |
11 | 22 | match :: Pattern -> Value -> Maybe [(Ident, Value)] |
12 | 23 | match (PConstr i p) (VConstr i' v) |
13 | 24 | | i == i' = match p v |
1 | { | |
2 | {-# OPTIONS -w #-} | |
3 | {-# LANGUAGE OverloadedStrings #-} | |
4 | ||
5 | module Language.Vergilius.Lexer where | |
6 | ||
7 | import Control.Monad (liftM) | |
8 | import Data.Text (Text) | |
9 | import qualified Data.Text as T | |
10 | ||
11 | import Prelude hiding (lex) | |
12 | } | |
13 | ||
14 | %wrapper "monadUserState" | |
15 | ||
16 | $digit = 0-9 | |
17 | $alpha = [A-Za-z] | |
18 | ||
19 | tokens :- | |
20 | $white+ ; | |
21 | "#".* ; | |
22 | ||
23 | $digit+ { lex (TkInt . read . T.unpack) } | |
24 | \"[^\"]*\" { lex strLiteral } -- this is for syntax highlighting -> " | |
25 | ||
26 | true { lex' (TkKw KwTrue) } | |
27 | false { lex' (TkKw KwFalse) } | |
28 | null { lex' (TkKw KwNull) } | |
29 | ||
30 | \{ { lex' TkLCurl } | |
31 | \} { lex' TkRCurl } | |
32 | \[ { lex' TkLBrac } | |
33 | \] { lex' TkRBrac } | |
34 | ||
35 | { | |
36 | data Token = Token AlexPosn TkType deriving (Eq, Show) | |
37 | ||
38 | data KwType | |
39 | = KwTrue | |
40 | | KwFalse | |
41 | | KwNull | |
42 | deriving (Eq, Show) | |
43 | ||
44 | data TkType | |
45 | = TkInt Integer | |
46 | | TkStr Text | |
47 | | TkKw KwType | |
48 | | TkLCurl | |
49 | | TkRCurl | |
50 | | TkLBrac | |
51 | | TkRBrac | |
52 | | TkEOF | |
53 | deriving (Eq, Show) | |
54 | ||
55 | strLiteral :: Text -> TkType | |
56 | strLiteral = TkStr . T.drop 1 . T.dropEnd 1 | |
57 | ||
58 | data AlexUserState = AlexUserState | |
59 | { filePath :: FilePath | |
60 | } deriving (Eq, Show) | |
61 | ||
62 | alexInitUserState :: AlexUserState | |
63 | alexInitUserState = AlexUserState "<unknown>" | |
64 | ||
65 | getFilePath :: Alex FilePath | |
66 | getFilePath = liftM filePath alexGetUserState | |
67 | ||
68 | setFilePath :: FilePath -> Alex () | |
69 | setFilePath = alexSetUserState . AlexUserState | |
70 | ||
71 | alexMonadScan' :: Alex Token | |
72 | alexMonadScan' = do | |
73 | inp <- alexGetInput | |
74 | sc <- alexGetStartCode | |
75 | case alexScan inp sc of | |
76 | AlexEOF -> alexEOF | |
77 | AlexError (p, _, _, s) -> | |
78 | alexError' p ("lexical error at character '" ++ take 1 s ++ "'") | |
79 | AlexSkip inp' len -> do | |
80 | alexSetInput inp' | |
81 | alexMonadScan' | |
82 | AlexToken inp' len action -> do | |
83 | alexSetInput inp' | |
84 | action (ignorePendingBytes inp) len | |
85 | ||
86 | alexError' :: AlexPosn -> String -> Alex a | |
87 | alexError' (AlexPn _ l c) msg = do | |
88 | fp <- getFilePath | |
89 | alexError (fp ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg) | |
90 | ||
91 | alexEOF :: Alex Token | |
92 | alexEOF = do | |
93 | (p,_,_,_) <- alexGetInput | |
94 | return (Token p TkEOF) | |
95 | ||
96 | ||
97 | lex :: (Text -> TkType) -> AlexAction Token | |
98 | lex f = \(p,_,_,s) i -> return (Token p (f (T.pack (take i s)))) | |
99 | ||
100 | lex' :: TkType -> AlexAction Token | |
101 | lex' = lex . const | |
102 | ||
103 | runAlex' :: Alex a -> FilePath -> String -> Either String a | |
104 | runAlex' mote fp input = runAlex input (setFilePath fp >> mote) | |
105 | } |
1 | { | |
2 | {-# LANGUAGE OverloadedStrings #-} | |
3 | ||
4 | module Language.Vergilius.Parser where | |
5 | ||
6 | import Language.Vergilius.AST | |
7 | import Language.Vergilius.Lexer | |
8 | ||
9 | } | |
10 | ||
11 | %name parse | |
12 | %tokentype { Token } | |
13 | %monad { Alex } | |
14 | %lexer { lexwrap } { Token _ TkEOF } | |
15 | %error { happyError } | |
16 | ||
17 | %token | |
18 | '{' { Token _ TkLCurl } | |
19 | '}' { Token _ TkRCurl } | |
20 | '[' { Token _ TkLBrac } | |
21 | ']' { Token _ TkRBrac } | |
22 | ||
23 | true { Token _ (TkKw KwTrue) } | |
24 | false { Token _ (TkKw KwFalse) } | |
25 | null { Token _ (TkKw KwNull) } | |
26 | ||
27 | int { Token _ (TkInt $$) } | |
28 | str { Token _ (TkStr $$) } | |
29 | ||
30 | %% | |
31 | ||
32 | tlexpr | |
33 | : '[' list { mkArray $2 } | |
34 | | '{' dict { mkObject $2 } | |
35 | ||
36 | expr | |
37 | : tlexpr { $1 } | |
38 | | int { ChNumber (fromIntegral $1) } | |
39 | | str { ChString $1 } | |
40 | | true { ChBool True } | |
41 | | false { ChBool False} | |
42 | | null { ChNull } | |
43 | ||
44 | list | |
45 | : ']' { [] } | |
46 | | expr list { $1 : $2 } | |
47 | ||
48 | dict | |
49 | : '}' { [] } | |
50 | | str expr dict { ($1, $2) : $3 } | |
51 | ||
52 | { | |
53 | ||
54 | -- info :: a -> Alex (Info a) | |
55 | -- info x = do | |
56 | -- (AlexPn _ ln cl,_,_,_) <- alexGetInput | |
57 | -- fp <- getFilePath | |
58 | -- return $ Info { node = x | |
59 | -- , srcLine = ln | |
60 | -- , srcCol = cl | |
61 | -- , srcFile = fp | |
62 | -- } | |
63 | ||
64 | lexwrap :: (Token -> Alex a) -> Alex a | |
65 | lexwrap = (alexMonadScan' >>=) | |
66 | ||
67 | happyError :: Token -> Alex a | |
68 | happyError (Token p t) = | |
69 | alexError' p ("parse error at token " ++ show t) | |
70 | ||
71 | parseFile :: FilePath -> String -> Either String ChValue | |
72 | parseFile = runAlex' parse | |
73 | } |
1 | module Language.Vergilius.Pretty where | |
2 | ||
3 | import Data.Monoid ((<>)) | |
4 | import Text.PrettyPrint.HughesPJ.Class | |
5 | import Language.Vergilius.AST | |
6 | ||
7 | instance Pretty ChValue where | |
8 | pPrint (ChObject obj) = braces $ text "..." | |
9 | pPrint |
1 |
module Language. |
|
1 | module Language.Vergilius | |
2 | ( -- * Vergil Parsers | |
3 | decode | |
4 | , eitherDecode | |
5 | , decodeStrict | |
6 | , eitherDecodeStrict | |
7 | -- * Re-Exported Aeson Types | |
8 | -- ** Core JSON Types | |
9 | , Aeson.Value(..) | |
10 | , Aeson.Array | |
11 | , Aeson.Object | |
12 | -- ** Type Conversion | |
13 | , Aeson.FromJSON(..) | |
14 | , Aeson.ToJSON(..) | |
15 | , Aeson.Result(..) | |
16 | , Aeson.fromJSON | |
17 | -- ** Inspecting Values | |
18 | , Aeson.withObject | |
19 | , Aeson.withText | |
20 | , Aeson.withArray | |
21 | , Aeson.withScientific | |
22 | , Aeson.withBool | |
23 | -- ** Accessors | |
24 | , (Aeson..:) | |
25 | , (Aeson..:?) | |
26 | , (Aeson..:!) | |
27 | , (Aeson..!=) | |
28 | ) where | |
2 | 29 | |
3 | import Data.Aeson | |
4 | import Data.ByteString.Lazy (ByteString) | |
30 | import Data.Aeson (FromJSON, Result(..), fromJSON) | |
31 | import qualified Data.Aeson as Aeson | |
32 | import qualified Data.ByteString as BSE | |
33 | import Data.ByteString.Lazy (ByteString) | |
34 | import qualified Data.ByteString.Lazy.Char8 as BS | |
35 | import Language.Vergilius.AST (ChValue) | |
36 | import Language.Vergilius.Eval | |
37 | import Language.Vergilius.Lexer | |
38 | import Language.Vergilius.Parser | |
5 | 39 | |
40 | parseVirgil :: ByteString -> Either String ChValue | |
41 | parseVirgil = runAlex' parse "<input>" . BS.unpack | |
42 | ||
43 | -- | Deserialize and execute a Virgil program. If parsing fails, | |
44 | -- or if type-checking fails, or if the execution of the Virgil | |
45 | -- program otherwise fails at runtime, then this function will | |
46 | -- return 'Nothing'. | |
47 | -- | |
48 | -- Note that Virgil rules out recursive programs, so executing a | |
49 | -- Virgil program won't loop forever---but it may still take | |
50 | -- a fair amount longer than parsing the equivalent JSON! | |
6 | 51 | decode :: FromJSON a => ByteString -> Maybe a |
7 | eitherDecode :: FromJSON a => ByteString -> Either String a | |
52 | decode bs = case eitherDecode bs of | |
53 | Error err -> Nothing | |
54 | Success val -> pure val | |
55 | ||
56 | -- | Deserialize and execute a Virgil program. If parsing fails, | |
57 | -- or if type-checking fails, or if the execution of the Virgil | |
58 | -- program otherwise fails at runtime, then this function will | |
59 | -- return an approrpriately informative error message. | |
60 | eitherDecode :: FromJSON a => ByteString -> Result a | |
61 | eitherDecode bs = case parseVirgil bs of | |
62 | Left err -> Error err | |
63 | Right ch -> case evalCh ch of | |
64 | Left err -> Error err | |
65 | Right v -> fromJSON v | |
66 | ||
67 | decodeStrict :: FromJSON a => BSE.ByteString -> Maybe a | |
68 | decodeStrict = decode . BS.fromStrict | |
69 | ||
70 | eitherDecodeStrict :: FromJSON a => BSE.ByteString -> Result a | |
71 | eitherDecodeStrict = eitherDecode . BS.fromStrict |
1 | { | |
2 | ||
3 | # comments start with an octothorpe | |
4 | # objects are created with key-value pairs, but don't use | |
5 | # commas or colons | |
6 | ||
7 | "one" 1 # integers | |
8 | ||
9 | "two" 2.0 # floats | |
10 | ||
11 | "three" [ # lists use square brackets | |
12 | "s" # the string syntax is identical to JSON's | |
13 | "s" # again, no commas are necessary | |
14 | "0" | |
15 | ] | |
16 | ||
17 | "four" # stitched expressions are fully contained in parens | |
18 | ( 2 + 2 ) | |
19 | ||
20 | # the with keyword introduces a binding, of the form | |
21 | # with ( [ident] [arg1 arg2 ... argn] = [expr] ) | |
22 | # these can occur anywhere and are ignored, but are scoped | |
23 | # to the data structure in which they occur | |
24 | with ( | |
25 | incr x = x + 1 | |
26 | ) | |
27 | "five" (incr 4) | |
28 | ||
29 | # this entire file denotes the JSON document | |
30 | # { "one": 1, | |
31 | # "two": 2.0, | |
32 | # "three": ["s", "s", "0"], | |
33 | # "four": 4, | |
34 | # "five": 5 | |
35 | # } | |
36 | ||
37 | } |
1 | { | |
2 | ||
3 | # comments start with an octothorpe | |
4 | # objects are created with key-value pairs, but don't use | |
5 | # commas or colons | |
6 | ||
7 | "one" 1 # integers | |
8 | ||
9 | # "two" 2.0 # floats | |
10 | ||
11 | # lists use square brackets | |
12 | "three" | |
13 | [ | |
14 | "s" # the string syntax is identical to JSON's | |
15 | "s" # again, no commas are necessary | |
16 | "0" | |
17 | ] | |
18 | ||
19 | } |
13 | 13 | cabal-version: >=1.10 |
14 | 14 | |
15 | 15 | library |
16 | -- exposed-modules: | |
17 | -- other-modules: | |
18 | -- other-extensions: | |
19 | build-depends: base >=4.7 && <4.8 | |
20 |
|
|
16 | exposed-modules: Language.Vergilius, | |
17 | Language.Vergilius.Lexer, | |
18 | Language.Vergilius.Parser, | |
19 | Language.Vergilius.Eval | |
20 | -- other-modules: | |
21 | -- other-extensions: | |
22 | build-depends: base >=4.7 && <4.9, | |
23 | aeson, | |
24 | array, | |
25 | bytestring, | |
26 | unordered-containers, | |
27 | scientific, | |
28 | text, | |
29 | vector, | |
30 | pretty | |
31 | build-tools: alex | |
21 | 32 | default-language: Haskell2010 |