gdritter repos virgil / master Language / Virgil / Lexer.x
master

Tree @master (Download .tar.gz)

Lexer.x @masterraw · history · blame

{
{-# OPTIONS -w #-}
{-# LANGUAGE OverloadedStrings #-}

module Language.Virgil.Lexer where

import           Control.Monad (liftM)
import           Data.Monoid ((<>))
import           Data.Scientific
import           Data.Text (Text)
import qualified Data.Text as T

import           Prelude hiding (lex)
}

%wrapper "monadUserState"

$digit = 0-9
$alpha = [A-Za-z]

@int = $digit+
@exp = [Ee] [\-\+]? @int
@frac = @int \. @int @exp? | @int @exp
@sign = \-?

@str = \"[^\"]*\"

tokens :-
  $white+ ;
  "#".* ;

  @sign @int \. @int      { lex (float True False) }
  @sign @int \. @int @exp { lex (float True True)  }
  @sign @int         @exp { lex (float False True) }

  @sign @int  { lex (TkInt . read . T.unpack) }
  @str  { lex strLiteral }

  with  { lex' (TkKw KwWith) }
  true  { lex' (TkKw KwTrue) }
  false { lex' (TkKw KwFalse) }
  null  { lex' (TkKw KwNull) }

  \{  { lex' TkLCurl }
  \}  { lex' TkRCurl }
  \[  { lex' TkLBrac }
  \]  { lex' TkRBrac }
  \(  { lex' TkLParn }
  \)  { lex' TkRParn }
  \:  { lex' TkColon }
  \;  { lex' TkSemi  }
  \,  { lex' TkComma }

{
data Token = Token AlexPosn TkType deriving (Eq, Show)

data KwType
  = KwTrue
  | KwFalse
  | KwNull
  | KwWith
    deriving (Eq, Show)

data TkType
  = TkInt Integer
  | TkFloat Scientific
  | TkStr Text
  | TkKw KwType
  | TkColon
  | TkSemi
  | TkLCurl
  | TkRCurl
  | TkLBrac
  | TkRBrac
  | TkLParn
  | TkRParn
  | TkComma
  | TkEOF
    deriving (Eq, Show)

-- Parsing floats with exponents and whatnot
-- XXX: MAKE MORE EFFICIENT AND LESS GROSS
float :: Bool -> Bool -> Text -> TkType
float False False _ = error "[unreachable]"
float True  False t = -- dot but not exponent
  let [a,b] = T.splitOn "." t
  in TkFloat (scientific (read (T.unpack (a <> b))) (negate (T.length b)))
float False True  t = -- exponent but no dot
  let [a,e] = if T.any (== 'e') t
                then T.splitOn "e" t
				else T.splitOn "E" t
  in TkFloat (scientific (read (T.unpack a)) (read (T.unpack e)))
float True  True  t = -- exponent and dot
  let [t',e] = if T.any (== 'e') t
                 then T.splitOn "e" t
                 else T.splitOn "E" t
      [a, b] = T.splitOn "." t'
  in TkFloat (scientific (read (T.unpack (a <> b)))
                         (negate (T.length b) + read (T.unpack e)))

strLiteral :: Text -> TkType
strLiteral = TkStr . T.drop 1 . T.dropEnd 1

data AlexUserState = AlexUserState
  { filePath :: FilePath
  } deriving (Eq, Show)

alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState "<unknown>"

getFilePath :: Alex FilePath
getFilePath = liftM filePath alexGetUserState

setFilePath :: FilePath -> Alex ()
setFilePath = alexSetUserState . AlexUserState

alexMonadScan' :: Alex Token
alexMonadScan' = do
  inp <- alexGetInput
  sc <- alexGetStartCode
  case alexScan inp sc of
    AlexEOF -> alexEOF
    AlexError (p, _, _, s) ->
        alexError' p ("lexical error at character '" ++ take 1 s ++ "'")
    AlexSkip  inp' len -> do
        alexSetInput inp'
        alexMonadScan'
    AlexToken inp' len action -> do
        alexSetInput inp'
        action (ignorePendingBytes inp) len

alexError' :: AlexPosn -> String -> Alex a
alexError' (AlexPn _ l c) msg = do
  fp <- getFilePath
  alexError (fp ++ ":" ++ show l ++ ":" ++ show c ++ ": " ++ msg)

alexEOF :: Alex Token
alexEOF = do
  (p,_,_,_) <- alexGetInput
  return (Token p TkEOF)


lex :: (Text -> TkType) -> AlexAction Token
lex f = \(p,_,_,s) i -> return (Token p (f (T.pack (take i s))))

lex' :: TkType -> AlexAction Token
lex' = lex . const

runAlex' :: Alex a -> FilePath -> String -> Either String a
runAlex' mote fp input = runAlex input (setFilePath fp >> mote)
}