gdritter repos sml / master src / SML / Lexer.x
master

Tree @master (Download .tar.gz)

Lexer.x @masterraw · history · blame

{
{-# LANGUAGE RecordWildCards #-}

module SML.Lexer where

import           AlexTools
import qualified Data.Char as Char
import qualified Data.Text as T
}

-- Character-set macro for the decimal digits 0 through 9.
-- NOTE(review): no rule in the visible rule block refers to $digit yet.
$digit = [0-9]

:-

<0> {
-- Whitespace only separates tokens; the empty action discards it.
$white+ ;

-- Reserved words.  Alex always takes the longest match, so a keyword such
-- as "andalso" wins over its prefix "and" regardless of rule order.
"abstype"   { keyword KAbstype }
"and"       { keyword KAnd }
"andalso"   { keyword KAndAlso }
"as"        { keyword KAs }
"case"      { keyword KCase }
"datatype"  { keyword KDatatype }
"do"        { keyword KDo }
"else"      { keyword KElse }
"end"       { keyword KEnd }
"exception" { keyword KException }
"fn"        { keyword KFn }
"fun"       { keyword KFun }
"handle"    { keyword KHandle }
"if"        { keyword KIf }
"in"        { keyword KIn }
"infix"     { keyword KInfix }
"infixr"    { keyword KInfixr }
"let"       { keyword KLet }
"local"     { keyword KLocal }
"nonfix"    { keyword KNonfix }
"of"        { keyword KOf }
"op"        { keyword KOp }
"open"      { keyword KOpen }
"orelse"    { keyword KOrElse }
"raise"     { keyword KRaise }
"rec"       { keyword KRec }
"then"      { keyword KThen }
"type"      { keyword KType }
"val"       { keyword KVal }
"with"      { keyword KWith }
-- Standard ML spells this reserved word entirely in lower case.
"withtype"  { keyword KWithType }
"while"     { keyword KWhile }

-- Punctuation and reserved symbols.  Longest match also makes "=>" win
-- over "=" when both could apply.
"["   { token TkLBrac }
"]"   { token TkRBrac }
"("   { token TkLPar }
")"   { token TkRPar }
"{"   { token TkLCurl }
"}"   { token TkRCurl }
","   { token TkComma }
":"   { token TkColon }
";"   { token TkSemi }
"..." { token TkEllipsis }
"_"   { token TkUnder }
"|"   { token TkBar }
"="   { token TkEq }
"=>"  { token TkFatArrow }
"->"  { token TkArrow }
"#"   { token TkOcto }

}

{
-- | Tokens produced by the lexer rules: one wrapper constructor for
-- reserved words plus one constructor per punctuation symbol.
data Token
  = TkKeyword Keyword -- ^ a reserved word, see Keyword
  | TkLBrac -- ^ left square bracket
  | TkRBrac -- ^ right square bracket
  | TkLPar -- ^ left parenthesis
  | TkRPar -- ^ right parenthesis
  | TkLCurl -- ^ left curly brace
  | TkRCurl -- ^ right curly brace
  | TkComma -- ^ comma
  | TkColon -- ^ colon
  | TkSemi -- ^ semicolon
  | TkEllipsis -- ^ three consecutive dots
  | TkUnder -- ^ underscore
  | TkBar -- ^ vertical bar
  | TkEq -- ^ equals sign
  | TkFatArrow -- ^ equals sign followed by greater-than
  | TkArrow -- ^ hyphen followed by greater-than
  | TkOcto -- ^ hash sign
    deriving (Eq, Show)

-- | Reserved words recognised by the lexer.  Each constructor is produced
-- by exactly one keyword rule in the rule section and reaches the token
-- stream wrapped in TkKeyword.
data Keyword
  = KAbstype
  | KAnd
  | KAndAlso
  | KAs
  | KCase
  | KDatatype
  | KDo
  | KElse
  | KEnd
  | KException
  | KFn
  | KFun
  | KHandle
  | KIf
  | KIn
  | KInfix
  | KInfixr
  | KLet
  | KLocal
  | KNonfix
  | KOf
  | KOp
  | KOpen
  | KOrElse
  | KRaise
  | KRec
  | KThen
  | KType
  | KVal
  | KWith
  | KWithType
  | KWhile
    deriving (Eq, Show)

-- | Lexer state type used as the state parameter of every Action in this
-- module.  Only a single mode is defined here.
data Mode
  = Normal
    deriving (Show)


-- | Build an action that emits exactly one lexeme for the current match.
-- The token is computed from the matched text by the supplied function;
-- the matched text and its source range are stored alongside it.
emits :: (T.Text -> Token) -> Action Mode [Lexeme Token]
emits mkToken = single <$> matchText <*> matchRange
  where
    -- Package the matched text and range into a one-element lexeme list.
    single txt rng =
      [ Lexeme
          { lexemeToken = mkToken txt
          , lexemeText  = txt
          , lexemeRange = rng
          }
      ]

-- | Action that emits the given token unchanged, ignoring the matched text.
token :: Token -> Action Mode [Lexeme Token]
token = emits . const

-- | Action that emits the given reserved word wrapped in TkKeyword.
keyword :: Keyword -> Action Mode [Lexeme Token]
keyword = token . TkKeyword

-- | Byte-level input function used by the generated scanner, built with
-- makeAlexGetByte from a per-character classifier.
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte = makeAlexGetByte toByte
  where
    -- ASCII characters are passed through as their own code; every other
    -- character is collapsed onto the single byte 0x1.
    toByte c
      | Char.isAscii c = fromIntegral (Char.ord c)
      | otherwise      = 0x1

}