gdritter repos endsay / master src / Parse.hs
master

Tree @master (Download .tar.gz)

Parse.hs @masterraw · history · blame

module Parse where

import           Control.Monad (when)
import           Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import           Data.Char (ord)
import           Data.List.NonEmpty (NonEmpty(..))
import           Data.Text (Text)
import qualified Data.Text as T
import           Text.Megaparsec
import           Text.Megaparsec.Text

import Types

-- Okay, yeah, 'tokenize' would be better, sure

initialParserState :: Text -> State Text
initialParserState t = State
  { stateInput    = t
  , statePos      = SourcePos "<file>" (unsafePos 1) (unsafePos 1) :| []
  , stateTabWidth = unsafePos 8
  }

pNextParsedObject :: State Text -> Either String (ParsedObject, State Text)
pNextParsedObject st = result
  where result = case runParser' (pWhitespace *> pParsedObject) st of
          (_, Left err) -> Left (show err)
          (s, Right v)  -> Right (v, s)

pWhitespace :: Parser ()
pWhitespace = skipMany spaceChar

specialChars :: String
specialChars = "()<>[]{}/% \t\r\n"

pParsedObject :: Parser ParsedObject
pParsedObject =
      uncurry PORadix <$> try pRadix
  <|> POReal       <$> pReal
  <|> POInteger    <$> pInteger
  <|> POString     <$> pString
  <|> POByteString <$> pByteString
  <|> POSymbol     <$> pSymbol
  <|> POName       <$> pName

pSign :: Num a => Parser (a -> a)
pSign =  negate <$ char '-'
     <|> id     <$ char '+'
     <|> pure id

pRadix :: Parser (Int, Integer)
pRadix = do
  base <- read <$> some digitChar
  _ <- char '#'
  when (base < 2 || base > 36) $
    fail ("Invalid base: " ++ show base)
  num <- some (digitForBase base)
  return (base, readAsBase base num)

readAsBase :: Num a => Int -> String -> a
readAsBase _ [] = error "[unreachable]"
readAsBase _ [c] = numValOf c
readAsBase b (c:cs) = (numValOf c * fromIntegral b) + readAsBase b cs

digitForBase :: Int -> Parser Char
digitForBase n = oneOf chars
  where chars = take n ['0'..'9'] ++
                take (n-1) ['A'..'Z'] ++
                take (n-1) ['a'..'z']

pReal :: Parser Double
pReal = fail "unimplemented"

pInteger :: Parser Integer
pInteger = do
  sign <- pSign
  num <- some digitChar
  return $ sign (read num)

pString :: Parser Text
pString = T.pack <$> (char '(' *> go)
  where go =
              "" <$ char ')'
          <|> (++) <$> some (noneOf ['(',')','\\']) <*> go -- XXX: handle rest!

pByteString :: Parser ByteString
pByteString = char '<' *> (go <$> some hexDigitChar) <* char '>'
  where go = BS.pack . toVals
        toVals (x:y:xs) = (numValOf x * 16 + numValOf y) : toVals xs
        toVals (x:[])   = [ numValOf x * 16 ]
        toVals []       = []

numValOf :: Num b => Char -> b
numValOf n
  | n >= 'a' && n <= 'z' = fromIntegral (ord n) - 0x57
  | n >= 'A' && n <= 'Z' = fromIntegral (ord n) - 0x37
  | otherwise            = fromIntegral (ord n) - 0x30

pSymbol :: Parser Text
pSymbol = T.pack <$> (char '/' *> many (noneOf specialChars))

pName :: Parser Text
pName = T.pack <$> some (noneOf specialChars)