gdritter repos courriel / master src / Network / Courriel / Parse.hs
master

Tree @master (Download .tar.gz)

Parse.hs @masterraw · history · blame

module Network.Courriel.Parse where


import           Control.Applicative
import           Control.Monad (void)
import           Data.Attoparsec.ByteString (Parser)
import qualified Data.Attoparsec.ByteString as Atto
import           Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Builder as BB
import qualified Data.Char as Char
import qualified Data.Foldable as Fold
import           Data.Monoid ((<>))
import           Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import           Data.Time.Clock (UTCTime)
import           Data.Word (Word8)
import           System.Directory (listDirectory)

import           Network.Courriel.Types

-- The comments here are taken directly from RFC 5322

w8 :: Char -> Word8
w8 = fromIntegral . Char.ord

char :: Char -> Parser ()
char n = void (Atto.word8 (fromIntegral (Char.ord n)))

pSeq :: Parser a -> Parser (Seq a)
pSeq p = Seq.fromList <$> Atto.many' p

pMsgId :: Parser MessageId
pMsgId = do
  pCfwsMb
  char '<'
  left <- pDotAtom
  char '@'
  right <- pDotAtom <|> pDtext
  char '>'
  return (MessageId (left <> "@" <> right))

pMsgIdLiberal :: Parser MessageId
pMsgIdLiberal = do
  pCfwsMb
  char '<'
  bs <- Atto.takeWhile (/= (w8 '>'))
  char '>'
  return (MessageId bs)

pDtext :: Parser ByteString
pDtext = do
  void (Atto.word8 91)
  rs <- Atto.takeWhile (\ c -> (c >= 33 && c <= 90) || (c >= 94 && c <= 128))
  void (Atto.word8 93)
  return rs

-- word            =   atom / quoted-string
-- phrase          =   1*word / obs-phrase
pPhrase :: Parser ByteString
pPhrase = pQuotedString <|>
  (B.intercalate (B.singleton 32) <$> Atto.many' pWord)

pWord :: Parser ByteString
pWord = pCfwsMb *> Atto.takeWhile1 atext <* pCfwsMb

pQuotedString :: Parser ByteString
pQuotedString = do
  pCfwsMb
  void (Atto.word8 34)
  let ch = Atto.satisfy (\ c -> c /= 92 && c /= 34) <|> pQuotedPair
  text <- Atto.many' $ do
    w <- pFwsMb
    c <- BB.word8 <$> ch
    return (w `mappend` c)
  rs <- pFwsMb
  void (Atto.word8 34)
  pCfwsMb
  return (BL.toStrict (BB.toLazyByteString (mconcat text `mappend` rs)))

-- mailbox         =   name-addr / addr-spec
-- name-addr       =   [display-name] angle-addr
-- angle-addr      =   [CFWS] "<" addr-spec ">" [CFWS] /
--                     obs-angle-addr
pMailbox :: Parser Mailbox
pMailbox = nameAddr <|> (Mailbox Nothing <$> pAddrSpec)
  where nameAddr =
          Mailbox <$> ((Just <$> pPhrase) <|> pure Nothing)
                  <*> (pCfwsMb *> Atto.word8 60 *> pAddrSpec <* Atto.word8 62 <* pCfwsMb)

-- (mailbox *("," mailbox)) / obs-mbox-list

pMailboxList :: Parser (Seq Mailbox)
pMailboxList = do
  m <- pMailbox
  ms <- Atto.option Seq.empty (Atto.word8 44 *> pMailboxList)
  return (m Seq.<| ms)

-- address-list    =   (address *("," address)) / obs-addr-list

pAddressList :: Parser (Seq MBGroup)
pAddressList = do
  m <- pAddress
  ms <- Atto.option Seq.empty (Atto.word8 44 *> pAddressList)
  return (m Seq.<| ms)

pAddress :: Parser MBGroup
pAddress = MBSingle <$> pMailbox <|> MBGroup <$> pPhrase <*> grp
  where grp = (Atto.word8 58 *> (Atto.option mempty pGroupList) <* Atto.word8 59 <* pCfwsMb)

pGroupList :: Parser (Seq Mailbox)
pGroupList = pMailboxList <|> (mempty <$ pCfws)

-- dot-atom        =   [CFWS] dot-atom-text [CFWS]
-- dot-atom-text   =   1*atext *("." 1*atext)
pDotAtom :: Parser ByteString
pDotAtom = pCfwsMb *> Atto.takeWhile (\ c -> atext c || c == 46) <* pCfwsMb

pAddrSpec :: Parser Address
pAddrSpec = Address <$> pLocalPart <*> (Atto.word8 64 *> pDomain)
  where pLocalPart = pDotAtom
        pDomain = pDotAtom

atext :: Word8 -> Bool
atext c =
  let c' = Char.chr (fromIntegral c)
  in Char.isAlpha c' || Char.isDigit c' || c' `elem` ("!#$%&'*+-/=?^_`{|}~" :: [Char])

getSubject :: RawEmail -> ByteString
getSubject RawEmail { _rawEmailHeaders = hs } =
  head [ subj | HeaderSubject subj <- Fold.toList hs ]

readBox :: FilePath -> IO [(FilePath, Either String RawEmail)]
readBox fp = do
  paths <- listDirectory fp
  let files = [ fp ++ "/" ++ p | p <- paths ]
  raws <- sequence [ (,) f `fmap` B.readFile f | f <- files ]
  return [ (f, Atto.parseOnly pEmail r) | (f, r) <- raws ]

pEmail :: Parser RawEmail
pEmail = do
  headers <- Atto.manyTill pHeader pCrlf
  body <- Body `fmap` pBody
  return (RawEmail (Seq.fromList headers) (Seq.singleton body))

pHeader :: Parser Header
pHeader = do
  name <- Atto.takeWhile1 (\ c -> c /= 58 && not (isSpace c))
  void $ Atto.word8 58
  case B8.map Char.toLower name of
    "origdate"    -> HeaderOther "arglbargl" <$> foldingHeader
    "subject"     -> HeaderSubject <$> foldingHeader
    "comments"    -> HeaderComments <$> foldingHeader
    "keywords"    -> HeaderKeywords <$> foldingHeader
    "sender"      -> HeaderSender <$> pMailbox <* pCrlf
    "from"        -> HeaderFrom <$> pMailboxList <* pCrlf
    "reply-to"    -> HeaderReplyTo <$> pAddressList <* pCrlf
    "to"          -> HeaderTo <$> pAddressList <* pCrlf
    "cc"          -> HeaderCc <$> pAddressList <* pCrlf
    "bcc"         -> HeaderBcc <$> pAddressList <* pCrlf
    "message-id"  -> HeaderMessageId <$> pMsgId <* pCrlf
    "in-reply-to" -> HeaderInReplyTo <$> pSeq pMsgIdLiberal <* pCrlf
    "references"  -> HeaderReferences <$> pSeq pMsgIdLiberal <* pCrlf
    _             -> HeaderOther name <$> foldingHeader

pDateTime :: Parser UTCTime
pDateTime = undefined

overFoldingHeader :: Parser a -> Parser a
overFoldingHeader parse = do
  bs <- foldingHeader
  case Atto.parseOnly parse bs of
    Left err -> fail err
    Right x  -> return x

foldingHeader :: Parser ByteString
foldingHeader = do
  skipSpace
  line <- Atto.takeTill (== 10)
  void $ Atto.word8 10
  next <- Atto.peekWord8'
  if isSpace next
    then skipSpace *> (B.append line <$> foldingHeader)
    else return line

skipSpace :: Parser ()
skipSpace = void $ Atto.takeWhile isSpace

isSpace :: (Num a, Eq a) => a -> Bool
isSpace c = c == 32 || c == 9

isWS :: (Num a, Eq a) => a -> Bool
isWS c = c == 32 || c == 9

pCfwsMb :: Parser ()
pCfwsMb = Atto.option () pCfws

pCfws :: Parser ()
pCfws =
  (Atto.skipMany1 (pFwsMb *> pComment) *> void pFwsMb) <|> pFws

pFwsMb :: Parser BB.Builder
pFwsMb = do
  next <- Atto.peekWord8'
  if isSpace next
     then BB.word8 32 <$ pFws
     else return mempty

pFws :: Parser ()
pFws =
  Atto.skipMany (Atto.skipWhile isSpace *> pCrlf) *>
  Atto.skipMany1 (Atto.satisfy isSpace)

pComment :: Parser ()
pComment = void (Atto.word8 40 *> void body *> Atto.word8 41)
  where body = Atto.skipMany (pFwsMb *> content) *> pFwsMb
        content = void (Atto.satisfy ctext) <|> void pQuotedPair <|> pComment
        ctext c = c /= 40 && c /= 41 && c /= 92

pQuotedPair :: Parser Word8
pQuotedPair = Atto.word8 92 *> Atto.anyWord8

pCrlf :: Parser ()
pCrlf = void (Atto.word8 10)

pBody :: Parser ByteString
pBody = B.concat <$> Atto.many' (pText <* pCrlf)

pText :: Parser ByteString
pText = Atto.takeWhile (\ c -> c /= 10 && c /= 13)