module Network.Courriel.Parse where
import Control.Applicative
import Control.Monad (void)
import Data.Attoparsec.ByteString (Parser)
import qualified Data.Attoparsec.ByteString as Atto
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Builder as BB
import qualified Data.Char as Char
import qualified Data.Foldable as Fold
import Data.Monoid ((<>))
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Time.Clock (UTCTime)
import Data.Word (Word8)
import System.Directory (listDirectory)
import Network.Courriel.Types
-- The comments here are taken directly from RFC 5322
w8 :: Char -> Word8
w8 = fromIntegral . Char.ord
char :: Char -> Parser ()
char n = void (Atto.word8 (fromIntegral (Char.ord n)))
pSeq :: Parser a -> Parser (Seq a)
pSeq p = Seq.fromList <$> Atto.many' p
pMsgId :: Parser MessageId
pMsgId = do
pCfwsMb
char '<'
left <- pDotAtom
char '@'
right <- pDotAtom <|> pDtext
char '>'
return (MessageId (left <> "@" <> right))
pMsgIdLiberal :: Parser MessageId
pMsgIdLiberal = do
pCfwsMb
char '<'
bs <- Atto.takeWhile (/= (w8 '>'))
char '>'
return (MessageId bs)
pDtext :: Parser ByteString
pDtext = do
void (Atto.word8 91)
rs <- Atto.takeWhile (\ c -> (c >= 33 && c <= 90) || (c >= 94 && c <= 128))
void (Atto.word8 93)
return rs
-- word = atom / quoted-string
-- phrase = 1*word / obs-phrase
pPhrase :: Parser ByteString
pPhrase = pQuotedString <|>
(B.intercalate (B.singleton 32) <$> Atto.many' pWord)
pWord :: Parser ByteString
pWord = pCfwsMb *> Atto.takeWhile1 atext <* pCfwsMb
pQuotedString :: Parser ByteString
pQuotedString = do
pCfwsMb
void (Atto.word8 34)
let ch = Atto.satisfy (\ c -> c /= 92 && c /= 34) <|> pQuotedPair
text <- Atto.many' $ do
w <- pFwsMb
c <- BB.word8 <$> ch
return (w `mappend` c)
rs <- pFwsMb
void (Atto.word8 34)
pCfwsMb
return (BL.toStrict (BB.toLazyByteString (mconcat text `mappend` rs)))
-- mailbox = name-addr / addr-spec
-- name-addr = [display-name] angle-addr
-- angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
-- obs-angle-addr
pMailbox :: Parser Mailbox
pMailbox = nameAddr <|> (Mailbox Nothing <$> pAddrSpec)
where nameAddr =
Mailbox <$> ((Just <$> pPhrase) <|> pure Nothing)
<*> (pCfwsMb *> Atto.word8 60 *> pAddrSpec <* Atto.word8 62 <* pCfwsMb)
-- (mailbox *("," mailbox)) / obs-mbox-list
pMailboxList :: Parser (Seq Mailbox)
pMailboxList = do
m <- pMailbox
ms <- Atto.option Seq.empty (Atto.word8 44 *> pMailboxList)
return (m Seq.<| ms)
-- address-list = (address *("," address)) / obs-addr-list
pAddressList :: Parser (Seq MBGroup)
pAddressList = do
m <- pAddress
ms <- Atto.option Seq.empty (Atto.word8 44 *> pAddressList)
return (m Seq.<| ms)
pAddress :: Parser MBGroup
pAddress = MBSingle <$> pMailbox <|> MBGroup <$> pPhrase <*> grp
where grp = (Atto.word8 58 *> (Atto.option mempty pGroupList) <* Atto.word8 59 <* pCfwsMb)
pGroupList :: Parser (Seq Mailbox)
pGroupList = pMailboxList <|> (mempty <$ pCfws)
-- dot-atom = [CFWS] dot-atom-text [CFWS]
-- dot-atom-text = 1*atext *("." 1*atext)
pDotAtom :: Parser ByteString
pDotAtom = pCfwsMb *> Atto.takeWhile (\ c -> atext c || c == 46) <* pCfwsMb
pAddrSpec :: Parser Address
pAddrSpec = Address <$> pLocalPart <*> (Atto.word8 64 *> pDomain)
where pLocalPart = pDotAtom
pDomain = pDotAtom
atext :: Word8 -> Bool
atext c =
let c' = Char.chr (fromIntegral c)
in Char.isAlpha c' || Char.isDigit c' || c' `elem` ("!#$%&'*+-/=?^_`{|}~" :: [Char])
getSubject :: RawEmail -> ByteString
getSubject RawEmail { _rawEmailHeaders = hs } =
head [ subj | HeaderSubject subj <- Fold.toList hs ]
readBox :: FilePath -> IO [(FilePath, Either String RawEmail)]
readBox fp = do
paths <- listDirectory fp
let files = [ fp ++ "/" ++ p | p <- paths ]
raws <- sequence [ (,) f `fmap` B.readFile f | f <- files ]
return [ (f, Atto.parseOnly pEmail r) | (f, r) <- raws ]
pEmail :: Parser RawEmail
pEmail = do
headers <- Atto.manyTill pHeader pCrlf
body <- Body `fmap` pBody
return (RawEmail (Seq.fromList headers) (Seq.singleton body))
pHeader :: Parser Header
pHeader = do
name <- Atto.takeWhile1 (\ c -> c /= 58 && not (isSpace c))
void $ Atto.word8 58
case B8.map Char.toLower name of
"origdate" -> HeaderOther "arglbargl" <$> foldingHeader
"subject" -> HeaderSubject <$> foldingHeader
"comments" -> HeaderComments <$> foldingHeader
"keywords" -> HeaderKeywords <$> foldingHeader
"sender" -> HeaderSender <$> pMailbox <* pCrlf
"from" -> HeaderFrom <$> pMailboxList <* pCrlf
"reply-to" -> HeaderReplyTo <$> pAddressList <* pCrlf
"to" -> HeaderTo <$> pAddressList <* pCrlf
"cc" -> HeaderCc <$> pAddressList <* pCrlf
"bcc" -> HeaderBcc <$> pAddressList <* pCrlf
"message-id" -> HeaderMessageId <$> pMsgId <* pCrlf
"in-reply-to" -> HeaderInReplyTo <$> pSeq pMsgIdLiberal <* pCrlf
"references" -> HeaderReferences <$> pSeq pMsgIdLiberal <* pCrlf
_ -> HeaderOther name <$> foldingHeader
pDateTime :: Parser UTCTime
pDateTime = undefined
overFoldingHeader :: Parser a -> Parser a
overFoldingHeader parse = do
bs <- foldingHeader
case Atto.parseOnly parse bs of
Left err -> fail err
Right x -> return x
foldingHeader :: Parser ByteString
foldingHeader = do
skipSpace
line <- Atto.takeTill (== 10)
void $ Atto.word8 10
next <- Atto.peekWord8'
if isSpace next
then skipSpace *> (B.append line <$> foldingHeader)
else return line
skipSpace :: Parser ()
skipSpace = void $ Atto.takeWhile isSpace
isSpace :: (Num a, Eq a) => a -> Bool
isSpace c = c == 32 || c == 9
isWS :: (Num a, Eq a) => a -> Bool
isWS c = c == 32 || c == 9
pCfwsMb :: Parser ()
pCfwsMb = Atto.option () pCfws
pCfws :: Parser ()
pCfws =
(Atto.skipMany1 (pFwsMb *> pComment) *> void pFwsMb) <|> pFws
pFwsMb :: Parser BB.Builder
pFwsMb = do
next <- Atto.peekWord8'
if isSpace next
then BB.word8 32 <$ pFws
else return mempty
pFws :: Parser ()
pFws =
Atto.skipMany (Atto.skipWhile isSpace *> pCrlf) *>
Atto.skipMany1 (Atto.satisfy isSpace)
pComment :: Parser ()
pComment = void (Atto.word8 40 *> void body *> Atto.word8 41)
where body = Atto.skipMany (pFwsMb *> content) *> pFwsMb
content = void (Atto.satisfy ctext) <|> void pQuotedPair <|> pComment
ctext c = c /= 40 && c /= 41 && c /= 92
pQuotedPair :: Parser Word8
pQuotedPair = Atto.word8 92 *> Atto.anyWord8
pCrlf :: Parser ()
pCrlf = void (Atto.word8 10)
pBody :: Parser ByteString
pBody = B.concat <$> Atto.many' (pText <* pCrlf)
pText :: Parser ByteString
pText = Atto.takeWhile (\ c -> c /= 10 && c /= 13)