gdritter repos courriel / master src / Network / Courriel / Parse.hs
master

Tree @master (Download .tar.gz)

Parse.hs @master

fbcaff2
 
 
 
 
 
 
 
 
00ea186
cf6f62f
 
 
fbcaff2
00ea186
fbcaff2
 
 
 
 
 
 
 
cf6f62f
 
00ea186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbcaff2
cf6f62f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbcaff2
 
 
cf6f62f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbcaff2
cf6f62f
 
 
 
 
fbcaff2
cf6f62f
fbcaff2
 
 
 
 
 
 
cf6f62f
 
 
fbcaff2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00ea186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbcaff2
 
 
 
cf6f62f
 
 
 
 
 
 
fbcaff2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf6f62f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbcaff2
 
 
 
 
 
 
 
module Network.Courriel.Parse where


import           Control.Applicative
import           Control.Monad (void)
import           Data.Attoparsec.ByteString (Parser)
import qualified Data.Attoparsec.ByteString as Atto
import           Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Builder as BB
import qualified Data.Char as Char
import qualified Data.Foldable as Fold
import           Data.Monoid ((<>))
import           Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import           Data.Time.Clock (UTCTime)
import           Data.Word (Word8)
import           System.Directory (listDirectory)

import           Network.Courriel.Types

-- The comments here are taken directly from RFC 5322

w8 :: Char -> Word8
w8 = fromIntegral . Char.ord

char :: Char -> Parser ()
char n = void (Atto.word8 (fromIntegral (Char.ord n)))

pSeq :: Parser a -> Parser (Seq a)
pSeq p = Seq.fromList <$> Atto.many' p

pMsgId :: Parser MessageId
pMsgId = do
  pCfwsMb
  char '<'
  left <- pDotAtom
  char '@'
  right <- pDotAtom <|> pDtext
  char '>'
  return (MessageId (left <> "@" <> right))

pMsgIdLiberal :: Parser MessageId
pMsgIdLiberal = do
  pCfwsMb
  char '<'
  bs <- Atto.takeWhile (/= (w8 '>'))
  char '>'
  return (MessageId bs)

pDtext :: Parser ByteString
pDtext = do
  void (Atto.word8 91)
  rs <- Atto.takeWhile (\ c -> (c >= 33 && c <= 90) || (c >= 94 && c <= 128))
  void (Atto.word8 93)
  return rs

-- word            =   atom / quoted-string
-- phrase          =   1*word / obs-phrase
pPhrase :: Parser ByteString
pPhrase = pQuotedString <|>
  (B.intercalate (B.singleton 32) <$> Atto.many' pWord)

pWord :: Parser ByteString
pWord = pCfwsMb *> Atto.takeWhile1 atext <* pCfwsMb

pQuotedString :: Parser ByteString
pQuotedString = do
  pCfwsMb
  void (Atto.word8 34)
  let ch = Atto.satisfy (\ c -> c /= 92 && c /= 34) <|> pQuotedPair
  text <- Atto.many' $ do
    w <- pFwsMb
    c <- BB.word8 <$> ch
    return (w `mappend` c)
  rs <- pFwsMb
  void (Atto.word8 34)
  pCfwsMb
  return (BL.toStrict (BB.toLazyByteString (mconcat text `mappend` rs)))

-- mailbox         =   name-addr / addr-spec
-- name-addr       =   [display-name] angle-addr
-- angle-addr      =   [CFWS] "<" addr-spec ">" [CFWS] /
--                     obs-angle-addr
pMailbox :: Parser Mailbox
pMailbox = nameAddr <|> (Mailbox Nothing <$> pAddrSpec)
  where nameAddr =
          Mailbox <$> ((Just <$> pPhrase) <|> pure Nothing)
                  <*> (pCfwsMb *> Atto.word8 60 *> pAddrSpec <* Atto.word8 62 <* pCfwsMb)

-- (mailbox *("," mailbox)) / obs-mbox-list

pMailboxList :: Parser (Seq Mailbox)
pMailboxList = do
  m <- pMailbox
  ms <- Atto.option Seq.empty (Atto.word8 44 *> pMailboxList)
  return (m Seq.<| ms)

-- address-list    =   (address *("," address)) / obs-addr-list

pAddressList :: Parser (Seq MBGroup)
pAddressList = do
  m <- pAddress
  ms <- Atto.option Seq.empty (Atto.word8 44 *> pAddressList)
  return (m Seq.<| ms)

pAddress :: Parser MBGroup
pAddress = MBSingle <$> pMailbox <|> MBGroup <$> pPhrase <*> grp
  where grp = (Atto.word8 58 *> (Atto.option mempty pGroupList) <* Atto.word8 59 <* pCfwsMb)

pGroupList :: Parser (Seq Mailbox)
pGroupList = pMailboxList <|> (mempty <$ pCfws)

-- dot-atom        =   [CFWS] dot-atom-text [CFWS]
-- dot-atom-text   =   1*atext *("." 1*atext)
pDotAtom :: Parser ByteString
pDotAtom = pCfwsMb *> Atto.takeWhile (\ c -> atext c || c == 46) <* pCfwsMb

pAddrSpec :: Parser Address
pAddrSpec = Address <$> pLocalPart <*> (Atto.word8 64 *> pDomain)
  where pLocalPart = pDotAtom
        pDomain = pDotAtom

atext :: Word8 -> Bool
atext c =
  let c' = Char.chr (fromIntegral c)
  in Char.isAlpha c' || Char.isDigit c' || c' `elem` ("!#$%&'*+-/=?^_`{|}~" :: [Char])

getSubject :: RawEmail -> ByteString
getSubject RawEmail { _rawEmailHeaders = hs } =
  head [ subj | HeaderSubject subj <- Fold.toList hs ]

readBox :: FilePath -> IO [(FilePath, Either String RawEmail)]
readBox fp = do
  paths <- listDirectory fp
  let files = [ fp ++ "/" ++ p | p <- paths ]
  raws <- sequence [ (,) f `fmap` B.readFile f | f <- files ]
  return [ (f, Atto.parseOnly pEmail r) | (f, r) <- raws ]

pEmail :: Parser RawEmail
pEmail = do
  headers <- Atto.manyTill pHeader pCrlf
  body <- Body `fmap` pBody
  return (RawEmail (Seq.fromList headers) (Seq.singleton body))

pHeader :: Parser Header
pHeader = do
  name <- Atto.takeWhile1 (\ c -> c /= 58 && not (isSpace c))
  void $ Atto.word8 58
  case B8.map Char.toLower name of
    "origdate"    -> HeaderOther "arglbargl" <$> foldingHeader
    "subject"     -> HeaderSubject <$> foldingHeader
    "comments"    -> HeaderComments <$> foldingHeader
    "keywords"    -> HeaderKeywords <$> foldingHeader
    "sender"      -> HeaderSender <$> pMailbox <* pCrlf
    "from"        -> HeaderFrom <$> pMailboxList <* pCrlf
    "reply-to"    -> HeaderReplyTo <$> pAddressList <* pCrlf
    "to"          -> HeaderTo <$> pAddressList <* pCrlf
    "cc"          -> HeaderCc <$> pAddressList <* pCrlf
    "bcc"         -> HeaderBcc <$> pAddressList <* pCrlf
    "message-id"  -> HeaderMessageId <$> pMsgId <* pCrlf
    "in-reply-to" -> HeaderInReplyTo <$> pSeq pMsgIdLiberal <* pCrlf
    "references"  -> HeaderReferences <$> pSeq pMsgIdLiberal <* pCrlf
    _             -> HeaderOther name <$> foldingHeader

pDateTime :: Parser UTCTime
pDateTime = undefined

overFoldingHeader :: Parser a -> Parser a
overFoldingHeader parse = do
  bs <- foldingHeader
  case Atto.parseOnly parse bs of
    Left err -> fail err
    Right x  -> return x

foldingHeader :: Parser ByteString
foldingHeader = do
  skipSpace
  line <- Atto.takeTill (== 10)
  void $ Atto.word8 10
  next <- Atto.peekWord8'
  if isSpace next
    then skipSpace *> (B.append line <$> foldingHeader)
    else return line

skipSpace :: Parser ()
skipSpace = void $ Atto.takeWhile isSpace

isSpace :: (Num a, Eq a) => a -> Bool
isSpace c = c == 32 || c == 9

isWS :: (Num a, Eq a) => a -> Bool
isWS c = c == 32 || c == 9

pCfwsMb :: Parser ()
pCfwsMb = Atto.option () pCfws

pCfws :: Parser ()
pCfws =
  (Atto.skipMany1 (pFwsMb *> pComment) *> void pFwsMb) <|> pFws

pFwsMb :: Parser BB.Builder
pFwsMb = do
  next <- Atto.peekWord8'
  if isSpace next
     then BB.word8 32 <$ pFws
     else return mempty

pFws :: Parser ()
pFws =
  Atto.skipMany (Atto.skipWhile isSpace *> pCrlf) *>
  Atto.skipMany1 (Atto.satisfy isSpace)

pComment :: Parser ()
pComment = void (Atto.word8 40 *> void body *> Atto.word8 41)
  where body = Atto.skipMany (pFwsMb *> content) *> pFwsMb
        content = void (Atto.satisfy ctext) <|> void pQuotedPair <|> pComment
        ctext c = c /= 40 && c /= 41 && c /= 92

pQuotedPair :: Parser Word8
pQuotedPair = Atto.word8 92 *> Atto.anyWord8

pCrlf :: Parser ()
pCrlf = void (Atto.word8 10)

pBody :: Parser ByteString
pBody = B.concat <$> Atto.many' (pText <* pCrlf)

pText :: Parser ByteString
pText = Atto.takeWhile (\ c -> c /= 10 && c /= 13)