gdritter repos courriel / fbcaff2
Very beginnings of email parsers Getty Ritter 7 years ago
5 changed file(s) with 205 addition(s) and 0 deletion(s). Collapse all Expand all
1 *~
2 dist
3 dist-newstyle
4 cabal.sandbox.config
5 .cabal-sandbox
1 Copyright (c) 2017, Getty Ritter
2 All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
6 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
8 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9
10 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1 name: courriel
2 version: 0.1.0.0
3 -- synopsis:
4 -- description:
5 license: BSD3
6 license-file: LICENSE
7 author: Getty Ritter <gdritter@galois.com>
8 maintainer: Getty Ritter <gdritter@galois.com>
9 copyright: ©2017 Getty Ritter
10 -- category:
11 build-type: Simple
12 cabal-version: >= 1.14
13
14 library
15 hs-source-dirs: src
16 exposed-modules: Network.Courriel.Types
17 , Network.Courriel.Parse
18 ghc-options: -Wall
19 build-depends: base >=4.7 && <4.10
20 , bytestring
21 , microlens-platform
22 , time
23 , containers
24 , attoparsec
25 , directory
26 default-language: Haskell2010
27 default-extensions: OverloadedStrings,
28 ScopedTypeVariables
1 module Network.Courriel.Parse where
2
3
4 import Control.Applicative
5 import Control.Monad (void)
6 import Data.Attoparsec.ByteString (Parser)
7 import qualified Data.Attoparsec.ByteString as Atto
8 import Data.ByteString (ByteString)
9 import qualified Data.ByteString as B
10 import qualified Data.Foldable as Fold
11 import Data.Sequence (Seq)
12 import qualified Data.Sequence as Seq
13 import Data.Time.Clock (UTCTime)
14 import Data.Word (Word8)
15 import System.Directory (listDirectory)
16
17 import Network.Courriel.Types
18
19 pPhrase :: Parser ByteString
20 pPhrase = undefined
21
22 pMailbox :: Parser Mailbox
23 pMailbox = nameAddr <|> (Mailbox Nothing <$> pAddrSpec)
24 where nameAddr =
25 Mailbox <$> (Just <$> pPhrase)
26 <*> (Atto.word8 60 *> pAddrSpec <* Atto.word8 62)
27
28 pDotAtom :: Parser ByteString
29 pDotAtom = Atto.takeWhile (\ c -> atext c || c == 46)
30
31 pAddrSpec :: Parser Address
32 pAddrSpec = Address <$> pLocalPart <*> (Atto.word8 64 *> pDomain)
33 where pLocalPart = pDotAtom
34 pDomain = pDotAtom
35
36 atext :: Word8 -> Bool
37 atext c = c < 128 && c /= 40 && c /= 41
38 && c /= 58 && c /= 59
39 && c /= 60 && c /= 62
40 && c /= 64 && c /= 91
41 && c /= 92 && c /= 93
42 && c /= 34
43
44 getSubject :: RawEmail -> ByteString
45 getSubject RawEmail { _rawEmailHeaders = hs } =
46 head [ subj | HeaderSubject subj <- Fold.toList hs ]
47
48 readBox :: FilePath -> IO [(FilePath, Either String RawEmail)]
49 readBox fp = do
50 paths <- listDirectory fp
51 let files = [ fp ++ "/" ++ p | p <- paths ]
52 raws <- sequence [ (,) f `fmap` B.readFile f | f <- files ]
53 return [ (f, Atto.parseOnly pEmail r) | (f, r) <- raws ]
54
55 pEmail :: Parser RawEmail
56 pEmail = do
57 headers <- Atto.manyTill pHeader pCrlf
58 body <- Body `fmap` pBody
59 return (RawEmail (Seq.fromList headers) (Seq.singleton body))
60
61 pHeader :: Parser Header
62 pHeader = do
63 name <- Atto.takeWhile1 (\ c -> c /= 58 && not (isSpace c))
64 void $ Atto.word8 58
65 case name of
66 "OrigDate" -> HeaderOther "arglbargl" <$> foldingHeader
67 "Subject" -> HeaderSubject <$> foldingHeader
68 "Comments" -> HeaderComments <$> foldingHeader
69 "Keywords" -> HeaderKeywords <$> foldingHeader
70 _ -> HeaderOther name <$> foldingHeader
71
72 pDateTime :: Parser UTCTime
73 pDateTime = undefined
74
75 foldingHeader :: Parser ByteString
76 foldingHeader = do
77 skipSpace
78 line <- Atto.takeTill (== 10)
79 void $ Atto.word8 10
80 next <- Atto.peekWord8'
81 if isSpace next
82 then skipSpace *> (B.append line <$> foldingHeader)
83 else return line
84
85 skipSpace :: Parser ()
86 skipSpace = void $ Atto.takeWhile isSpace
87
88 isSpace :: (Num a, Eq a) => a -> Bool
89 isSpace c = c == 32 || c == 9
90
91 pCrlf :: Parser ()
92 pCrlf = void (Atto.word8 10)
93
94 pBody :: Parser ByteString
95 pBody = B.concat <$> Atto.many' (pText <* pCrlf)
96
97 pText :: Parser ByteString
98 pText = Atto.takeWhile (\ c -> c /= 10 && c /= 13)
1 {-# LANGUAGE TemplateHaskell #-}
2
3 module Network.Courriel.Types where
4
5 import Data.ByteString (ByteString)
6 import Data.Sequence (Seq)
7 import Data.Time.Clock (UTCTime)
8 import Lens.Micro.Platform (makeLenses)
9
10 data Address = Address
11 { _addrLocalPart :: ByteString
12 , _addrDomain :: ByteString
13 } deriving (Eq, Show, Read)
14
15 data Mailbox = Mailbox
16 { _mbName :: Maybe ByteString
17 , _mbAddr :: Address
18 } deriving (Eq, Show, Read)
19
20 data Header
21 = HeaderOrigDate UTCTime
22 | HeaderFrom (Seq Mailbox)
23 | HeaderSender Mailbox
24 | HeaderReplyTo (Seq Address)
25 | HeaderTo (Seq Address)
26 | HeaderCc (Seq Address)
27 | HeaderBcc (Seq Address)
28 | HeaderMessageId MessageId
29 | HeaderInReplyTo MessageId
30 | HeaderReferences MessageId
31 | HeaderSubject ByteString
32 | HeaderComments ByteString
33 | HeaderKeywords ByteString
34 | HeaderOther ByteString ByteString
35 deriving (Eq, Show, Read)
36
37 newtype MessageId = MessageId { _fromMessageId :: ByteString }
38 deriving (Eq, Show, Read)
39
40 data Body = Body { _fromBody :: ByteString }
41 deriving (Eq, Show, Read)
42
43 data RawEmail = RawEmail
44 { _rawEmailHeaders :: Seq Header
45 , _rawEmailBodies :: Seq Body
46 } deriving (Eq, Show, Read)
47
48 data Email = Email
49 { _emailFilename :: FilePath
50 , _emailFrom :: Address
51 , _emailTo :: Seq Address
52 , _emailCc :: Seq Address
53 , _emailBcc :: Seq Address
54 , _emailHeaders :: Seq Header
55 , _emailBodies :: Seq Body
56 } deriving (Eq, Show, Read)
57
58 makeLenses ''Address
59 makeLenses ''Mailbox
60 makeLenses ''MessageId
61 makeLenses ''Body
62 makeLenses ''Email