gdritter repos courriel / cf6f62f
Working parsers for mailboxes and addresses Getty Ritter 6 years ago
3 changed file(s) with 116 addition(s) and 15 deletion(s). Collapse all Expand all
1616 exposed-modules: Network.Courriel.Types
1717 , Network.Courriel.Parse
1818 ghc-options: -Wall
19 build-depends: base >=4.7 && <4.10
19 build-depends: base >=4.7 && <5
2020 , bytestring
2121 , microlens-platform
2222 , time
77 import qualified Data.Attoparsec.ByteString as Atto
88 import Data.ByteString (ByteString)
99 import qualified Data.ByteString as B
10 import qualified Data.ByteString.Lazy as BL
11 import qualified Data.ByteString.Builder as BB
12 import qualified Data.Char as Char
1013 import qualified Data.Foldable as Fold
1114 import Data.Sequence (Seq)
1215 import qualified Data.Sequence as Seq
1619
1720 import Network.Courriel.Types
1821
22 -- The comments here are taken directly from RFC 5322
23
24
25 -- word = atom / quoted-string
26 -- phrase = 1*word / obs-phrase
1927 pPhrase :: Parser ByteString
20 pPhrase = undefined
28 pPhrase = pQuotedString <|>
29 (B.intercalate (B.singleton 32) <$> Atto.many' pWord)
2130
31 pWord :: Parser ByteString
32 pWord = pCfwsMb *> Atto.takeWhile1 atext <* pCfwsMb
33
34 pQuotedString :: Parser ByteString
35 pQuotedString = do
36 pCfwsMb
37 void (Atto.word8 34)
38 let ch = Atto.satisfy (\ c -> c /= 92 && c /= 34) <|> pQuotedPair
39 text <- Atto.many' $ do
40 w <- pFwsMb
41 c <- BB.word8 <$> ch
42 return (w `mappend` c)
43 rs <- pFwsMb
44 void (Atto.word8 34)
45 pCfwsMb
46 return (BL.toStrict (BB.toLazyByteString (mconcat text `mappend` rs)))
47
48 -- mailbox = name-addr / addr-spec
49 -- name-addr = [display-name] angle-addr
50 -- angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
51 -- obs-angle-addr
2252 pMailbox :: Parser Mailbox
2353 pMailbox = nameAddr <|> (Mailbox Nothing <$> pAddrSpec)
2454 where nameAddr =
25 Mailbox <$> (Just <$> pPhrase)
26 <*> (Atto.word8 60 *> pAddrSpec <* Atto.word8 62)
55 Mailbox <$> ((Just <$> pPhrase) <|> pure Nothing)
56 <*> (pCfwsMb *> Atto.word8 60 *> pAddrSpec <* Atto.word8 62 <* pCfwsMb)
2757
58 -- (mailbox *("," mailbox)) / obs-mbox-list
59
60 pMailboxList :: Parser (Seq Mailbox)
61 pMailboxList = do
62 m <- pMailbox
63 ms <- Atto.option Seq.empty (Atto.word8 44 *> pMailboxList)
64 return (m Seq.<| ms)
65
66 -- address-list = (address *("," address)) / obs-addr-list
67
68 pAddressList :: Parser (Seq MBGroup)
69 pAddressList = do
70 m <- pAddress
71 ms <- Atto.option Seq.empty (Atto.word8 44 *> pAddressList)
72 return (m Seq.<| ms)
73
74 pAddress :: Parser MBGroup
75 pAddress = MBSingle <$> pMailbox <|> MBGroup <$> pPhrase <*> grp
76 where grp = (Atto.word8 58 *> (Atto.option mempty pGroupList) <* Atto.word8 59 <* pCfwsMb)
77
78 pGroupList :: Parser (Seq Mailbox)
79 pGroupList = pMailboxList <|> (mempty <$ pCfws)
80
81 -- dot-atom = [CFWS] dot-atom-text [CFWS]
82 -- dot-atom-text = 1*atext *("." 1*atext)
2883 pDotAtom :: Parser ByteString
29 pDotAtom = Atto.takeWhile (\ c -> atext c || c == 46)
84 pDotAtom = pCfwsMb *> Atto.takeWhile (\ c -> atext c || c == 46) <* pCfwsMb
3085
3186 pAddrSpec :: Parser Address
3287 pAddrSpec = Address <$> pLocalPart <*> (Atto.word8 64 *> pDomain)
3489 pDomain = pDotAtom
3590
3691 atext :: Word8 -> Bool
37 atext c = c < 128 && c /= 40 && c /= 41
38 && c /= 58 && c /= 59
39 && c /= 60 && c /= 62
40 && c /= 64 && c /= 91
41 && c /= 92 && c /= 93
42 && c /= 34
92 atext c =
93 let c' = Char.chr (fromIntegral c)
94 in Char.isAlpha c' || Char.isDigit c' || c' `elem` ("!#$%&'*+-/=?^_`{|}~" :: [Char])
4395
4496 getSubject :: RawEmail -> ByteString
4597 getSubject RawEmail { _rawEmailHeaders = hs } =
67119 "Subject" -> HeaderSubject <$> foldingHeader
68120 "Comments" -> HeaderComments <$> foldingHeader
69121 "Keywords" -> HeaderKeywords <$> foldingHeader
122 "Sender" -> HeaderSender <$> pMailbox <* pCrlf
123 "From" -> HeaderFrom <$> pMailboxList <* pCrlf
124 "Reply-To" -> HeaderReplyTo <$> pAddressList <* pCrlf
125 "To" -> HeaderTo <$> pAddressList <* pCrlf
126 "Cc" -> HeaderCc <$> pAddressList <* pCrlf
127 "Bcc" -> HeaderBcc <$> pAddressList <* pCrlf
70128 _ -> HeaderOther name <$> foldingHeader
71129
72130 pDateTime :: Parser UTCTime
73131 pDateTime = undefined
132
133 overFoldingHeader :: Parser a -> Parser a
134 overFoldingHeader parse = do
135 bs <- foldingHeader
136 case Atto.parseOnly parse bs of
137 Left err -> fail err
138 Right x -> return x
74139
75140 foldingHeader :: Parser ByteString
76141 foldingHeader = do
88153 isSpace :: (Num a, Eq a) => a -> Bool
89154 isSpace c = c == 32 || c == 9
90155
156 isWS :: (Num a, Eq a) => a -> Bool
157 isWS c = c == 32 || c == 9
158
159 pCfwsMb :: Parser ()
160 pCfwsMb = Atto.option () pCfws
161
162 pCfws :: Parser ()
163 pCfws =
164 (Atto.skipMany1 (pFwsMb *> pComment) *> void pFwsMb) <|> pFws
165
166 pFwsMb :: Parser BB.Builder
167 pFwsMb = do
168 next <- Atto.peekWord8'
169 if isSpace next
170 then BB.word8 32 <$ pFws
171 else return mempty
172
173 pFws :: Parser ()
174 pFws =
175 Atto.skipMany (Atto.skipWhile isSpace *> pCrlf) *>
176 Atto.skipMany1 (Atto.satisfy isSpace)
177
178 pComment :: Parser ()
179 pComment = void (Atto.word8 40 *> void body *> Atto.word8 41)
180 where body = Atto.skipMany (pFwsMb *> content) *> pFwsMb
181 content = void (Atto.satisfy ctext) <|> void pQuotedPair <|> pComment
182 ctext c = c /= 40 && c /= 41 && c /= 92
183
184 pQuotedPair :: Parser Word8
185 pQuotedPair = Atto.word8 92 *> Atto.anyWord8
186
91187 pCrlf :: Parser ()
92188 pCrlf = void (Atto.word8 10)
93189
1717 , _mbAddr :: Address
1818 } deriving (Eq, Show, Read)
1919
20 data MBGroup
21 = MBSingle Mailbox
22 | MBGroup ByteString (Seq Mailbox)
23 deriving (Eq, Show, Read)
24
2025 data Header
2126 = HeaderOrigDate UTCTime
2227 | HeaderFrom (Seq Mailbox)
2328 | HeaderSender Mailbox
24 | HeaderReplyTo (Seq Address)
25 | HeaderTo (Seq Address)
26 | HeaderCc (Seq Address)
27 | HeaderBcc (Seq Address)
29 | HeaderReplyTo (Seq MBGroup)
30 | HeaderTo (Seq MBGroup)
31 | HeaderCc (Seq MBGroup)
32 | HeaderBcc (Seq MBGroup)
2833 | HeaderMessageId MessageId
2934 | HeaderInReplyTo MessageId
3035 | HeaderReferences MessageId