gdritter repos xleb / 7b5cc16
Initial documented Xleb implementation w/ simple Atom example Getty Ritter 7 years ago
6 changed file(s) with 518 addition(s) and 0 deletion(s). Collapse all Expand all
1 *~
2 cabal.project.local
3 dist
4 dist-newstyle
5 .cabal-sandbox
6 .ghc.environment*
1 Copyright (c) 2017, Getty Ritter
2 All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
6 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
8 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9
10 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE RecordWildCards #-}
3
4 module Main where
5
6 import Control.Applicative (optional)
7 import qualified Text.Show.Pretty as PP
8 import qualified Text.XML.Light as XML
9 import qualified Text.XML.Xleb as X
10
11 data Feed = Feed
12 { feedTitle :: String
13 , feedSubtitle :: String
14 , feedLinks :: [Link]
15 , feedId :: String
16 , feedUpdated :: String
17 , feedEntries :: [Entry]
18 } deriving (Show)
19
20 data Link = Link
21 { linkHref :: String
22 , linkRel :: Maybe String
23 } deriving (Show)
24
25 data Entry = Entry
26 { entryTitle :: String
27 , entryLinks :: [Link]
28 , entryId :: String
29 , entryUpdated :: String
30 , entrySummary :: String
31 , entryAuthor :: Author
32 , entryContent :: Content
33 } deriving (Show)
34
35 data Content
36 = XHTMLContent XML.Element
37 | HTMLContent String
38 | TextContent String
39 deriving (Show)
40
41 data Author = Author
42 { authorName :: String
43 , authorEmail :: String
44 } deriving (Show)
45
46 feed :: X.Xleb Feed
47 feed = X.elem "feed" $ do
48 feedTitle <- X.child "title" (X.contents X.string)
49 feedSubtitle <- X.child "subtitle" (X.contents X.string)
50 feedLinks <- X.children "link" link
51 feedId <- X.child "id" (X.contents X.string)
52 feedUpdated <- X.child "updated" (X.contents X.string)
53 feedEntries <- X.children "entry" entry
54 return Feed { .. }
55
56 link :: X.Xleb Link
57 link =
58 Link <$> X.attr "href" X.string
59 <*> optional (X.attr "rel" X.string)
60
61 entry :: X.Xleb Entry
62 entry = X.elem "entry" $ do
63 entryTitle <- X.child "title" (X.contents X.string)
64 entryLinks <- X.children "link" link
65 entryId <- X.child "id" (X.contents X.string)
66 entryUpdated <- X.child "updated" (X.contents X.string)
67 entrySummary <- X.child "summary" (X.contents X.string)
68 entryAuthor <- X.child "author" author
69 entryContent <- X.child "content" content
70 return Entry { .. }
71
72 content :: X.Xleb Content
73 content = do
74 typ <- X.attr "type" X.string
75 case typ of
76 "xhtml" -> XHTMLContent <$> X.rawElement
77 "html" -> HTMLContent <$> X.contents X.string
78 "text" -> TextContent <$> X.contents X.string
79 _ -> fail "Unknown content type"
80
81 author :: X.Xleb Author
82 author =
83 Author <$> X.child "name" (X.contents X.string)
84 <*> X.child "email" (X.contents X.string)
85
86 main :: IO ()
87 main = do
88 cs <- getContents
89 PP.pPrint (X.runXleb cs feed)
1 <?xml version="1.0" encoding="utf-8"?>
2
3 <feed xmlns="http://www.w3.org/2005/Atom">
4
5 <title>Example Feed</title>
6 <subtitle>A subtitle.</subtitle>
7 <link href="http://example.org/feed/" rel="self" />
8 <link href="http://example.org/" />
9 <id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
10 <updated>2003-12-13T18:30:02Z</updated>
11
12
13 <entry>
14 <title>Atom-Powered Robots Run Amok</title>
15 <link href="http://example.org/2003/12/13/atom03" />
16 <link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
17 <link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
18 <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
19 <updated>2003-12-13T18:30:02Z</updated>
20 <summary>Some text.</summary>
21 <content type="xhtml">
22 <div xmlns="http://www.w3.org/1999/xhtml">
23 <p>This is the entry content.</p>
24 </div>
25 </content>
26 <author>
27 <name>John Doe</name>
28 <email>johndoe@example.com</email>
29 </author>
30 </entry>
31
32 </feed>
1 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
2 {-|
3 Module : Text.XML.Xleb
4 Description : The Xleb XML-parsing monad
5 Copyright : (c) Getty Ritter, 2017
6 License : BSD
7 Maintainer : Getty Ritter <xleb@infinitenegativeutility.com>
8 Stability : experimental
9
10 The 'Xleb' monad (and the corresponding 'XlebT' monad transformer) is
11 a monadic sublanguage for easily parsing XML structures.
12
13 This module is intended to be imported qualified, to avoid name
14 clashes with 'Prelude' functions. e.g.
15
16 > import qualified Text.XML.Xleb as X
17
18 -}
19
20
21 module Text.XML.Xleb
22 ( -- * How To Use 'Xleb'
23 -- $use
24
25 -- * The 'Xleb' monad
26 Xleb
27 , runXleb
28 -- ** The 'XlebT' monad transformer
29 , XlebT
30 , runXlebT
31 -- * Errors
32 , XlebError(..)
33 , errorString
34 -- * Element Structure
35 , elem
36 , attr
37 , contents
38 , rawElement
39 , child
40 , children
41 -- * Parsing contained string data
42 , Parse
43 , string
44 , number
45 , reader
46 -- * Selecting Elements
47 , Selector
48 , byTag
49 , any
50 ) where
51
52 import Prelude hiding (any, elem)
53
54 import Control.Applicative (Alternative(..))
55 import qualified Control.Monad.Fail as M
56 import qualified Control.Monad.Except as M
57 import qualified Control.Monad.Reader as M
58 import qualified GHC.Exts as GHC
59 import qualified Data.Functor.Identity as M
60 import qualified Text.XML.Light as XML
61
62 -- | The 'XlebT' monad transformer describes a computation used to
63 -- parse a fragment of XML from a particular element of an XML
64 -- structure. This may fail with an error, or it may produce a value.
65 newtype XlebT m a =
66 Xleb (M.ReaderT XML.Element (M.ExceptT XlebError m) a)
67 deriving (Functor, Applicative, Monad, Alternative)
68
69 -- | The 'Xleb' monad describes a computation used to parse a fragment
70 -- of XML from a particular element of an XML structure. This may fail
71 -- with an error, or it may produce a value.
72 type Xleb a = XlebT M.Identity a
73
74 -- | The 'XlebError' type describes the various errors that can occur
75 -- in the course of parsing an XML structure. If you simply want the
76 -- human-readable string that corresponds to your error, then use the
77 -- 'errorString' function.
78 data XlebError
79 = XEInElem String XlebError
80 -- ^ Describes the element context in which an error occurred
81 | XEInAttr String XlebError
82 -- ^ Describes the attribute context in which an error occurred
83 | XEParseFailure String
84 -- ^ Some parser function was unable to produce a value from the
85 -- string embedded in an XML element
86 | XENoSuchAttribute String
87 -- ^ A 'XlebT' computation required an attribute that wasn't
88 -- found in the specified element.
89 | XEUnexpectedElement String String
90 -- ^ A 'XlebT' computation expected one element but found another
91 | XENoMatchingElement Selector
92 -- ^ A 'XlebT' computation used a selector which did not
93 -- successfully describe any child elements
94 | XEAmbiguousElement Selector
95 -- ^ A 'XlebT' computation used a selector as though it would
96 -- unambiguously name a single child, but instead multiple child
97 -- elements matched the selector
98 | XEBadXML
99 -- ^ The "xml" library was unable to parse the document as XML.
100 | XOtherError String
101 -- ^ Another error occurred which was not described by the above
102 -- constructors
103 deriving (Eq, Show)
104
105 instance Monoid XlebError where
106 mappend x _ = x
107 mempty = XOtherError "unknown error"
108
109 -- | Convert a 'XlebError' value to the corresponding human-readable
110 -- string.
111 errorString :: XlebError -> String
112 errorString = gatherContext ""
113 where gatherContext ctx (XEInElem el err) =
114 gatherContext (ctx ++ el ++ "/") err
115 gatherContext ctx (XEInAttr at err) =
116 gatherContext (ctx ++ "[@" ++ at ++ "]") err
117 gatherContext ctx err =
118 ctx ++ ": " ++ showError err
119 showError (XEParseFailure err) = err
120 showError XEBadXML =
121 "Unable to parse input string as XML"
122 showError (XENoSuchAttribute str) =
123 "No attribute called '" ++ str ++ "'"
124 showError (XEUnexpectedElement e1 e2) =
125 "Unexpected element " ++ e1 ++ "; expected " ++ e2
126 showError (XENoMatchingElement sel) =
127 "No elements were found maching selector " ++ show sel
128 showError (XEAmbiguousElement sel) =
129 "Multiple elements matched the selector " ++ show sel
130 showError (XOtherError str) = str
131 showError (XEInElem _ _) = error "[unexpected]"
132 showError (XEInAttr _ _) = error "[unexpected]"
133
134 instance Monad m => M.MonadFail (XlebT m) where
135 fail = Xleb . M.throwError . XOtherError
136
137 -- | A value of type @'Parse' t@ is a function that can either produce
138 -- a value of type @t@ or fail with a string message.
139 type Parse t = String -> Either String t
140
141 -- | A 'Selector' represents some criteria by which child elements are
142 -- matched.
143 data Selector
144 = SelByName String
145 | SelAny
146 deriving (Eq, Show)
147
148 instance GHC.IsString Selector where
149 fromString = SelByName
150
151 toPred :: Selector -> XML.Element -> Bool
152 toPred SelAny _ = True
153 toPred (SelByName n) el =
154 XML.showQName (XML.elName el) == n
155
156 -- | Find an attribute on the current focus element and parse it to a
157 -- value of type @t@. If the parse function fails, then this will fail
158 -- with 'XEParseFailure'.
159 attr :: Monad m => String -> Parse t -> XlebT m t
160 attr name parser = Xleb $ do
161 el <- M.ask
162 case XML.findAttr (XML.unqual name) el of
163 Nothing -> M.throwError (XENoSuchAttribute name)
164 Just a -> case parser a of
165 Left err -> M.throwError (XEInAttr name (XEParseFailure err))
166 Right x -> return x
167
168 -- | Take the string content of the current element and parse it to a
169 -- value of type @t@. If the parse function fails, then this will fail
170 -- with 'XEParseFailure'.
171 contents :: Monad m => Parse t -> XlebT m t
172 contents parser = Xleb $ do
173 cnt <- XML.strContent `fmap` M.ask
174 case parser cnt of
175 Left err -> M.throwError (XEParseFailure err)
176 Right x -> return x
177
178 -- | Access the raw underlying XML element that we are
179 -- processing. This is sometimes necessary for working with free-form
180 -- XML data.
181 rawElement :: Monad m => XlebT m XML.Element
182 rawElement = Xleb M.ask
183
184 -- | Use a 'Selector' that unambiguously identifies a single child
185 -- element of the current element and then parse it according to a
186 -- given 'XlebT' computation focused on that element. If no child
187 -- matches the provided 'Selector', then this will fail with
188 -- 'XENoMatchingElement'. If multiple children match the provided
189 -- 'Selector', then this will fail with 'XEAmbiguousElement'.
190 child :: Monad m => Selector -> XlebT m t -> XlebT m t
191 child sel (Xleb mote) = Xleb $ do
192 cld <- XML.filterChildren (toPred sel) `fmap` M.ask
193 case cld of
194 [] -> M.throwError (XENoMatchingElement sel)
195 [x] -> M.local (const x) mote
196 _ -> M.throwError (XEAmbiguousElement sel)
197
198 -- | Use a 'Selector' that identifies some child elements of the
199 -- current element and parse each according to a given 'XlebT'
200 -- computation, which will be repeated with focus on each child
201 -- element, and returning the resulting values as a list. If no child
202 -- elements match the 'Selector', then this will return an empty list.
203 children :: Monad m => Selector -> XlebT m t -> XlebT m [t]
204 children sel (Xleb mote) = Xleb $ do
205 cld <- XML.filterChildren (toPred sel) `fmap` M.ask
206 sequence [ M.local (const x) mote | x <- cld ]
207
208 -- | A 'Parse' function that parses numeric values according to their
209 -- Haskell 'Read' instance.
210 number :: (Read n, Num n) => Parse n
211 number = Right . read
212
213 -- | A 'Parse' function that accepts arbitrary string input without
214 -- failing.
215 string :: Parse String
216 string = Right
217
218 -- | A 'Parse' function that parses Haskell values according to their
219 -- 'Read' instance.
220 reader :: Read a => Parse a
221 reader = Right . read
222
223 -- | Creates a 'Selector' which expects an exact tag name.
224 byTag :: String -> Selector
225 byTag = SelByName
226
227 -- | Creates a 'Selector' which matches any possible child element.
228 any :: Selector
229 any = SelAny
230
231 -- | @'elem' n t@ will ensure that the currently focused element is a
232 -- tag named @n@ and will then evaluate it using the computation
233 -- @t@. This will fail with 'XEUnexpectedElement' if the tag is named
234 -- something else.
235 elem :: Monad m => String -> XlebT m t -> XlebT m t
236 elem name (Xleb mote) = Xleb $ do
237 el <- M.ask
238 case el of
239 XML.Element { XML.elName = qname }
240 | XML.showQName qname == name -> mote
241 | otherwise -> M.throwError
242 (XEUnexpectedElement (XML.showQName qname) name)
243
244 doXleb :: XML.Element -> XlebT m t -> m (Either XlebError t)
245 doXleb el (Xleb mote) =
246 M.runExceptT (M.runReaderT mote el)
247
248 -- | Run a 'Xleb' computation over a string containing XML data,
249 -- producing either the resulting value or an error. If the XML data
250 -- contained in the argument string is invalid, then this will fail
251 -- with 'XEBadXML'.
252 runXleb :: String -> Xleb t -> Either XlebError t
253 runXleb raw xleb = case XML.parseXMLDoc raw of
254 Nothing -> Left XEBadXML
255 Just x -> M.runIdentity (doXleb x xleb)
256
257 -- | Run a 'XlebT' computation over a string containing XML data,
258 -- producing either the resulting monadic value or an error. If the
259 -- XML data contained in the argument string is invalid, then this
260 -- will fail with 'XEBadXML'.
261 runXlebT :: Monad m => String -> XlebT m t -> m (Either XlebError t)
262 runXlebT raw xleb = case XML.parseXMLDoc raw of
263 Nothing -> return (Left XEBadXML)
264 Just x -> doXleb x xleb
265
266 {- $use
267
268 The 'Xleb' monad describes both parsing /and/ traversing a given XML
269 structure: several of the functions to produce 'Xleb' computations
270 take other 'Xleb' computations, which are run on various sub-parts of
271 the XML tree. Consequently, instead of decomposing an XML structure
272 and passing it around to various functions, the 'Xleb' language treats
273 "the current location in the tree" as an implicit piece of data in the
274 'Xleb' monad.
275
276 You will generally want to identify your root note with the 'elem'
277 function to ensure that your root note has the tag you
278 expect. Children of that node can be accessed using the 'child' or
279 'children' function to either unambiguously find a specific child
280 element, or to find all child elements that match a given selector and
281 apply a 'Xleb' computation to each of them.
282
283 @
284 a <- X.child (X.byTag "a") parseA
285 b <- X.children (X.byTag "b") parseB
286 @
287
288 Leaf data tends to come in two forms in XML: attribute values (like
289 @\<tag attr="value"\>@) or tag content (like
290 @\<tag\>value\<\/tag\>@). In both cases, the 'Xleb' functions allow
291 you to parse that content however you'd like by providing an arbitrary
292 function of type @'String' -> 'Either' 'String' a@. The "xleb" library
293 provides several built-in functions of this type for common
294 situations.
295
296 @
297 c <- X.attr "index" X.number
298 d <- X.contents X.string
299 @
300
301 Finally, the `Xleb` monad has `Alternative` instances which allow for
302 concise expression of optional values or multiple possibilities.
303
304 @
305 e \<- X.children X.any (parseA \<|\> parseB)
306 f \<- optional (X.attr "total" X.number)
307 @
308
309 Consequently, for an XML structure like the following:
310
311 @
312 \<feed\>
313 \<title\>Feed Name\<\/title\>
314 \<author\>Pierre Menard\<\/author\>
315 \<entry title="Entry 01"\>First Post\<\/entry\>
316 \<entry title="Entry 02"\>Second Post Post\<\/entry\>
317 \<\/feed\>
318 @
319
320 We can write a 'Xleb' computation which is capable of parsing this
321 structure in a handful of lines:
322
323 @
324 import Control.Applicative (optional)
325 import qualified Text.XML.Xleb as X
326
327 feed :: X.Xleb (String, Maybe String, [(String, String)])
328 feed = X.elem "feed" $ do
329 feedTitle <- X.child (X.byTag "title") $
330 X.contents X.string
331 feedAuthor <- optional $ X.child (X.byTag "author") $
332 X.contents X.string
333 feedEntries <- X.children (X.byTag "entry") entry
334 return (feedTitle, feedAuthor, feedEntries)
335
336 entry :: X.Xleb (String, String)
337 entry = (,) \<$\> X.attr "title" X.string \<*\> X.contents X.string
338 @
339
340 -}
1 name: xleb
2 version: 0.1.0.0
3 synopsis: A simple monadic language for parsing XML structures.
4 description:
5 A simple monadic language for parsing XML structures.
6 license: BSD3
7 license-file: LICENSE
8 author: Getty Ritter <gdritter@galois.com>
9 maintainer: Getty Ritter <gdritter@galois.com>
10 copyright: ©2017 Getty Ritter
11 category: XML
12 build-type: Simple
13 cabal-version: >= 1.14
14
15 flag build-examples
16 description: Build example applications
17 default: False
18
19 library
20 exposed-modules: Text.XML.Xleb
21 hs-source-dirs: src
22 ghc-options: -Wall -Werror
23 build-depends: base >=4.7 && <5
24 , xml
25 , containers
26 , mtl
27 default-language: Haskell2010
28
29 executable atom
30 if !flag(build-examples)
31 buildable: False
32 hs-source-dirs: examples/atom
33 main-is: Main.hs
34 ghc-options: -Wall -Werror
35 build-depends: base >=4.7 && <5
36 , xleb
37 , xml
38 , pretty-show
39 default-language: Haskell2010