| 1 |
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
| 2 |
{-|
|
| 3 |
Module : Text.XML.Xleb
|
| 4 |
Description : The Xleb XML-parsing monad
|
| 5 |
Copyright : (c) Getty Ritter, 2017
|
| 6 |
License : BSD
|
| 7 |
Maintainer : Getty Ritter <xleb@infinitenegativeutility.com>
|
| 8 |
Stability : experimental
|
| 9 |
|
| 10 |
The 'Xleb' monad (and the corresponding 'XlebT' monad transformer) is
|
| 11 |
a monadic sublanguage for easily parsing XML structures.
|
| 12 |
|
| 13 |
This module is intended to be imported qualified, to avoid name
|
| 14 |
clashes with 'Prelude' functions. e.g.
|
| 15 |
|
| 16 |
> import qualified Text.XML.Xleb as X
|
| 17 |
|
| 18 |
-}
|
| 19 |
|
| 20 |
|
| 21 |
module Text.XML.Xleb
|
| 22 |
( -- * How To Use 'Xleb'
|
| 23 |
-- $use
|
| 24 |
|
| 25 |
-- * The 'Xleb' monad
|
| 26 |
Xleb
|
| 27 |
, runXleb
|
| 28 |
-- ** The 'XlebT' monad transformer
|
| 29 |
, XlebT
|
| 30 |
, runXlebT
|
| 31 |
-- * Errors
|
| 32 |
, XlebError(..)
|
| 33 |
, errorString
|
| 34 |
-- * Element Structure
|
| 35 |
, elem
|
| 36 |
, attr
|
| 37 |
, contents
|
| 38 |
, rawElement
|
| 39 |
, child
|
| 40 |
, children
|
| 41 |
-- * Parsing contained string data
|
| 42 |
, Parse
|
| 43 |
, string
|
| 44 |
, number
|
| 45 |
, reader
|
| 46 |
-- * Selecting Elements
|
| 47 |
, Selector
|
| 48 |
, byTag
|
| 49 |
, any
|
| 50 |
) where
|
| 51 |
|
| 52 |
import Prelude hiding (any, elem)
|
| 53 |
|
| 54 |
import Control.Applicative (Alternative(..))
|
| 55 |
import qualified Control.Monad.Fail as M
|
| 56 |
import qualified Control.Monad.Except as M
|
| 57 |
import qualified Control.Monad.Reader as M
|
| 58 |
import qualified GHC.Exts as GHC
|
| 59 |
import qualified Data.Functor.Identity as M
|
| 60 |
import qualified Text.XML.Light as XML
|
| 61 |
|
| 62 |
-- | The 'XlebT' monad transformer describes a computation used to
|
| 63 |
-- parse a fragment of XML from a particular element of an XML
|
| 64 |
-- structure. This may fail with an error, or it may produce a value.
|
| 65 |
newtype XlebT m a =
|
| 66 |
Xleb (M.ReaderT XML.Element (M.ExceptT XlebError m) a)
|
| 67 |
deriving (Functor, Applicative, Monad, Alternative)
|
| 68 |
|
| 69 |
-- | The 'Xleb' monad describes a computation used to parse a fragment
|
| 70 |
-- of XML from a particular element of an XML structure. This may fail
|
| 71 |
-- with an error, or it may produce a value.
|
| 72 |
type Xleb a = XlebT M.Identity a
|
| 73 |
|
| 74 |
-- | The 'XlebError' type describes the various errors that can occur
|
| 75 |
-- in the course of parsing an XML structure. If you simply want the
|
| 76 |
-- human-readable string that corresponds to your error, then use the
|
| 77 |
-- 'errorString' function.
|
| 78 |
data XlebError
|
| 79 |
= XEInElem String XlebError
|
| 80 |
-- ^ Describes the element context in which an error occurred
|
| 81 |
| XEInAttr String XlebError
|
| 82 |
-- ^ Describes the attribute context in which an error occurred
|
| 83 |
| XEParseFailure String
|
| 84 |
-- ^ Some parser function was unable to produce a value from the
|
| 85 |
-- string embedded in an XML element
|
| 86 |
| XENoSuchAttribute String
|
| 87 |
-- ^ A 'XlebT' computation required an attribute that wasn't
|
| 88 |
-- found in the specified element.
|
| 89 |
| XEUnexpectedElement String String
|
| 90 |
-- ^ A 'XlebT' computation expected one element but found another
|
| 91 |
| XENoMatchingElement Selector
|
| 92 |
-- ^ A 'XlebT' computation used a selector which did not
|
| 93 |
-- successfully describe any child elements
|
| 94 |
| XEAmbiguousElement Selector
|
| 95 |
-- ^ A 'XlebT' computation used a selector as though it would
|
| 96 |
-- unambiguously name a single child, but instead multiple child
|
| 97 |
-- elements matched the selector
|
| 98 |
| XEBadXML
|
| 99 |
-- ^ The "xml" library was unable to parse the document as XML.
|
| 100 |
| XOtherError String
|
| 101 |
-- ^ Another error occurred which was not described by the above
|
| 102 |
-- constructors
|
| 103 |
deriving (Eq, Show)
|
| 104 |
|
| 105 |
instance Monoid XlebError where
|
| 106 |
mappend x _ = x
|
| 107 |
mempty = XOtherError "unknown error"
|
| 108 |
|
| 109 |
-- | Convert a 'XlebError' value to the corresponding human-readable
|
| 110 |
-- string.
|
| 111 |
errorString :: XlebError -> String
|
| 112 |
errorString = gatherContext ""
|
| 113 |
where gatherContext ctx (XEInElem el err) =
|
| 114 |
gatherContext (ctx ++ el ++ "/") err
|
| 115 |
gatherContext ctx (XEInAttr at err) =
|
| 116 |
gatherContext (ctx ++ "[@" ++ at ++ "]") err
|
| 117 |
gatherContext ctx err =
|
| 118 |
ctx ++ ": " ++ showError err
|
| 119 |
showError (XEParseFailure err) = err
|
| 120 |
showError XEBadXML =
|
| 121 |
"Unable to parse input string as XML"
|
| 122 |
showError (XENoSuchAttribute str) =
|
| 123 |
"No attribute called '" ++ str ++ "'"
|
| 124 |
showError (XEUnexpectedElement e1 e2) =
|
| 125 |
"Unexpected element " ++ e1 ++ "; expected " ++ e2
|
| 126 |
showError (XENoMatchingElement sel) =
|
| 127 |
"No elements were found maching selector " ++ show sel
|
| 128 |
showError (XEAmbiguousElement sel) =
|
| 129 |
"Multiple elements matched the selector " ++ show sel
|
| 130 |
showError (XOtherError str) = str
|
| 131 |
showError (XEInElem _ _) = error "[unexpected]"
|
| 132 |
showError (XEInAttr _ _) = error "[unexpected]"
|
| 133 |
|
| 134 |
instance Monad m => M.MonadFail (XlebT m) where
|
| 135 |
fail = Xleb . M.throwError . XOtherError
|
| 136 |
|
| 137 |
-- | A value of type @'Parse' t@ is a function that can either produce
|
| 138 |
-- a value of type @t@ or fail with a string message.
|
| 139 |
type Parse t = String -> Either String t
|
| 140 |
|
| 141 |
-- | A 'Selector' represents some criteria by which child elements are
|
| 142 |
-- matched.
|
| 143 |
data Selector
|
| 144 |
= SelByName String
|
| 145 |
| SelAny
|
| 146 |
deriving (Eq, Show)
|
| 147 |
|
| 148 |
instance GHC.IsString Selector where
|
| 149 |
fromString = SelByName
|
| 150 |
|
| 151 |
toPred :: Selector -> XML.Element -> Bool
|
| 152 |
toPred SelAny _ = True
|
| 153 |
toPred (SelByName n) el =
|
| 154 |
XML.showQName (XML.elName el) == n
|
| 155 |
|
| 156 |
-- | Find an attribute on the current focus element and parse it to a
|
| 157 |
-- value of type @t@. If the parse function fails, then this will fail
|
| 158 |
-- with 'XEParseFailure'.
|
| 159 |
attr :: Monad m => String -> Parse t -> XlebT m t
|
| 160 |
attr name parser = Xleb $ do
|
| 161 |
el <- M.ask
|
| 162 |
case XML.findAttr (XML.unqual name) el of
|
| 163 |
Nothing -> M.throwError (XENoSuchAttribute name)
|
| 164 |
Just a -> case parser a of
|
| 165 |
Left err -> M.throwError (XEInAttr name (XEParseFailure err))
|
| 166 |
Right x -> return x
|
| 167 |
|
| 168 |
-- | Take the string content of the current element and parse it to a
|
| 169 |
-- value of type @t@. If the parse function fails, then this will fail
|
| 170 |
-- with 'XEParseFailure'.
|
| 171 |
contents :: Monad m => Parse t -> XlebT m t
|
| 172 |
contents parser = Xleb $ do
|
| 173 |
cnt <- XML.strContent `fmap` M.ask
|
| 174 |
case parser cnt of
|
| 175 |
Left err -> M.throwError (XEParseFailure err)
|
| 176 |
Right x -> return x
|
| 177 |
|
| 178 |
-- | Access the raw underlying XML element that we are
|
| 179 |
-- processing. This is sometimes necessary for working with free-form
|
| 180 |
-- XML data.
|
| 181 |
rawElement :: Monad m => XlebT m XML.Element
|
| 182 |
rawElement = Xleb M.ask
|
| 183 |
|
| 184 |
-- | Use a 'Selector' that unambiguously identifies a single child
|
| 185 |
-- element of the current element and then parse it according to a
|
| 186 |
-- given 'XlebT' computation focused on that element. If no child
|
| 187 |
-- matches the provided 'Selector', then this will fail with
|
| 188 |
-- 'XENoMatchingElement'. If multiple children match the provided
|
| 189 |
-- 'Selector', then this will fail with 'XEAmbiguousElement'.
|
| 190 |
child :: Monad m => Selector -> XlebT m t -> XlebT m t
|
| 191 |
child sel (Xleb mote) = Xleb $ do
|
| 192 |
cld <- XML.filterChildren (toPred sel) `fmap` M.ask
|
| 193 |
case cld of
|
| 194 |
[] -> M.throwError (XENoMatchingElement sel)
|
| 195 |
[x] -> M.local (const x) mote
|
| 196 |
_ -> M.throwError (XEAmbiguousElement sel)
|
| 197 |
|
| 198 |
-- | Use a 'Selector' that identifies some child elements of the
|
| 199 |
-- current element and parse each according to a given 'XlebT'
|
| 200 |
-- computation, which will be repeated with focus on each child
|
| 201 |
-- element, and returning the resulting values as a list. If no child
|
| 202 |
-- elements match the 'Selector', then this will return an empty list.
|
| 203 |
children :: Monad m => Selector -> XlebT m t -> XlebT m [t]
|
| 204 |
children sel (Xleb mote) = Xleb $ do
|
| 205 |
cld <- XML.filterChildren (toPred sel) `fmap` M.ask
|
| 206 |
sequence [ M.local (const x) mote | x <- cld ]
|
| 207 |
|
| 208 |
-- | A 'Parse' function that parses numeric values according to their
|
| 209 |
-- Haskell 'Read' instance.
|
| 210 |
number :: (Read n, Num n) => Parse n
|
| 211 |
number = Right . read
|
| 212 |
|
| 213 |
-- | A 'Parse' function that accepts arbitrary string input without
|
| 214 |
-- failing.
|
| 215 |
string :: Parse String
|
| 216 |
string = Right
|
| 217 |
|
| 218 |
-- | A 'Parse' function that parses Haskell values according to their
|
| 219 |
-- 'Read' instance.
|
| 220 |
reader :: Read a => Parse a
|
| 221 |
reader = Right . read
|
| 222 |
|
| 223 |
-- | Creates a 'Selector' which expects an exact tag name.
|
| 224 |
byTag :: String -> Selector
|
| 225 |
byTag = SelByName
|
| 226 |
|
| 227 |
-- | Creates a 'Selector' which matches any possible child element.
|
| 228 |
any :: Selector
|
| 229 |
any = SelAny
|
| 230 |
|
| 231 |
-- | @'elem' n t@ will ensure that the currently focused element is a
|
| 232 |
-- tag named @n@ and will then evaluate it using the computation
|
| 233 |
-- @t@. This will fail with 'XEUnexpectedElement' if the tag is named
|
| 234 |
-- something else.
|
| 235 |
elem :: Monad m => String -> XlebT m t -> XlebT m t
|
| 236 |
elem name (Xleb mote) = Xleb $ do
|
| 237 |
el <- M.ask
|
| 238 |
case el of
|
| 239 |
XML.Element { XML.elName = qname }
|
| 240 |
| XML.showQName qname == name -> mote
|
| 241 |
| otherwise -> M.throwError
|
| 242 |
(XEUnexpectedElement (XML.showQName qname) name)
|
| 243 |
|
| 244 |
doXleb :: XML.Element -> XlebT m t -> m (Either XlebError t)
|
| 245 |
doXleb el (Xleb mote) =
|
| 246 |
M.runExceptT (M.runReaderT mote el)
|
| 247 |
|
| 248 |
-- | Run a 'Xleb' computation over a string containing XML data,
|
| 249 |
-- producing either the resulting value or an error. If the XML data
|
| 250 |
-- contained in the argument string is invalid, then this will fail
|
| 251 |
-- with 'XEBadXML'.
|
| 252 |
runXleb :: String -> Xleb t -> Either XlebError t
|
| 253 |
runXleb raw xleb = case XML.parseXMLDoc raw of
|
| 254 |
Nothing -> Left XEBadXML
|
| 255 |
Just x -> M.runIdentity (doXleb x xleb)
|
| 256 |
|
| 257 |
-- | Run a 'XlebT' computation over a string containing XML data,
|
| 258 |
-- producing either the resulting monadic value or an error. If the
|
| 259 |
-- XML data contained in the argument string is invalid, then this
|
| 260 |
-- will fail with 'XEBadXML'.
|
| 261 |
runXlebT :: Monad m => String -> XlebT m t -> m (Either XlebError t)
|
| 262 |
runXlebT raw xleb = case XML.parseXMLDoc raw of
|
| 263 |
Nothing -> return (Left XEBadXML)
|
| 264 |
Just x -> doXleb x xleb
|
| 265 |
|
| 266 |
{- $use
|
| 267 |
|
| 268 |
The 'Xleb' monad describes both parsing /and/ traversing a given XML
|
| 269 |
structure: several of the functions to produce 'Xleb' computations
|
| 270 |
take other 'Xleb' computations, which are run on various sub-parts of
|
| 271 |
the XML tree. Consequently, instead of decomposing an XML structure
|
| 272 |
and passing it around to various functions, the 'Xleb' language treats
|
| 273 |
"the current location in the tree" as an implicit piece of data in the
|
| 274 |
'Xleb' monad.
|
| 275 |
|
| 276 |
You will generally want to identify your root note with the 'elem'
|
| 277 |
function to ensure that your root note has the tag you
|
| 278 |
expect. Children of that node can be accessed using the 'child' or
|
| 279 |
'children' function to either unambiguously find a specific child
|
| 280 |
element, or to find all child elements that match a given selector and
|
| 281 |
apply a 'Xleb' computation to each of them.
|
| 282 |
|
| 283 |
@
|
| 284 |
a <- X.child (X.byTag "a") parseA
|
| 285 |
b <- X.children (X.byTag "b") parseB
|
| 286 |
@
|
| 287 |
|
| 288 |
Leaf data tends to come in two forms in XML: attribute values (like
|
| 289 |
@\<tag attr="value"\>@) or tag content (like
|
| 290 |
@\<tag\>value\<\/tag\>@). In both cases, the 'Xleb' functions allow
|
| 291 |
you to parse that content however you'd like by providing an arbitrary
|
| 292 |
function of type @'String' -> 'Either' 'String' a@. The "xleb" library
|
| 293 |
provides several built-in functions of this type for common
|
| 294 |
situations.
|
| 295 |
|
| 296 |
@
|
| 297 |
c <- X.attr "index" X.number
|
| 298 |
d <- X.contents X.string
|
| 299 |
@
|
| 300 |
|
| 301 |
Finally, the `Xleb` monad has `Alternative` instances which allow for
|
| 302 |
concise expression of optional values or multiple possibilities.
|
| 303 |
|
| 304 |
@
|
| 305 |
e \<- X.children X.any (parseA \<|\> parseB)
|
| 306 |
f \<- optional (X.attr "total" X.number)
|
| 307 |
@
|
| 308 |
|
| 309 |
Consequently, for an XML structure like the following:
|
| 310 |
|
| 311 |
@
|
| 312 |
\<feed\>
|
| 313 |
\<title\>Feed Name\<\/title\>
|
| 314 |
\<author\>Pierre Menard\<\/author\>
|
| 315 |
\<entry title="Entry 01"\>First Post\<\/entry\>
|
| 316 |
\<entry title="Entry 02"\>Second Post Post\<\/entry\>
|
| 317 |
\<\/feed\>
|
| 318 |
@
|
| 319 |
|
| 320 |
We can write a 'Xleb' computation which is capable of parsing this
|
| 321 |
structure in a handful of lines:
|
| 322 |
|
| 323 |
@
|
| 324 |
import Control.Applicative (optional)
|
| 325 |
import qualified Text.XML.Xleb as X
|
| 326 |
|
| 327 |
feed :: X.Xleb (String, Maybe String, [(String, String)])
|
| 328 |
feed = X.elem "feed" $ do
|
| 329 |
feedTitle <- X.child (X.byTag "title") $
|
| 330 |
X.contents X.string
|
| 331 |
feedAuthor <- optional $ X.child (X.byTag "author") $
|
| 332 |
X.contents X.string
|
| 333 |
feedEntries <- X.children (X.byTag "entry") entry
|
| 334 |
return (feedTitle, feedAuthor, feedEntries)
|
| 335 |
|
| 336 |
entry :: X.Xleb (String, String)
|
| 337 |
entry = (,) \<$\> X.attr "title" X.string \<*\> X.contents X.string
|
| 338 |
@
|
| 339 |
|
| 340 |
-}
|