Major repo change: moved telml-markup into this repo and started using cabal.project format
Getty Ritter
9 years ago
| 1 | {-# LANGUAGE LambdaCase #-} | |
| 2 | ||
| 3 | module Data.TeLML.Parser (Fragment(..), Document, parse) where | |
| 4 | ||
| 5 | import Data.Char (isAlpha, isAlphaNum, isSpace) | |
| 6 | import Data.TeLML.Type | |
| 7 | ||
| 8 | type Result a = Either String (String, a) | |
| 9 | type Parse a = String -> Result a | |
| 10 | ||
| 11 | -- All of these characters are the ones which need escaping if used | |
| 12 | -- inside a document. | |
| 13 | isSpecial :: Char -> Bool | |
| 14 | isSpecial c = c `elem` "\\{}|" | |
| 15 | ||
| 16 | -- This is here for, uh, aesthetic reasons. | |
| 17 | throw :: a -> Either a b | |
| 18 | throw = Left | |
| 19 | ||
| 20 | -- This is 'fmap' named in such a way that it does not conflict with | |
| 21 | -- 'fmap'. | |
| 22 | over :: (a -> b) -> Result a -> Result b | |
| 23 | over _ (Left err) = Left err | |
| 24 | over f (Right (s, x)) = Right (s, f x) | |
| 25 | ||
| 26 | {- And this is a monadic bind. You'll note that this basically has the | |
| 27 | - same type as almost any other monadic parser combinator library, so | |
| 28 | - you might be wondering why I'm not just using parsec. On the other | |
| 29 | - hand, this is literally the only function I actually need there, so | |
| 30 | - why have another dependency I need to keep up-to-date when I could | |
| 31 | - just define this trivial function here? | |
| 32 | - | |
| 33 | - I'm also not defining this for `Monad` itself because then I'd have | |
| 34 | - to deal with newtype wrappers and defining methods for the whole | |
| 35 | - Functor=>Applicative=>Monad hierarchy, when all I really need is | |
| 36 | - this. | |
| 37 | -} | |
| 38 | bind :: Result a -> ((String, a) -> Result b) -> Result b | |
| 39 | bind (Left err) _ = Left err | |
| 40 | bind (Right a) f = f a | |
| 41 | ||
| 42 | -- Parse a text fragment, handling escapes. This will end as soon as it | |
| 43 | -- sees any non-escaped special character. | |
| 44 | pText :: Parse Fragment | |
| 45 | pText = over Text . go | |
| 46 | where go ('\\':x:xs) | |
| 47 | | isSpecial x = (x:) `over` go xs | |
| 48 | go i@(x:xs) | |
| 49 | | isSpecial x = return (i, "") | |
| 50 | | otherwise = (x:) `over` go xs | |
| 51 | go "" = return ("", "") | |
| 52 | ||
| 53 | -- Parse a tag name of length >= 0. | |
| 54 | pTagName :: Parse String | |
| 55 | pTagName s = go s `bind` ensureName | |
| 56 | where go i@(x:xs) | |
| 57 | | isAlphaNum x = (x:) `over` go xs | |
| 58 | | elem x "-_" = (x:) `over` go xs | |
| 59 | | otherwise = return (i, "") | |
| 60 | go [] = throw "unexpected end-of-document while parsing tag" | |
| 61 | ensureName (xs, name) | |
| 62 | | length name == 0 = | |
| 63 | throw "expected tag name after `\\'" | |
| 64 | | not (isAlpha (head name)) = | |
| 65 | throw "tag names must begin with an alphabetic character" | |
| 66 | | otherwise = return (xs, name) | |
| 67 | ||
| 68 | -- Skip any space charaters, returning () for the first non-space | |
| 69 | -- character (including EOF). | |
| 70 | skipSpace :: Parse () | |
| 71 | skipSpace i@(x:xs) | |
| 72 | | isSpace x = skipSpace xs | |
| 73 | | otherwise = return (i, ()) | |
| 74 | skipSpace _ = return ("", ()) | |
| 75 | ||
| 76 | -- Parse a tag assuming that a backslash has already been encountered. | |
| 77 | pTag :: Parse Fragment | |
| 78 | pTag i = | |
| 79 | bind (pTagName i) $ \ (i', name) -> | |
| 80 | bind (skipSpace i') $ \case | |
| 81 | ('{':i'', ()) -> Tag name `over` pArgs i'' | |
| 82 | ("",_) -> throw "unexpected end-of-document while parsing tag" | |
| 83 | _ -> throw "expected start of block" | |
| 84 | ||
| 85 | -- Parse the vertical-bar-separated arguments to a tag, ending when a | |
| 86 | -- right curly brace is encountered. | |
| 87 | pArgs :: Parse [Document] | |
| 88 | pArgs ('}':xs) = return (xs, []) | |
| 89 | pArgs s = bind (pFragments s) $ \case | |
| 90 | ('|':xs, cs) -> (cs:) `over` pArgs xs | |
| 91 | ('}':xs, cs) -> return (xs, [cs]) | |
| 92 | _ -> throw "[unreachable]" | |
| 93 | ||
| 94 | -- Parse any fragment, deciding whether to parse it as a tag or a text chunk | |
| 95 | pFragment :: Parse Fragment | |
| 96 | pFragment s@('\\':c:_) | |
| 97 | | isSpecial c = pText s | |
| 98 | pFragment ('\\':xs) = pTag xs | |
| 99 | pFragment s = pText s | |
| 100 | ||
| 101 | -- Parse multiple fragments, ending when it encounters a }, or |, or end-of-file. | |
| 102 | pFragments :: Parse Document | |
| 103 | pFragments "" = return ("", []) | |
| 104 | pFragments ('{':s) = bind (pFragments s) $ \case | |
| 105 | ('}':xs, cs) -> bind (pFragments xs) $ \(xs', cs') -> return (xs', cs ++ cs') | |
| 106 | (x:_, _) -> throw ("unexpected " ++ show x ++ "; expected '}'") | |
| 107 | ([], _) -> throw ("unexpected end-of-document while parsing block") | |
| 108 | pFragments s@(x:_) | |
| 109 | | x `elem` "}|" = return (s, []) | |
| 110 | | otherwise = | |
| 111 | bind (pFragment s) $ \case | |
| 112 | (s', c) -> (c:) `over` pFragments s' | |
| 113 | ||
| 114 | -- | Parse a string into a @TeLML@ 'Fragment'. | |
| 115 | parse :: String -> Either String Document | |
| 116 | parse str = case pFragments str of | |
| 117 | Right ("", r) -> return r | |
| 118 | Right ('}':_, _) -> throw ("Found unmatched '}' in document") | |
| 119 | Right (s, _) -> throw ("expected end of document but found " ++ show s) | |
| 120 | Left err -> throw err |
| 1 | {-# LANGUAGE DeriveDataTypeable #-} | |
| 2 | ||
| 3 | module Data.TeLML.Type (Document, Fragment(..)) where | |
| 4 | ||
| 5 | import Control.DeepSeq (NFData(..)) | |
| 6 | import Data.Data (Data) | |
| 7 | import Data.Typeable (Typeable) | |
| 8 | import Data.String (IsString(..)) | |
| 9 | ||
| 10 | -- | A 'Document' is zero or more 'Fragment's. | |
| 11 | type Document = [Fragment] | |
| 12 | ||
| 13 | -- | A 'Fragment' is either a snippet of text (as indicated by the | |
| 14 | -- 'Text' constructor) or a tag (as indicated by the 'Tag' | |
| 15 | -- constructor). The former is a raw string, and the latter consists | |
| 16 | -- of a name followed by zero or more 'Document's. | |
| 17 | data Fragment | |
| 18 | = Text String | |
| 19 | | Tag String [Document] | |
| 20 | deriving (Eq, Show, Typeable, Data) | |
| 21 | ||
| 22 | instance IsString Fragment where | |
| 23 | fromString = Text | |
| 24 | ||
| 25 | instance NFData Fragment where | |
| 26 | rnf (Text s) = rnf s | |
| 27 | rnf (Tag s l) = rnf s `seq` rnf l |
| 1 | Copyright (c) 2014, Getty Ritter | |
| 2 | ||
| 3 | All rights reserved. | |
| 4 | ||
| 5 | Redistribution and use in source and binary forms, with or without | |
| 6 | modification, are permitted provided that the following conditions are met: | |
| 7 | ||
| 8 | * Redistributions of source code must retain the above copyright | |
| 9 | notice, this list of conditions and the following disclaimer. | |
| 10 | ||
| 11 | * Redistributions in binary form must reproduce the above | |
| 12 | copyright notice, this list of conditions and the following | |
| 13 | disclaimer in the documentation and/or other materials provided | |
| 14 | with the distribution. | |
| 15 | ||
| 16 | * Neither the name of Getty Ritter nor the names of other | |
| 17 | contributors may be used to endorse or promote products derived | |
| 18 | from this software without specific prior written permission. | |
| 19 | ||
| 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 1 |
# |
|
| 1 | # telml | |
| 2 | 2 | |
| 3 | A Tex-Like Markup Language—which is a bit of a misnomer, because | |
| 4 | as presented, it's more of a structured data language optimized for | |
| 5 | text documents than an actual markup language. A markup-oriented | |
| 6 | use of this library can be found in the | |
| 3 | This is the repository for both the `telml` and the `telml-markup` | |
| 4 | packages. The former implements a TeX-inspired data format, while | |
| 5 | the latter uses that data format to implement a lightweight, | |
| 6 | extensible markup format. | |
| 7 | ||
| 8 | This repository contains a | |
| 9 | [cabal.project file](http://blog.ezyang.com/2016/05/announcing-cabal-new-build-nix-style-local-builds/), which facilitates building multiple packages together; | |
| 10 | this repository had previously been used for just the `telml` package, | |
| 11 | and the corresponding | |
| 7 | 12 | [`telml-markup`](https://github.com/aisamanra/telml-markup) |
| 8 | package. | |
| 9 | ||
| 10 | A _TeLML Document_ is a sequence of textual | |
| 11 | fragments interspersed with "tags", which have the format | |
| 12 | ||
| 13 | ~~~~ | |
| 14 | \tagname{ document | document | ... } | |
| 15 | ~~~~ | |
| 16 | ||
| 17 | Whitespace is _not_ allowed between the backslash and the tag | |
| 18 | name, but _is_ allowed between the tag name and the following | |
| 19 | block. This means that the following is allowed: | |
| 20 | ||
| 21 | ~~~~ | |
| 22 | \tag | |
| 23 | { element_1 | |
| 24 | | element_2 | |
| 25 | | ... | |
| 26 | | element_n | |
| 27 | } | |
| 28 | ~~~~ | |
| 29 | ||
| 30 | In contrast to TeX, the block is _obligatory_, i.e. | |
| 31 | the following is **not** a valid document: | |
| 32 | ||
| 33 | ~~~~ | |
| 34 | \p{this \br and that} | |
| 35 | ~~~~ | |
| 36 | ||
| 37 | Whereas this is: | |
| 38 | ||
| 39 | ~~~~ | |
| 40 | \p{this \br{} and that} | |
| 41 | ~~~~ | |
| 42 | ||
| 43 | (This restriction might at some point be lifted based on experience | |
| 44 | with the format.) | |
| 45 | ||
| 46 | The intended use for TeLML is as a building-block for specific | |
| 47 | markup formats in which you might want to have arbitrary new | |
| 48 | tags, but don't want to use an XML-based solution. | |
| 49 | ||
| 50 | # Formal Grammar | |
| 51 | ||
| 52 | ~~~~ | |
| 53 | <document> ::= <fragment>* | |
| 54 | <fragment> ::= <tag> | <text> | "{" <document> "}" | |
| 55 | ||
| 56 | <text> ::= /([^\]|[\][\{}|])*/ | |
| 57 | ||
| 58 | <tag> ::= "\" <tagname> <spaces> "{" <arglist> "}" | |
| 59 | <tagname> ::= /[A-Za-z][A-Za-z0-9_-]*/ | |
| 60 | <arglist> ::= <document> ("|" <document>)* | |
| 61 | <spaces> ::= /[ \t\r\n]*/ | |
| 62 | ~~~~ | |
| 63 | ||
| 64 | # Possible Future Modifications | |
| 65 | ||
| 66 | Variations on this theme that might be possible: | |
| 67 | ||
| 68 | - It might (as stated above) be worthwhile to attempt to lift the | |
| 69 | restriction that every tag has an argument block, if nullary | |
| 70 | tags are commonly used. | |
| 71 | - It might be nice to optionally allow `\begin{...}` and `\end{...}` | |
| 72 | tags to be parsed as a special case of certain delimiters. | |
| 73 | - Maybe a special case for named arguments of the form | |
| 74 | `\tag{arg=stuff|arg=stuff}`, although this could be | |
| 75 | handled at present by using nested tags as in | |
| 76 |
|
|
| 13 | repository is left but will not be updated. |
| 1 | packages: telml/telml.cabal, telml-markup/telml-markup.cabal |
| 1 | {-# LANGUAGE LambdaCase #-} | |
| 2 | ||
| 3 | module Data.TeLML.Parser (Fragment(..), Document, parse) where | |
| 4 | ||
| 5 | import Data.Char (isAlpha, isAlphaNum, isSpace) | |
| 6 | import Data.TeLML.Type | |
| 7 | ||
| 8 | type Result a = Either String (String, a) | |
| 9 | type Parse a = String -> Result a | |
| 10 | ||
| 11 | -- All of these characters are the ones which need escaping if used | |
| 12 | -- inside a document. | |
| 13 | isSpecial :: Char -> Bool | |
| 14 | isSpecial c = c `elem` "\\{}|" | |
| 15 | ||
| 16 | -- This is here for, uh, aesthetic reasons. | |
| 17 | throw :: a -> Either a b | |
| 18 | throw = Left | |
| 19 | ||
| 20 | -- This is 'fmap' named in such a way that it does not conflict with | |
| 21 | -- 'fmap'. | |
| 22 | over :: (a -> b) -> Result a -> Result b | |
| 23 | over _ (Left err) = Left err | |
| 24 | over f (Right (s, x)) = Right (s, f x) | |
| 25 | ||
| 26 | {- And this is a monadic bind. You'll note that this basically has the | |
| 27 | - same type as almost any other monadic parser combinator library, so | |
| 28 | - you might be wondering why I'm not just using parsec. On the other | |
| 29 | - hand, this is literally the only function I actually need there, so | |
| 30 | - why have another dependency I need to keep up-to-date when I could | |
| 31 | - just define this trivial function here? | |
| 32 | - | |
| 33 | - I'm also not defining this for `Monad` itself because then I'd have | |
| 34 | - to deal with newtype wrappers and defining methods for the whole | |
| 35 | - Functor=>Applicative=>Monad hierarchy, when all I really need is | |
| 36 | - this. | |
| 37 | -} | |
| 38 | bind :: Result a -> ((String, a) -> Result b) -> Result b | |
| 39 | bind (Left err) _ = Left err | |
| 40 | bind (Right a) f = f a | |
| 41 | ||
| 42 | -- Parse a text fragment, handling escapes. This will end as soon as it | |
| 43 | -- sees any non-escaped special character. | |
| 44 | pText :: Parse Fragment | |
| 45 | pText = over Text . go | |
| 46 | where go ('\\':x:xs) | |
| 47 | | isSpecial x = (x:) `over` go xs | |
| 48 | go i@(x:xs) | |
| 49 | | isSpecial x = return (i, "") | |
| 50 | | otherwise = (x:) `over` go xs | |
| 51 | go "" = return ("", "") | |
| 52 | ||
| 53 | -- Parse a tag name of length >= 0. | |
| 54 | pTagName :: Parse String | |
| 55 | pTagName s = go s `bind` ensureName | |
| 56 | where go i@(x:xs) | |
| 57 | | isAlphaNum x = (x:) `over` go xs | |
| 58 | | elem x "-_" = (x:) `over` go xs | |
| 59 | | otherwise = return (i, "") | |
| 60 | go [] = throw "unexpected end-of-document while parsing tag" | |
| 61 | ensureName (xs, name) | |
| 62 | | length name == 0 = | |
| 63 | throw "expected tag name after `\\'" | |
| 64 | | not (isAlpha (head name)) = | |
| 65 | throw "tag names must begin with an alphabetic character" | |
| 66 | | otherwise = return (xs, name) | |
| 67 | ||
| 68 | -- Skip any space charaters, returning () for the first non-space | |
| 69 | -- character (including EOF). | |
| 70 | skipSpace :: Parse () | |
| 71 | skipSpace i@(x:xs) | |
| 72 | | isSpace x = skipSpace xs | |
| 73 | | otherwise = return (i, ()) | |
| 74 | skipSpace _ = return ("", ()) | |
| 75 | ||
| 76 | -- Parse a tag assuming that a backslash has already been encountered. | |
| 77 | pTag :: Parse Fragment | |
| 78 | pTag i = | |
| 79 | bind (pTagName i) $ \ (i', name) -> | |
| 80 | bind (skipSpace i') $ \case | |
| 81 | ('{':i'', ()) -> Tag name `over` pArgs i'' | |
| 82 | ("",_) -> throw "unexpected end-of-document while parsing tag" | |
| 83 | _ -> throw "expected start of block" | |
| 84 | ||
| 85 | -- Parse the vertical-bar-separated arguments to a tag, ending when a | |
| 86 | -- right curly brace is encountered. | |
| 87 | pArgs :: Parse [Document] | |
| 88 | pArgs ('}':xs) = return (xs, []) | |
| 89 | pArgs s = bind (pFragments s) $ \case | |
| 90 | ('|':xs, cs) -> (cs:) `over` pArgs xs | |
| 91 | ('}':xs, cs) -> return (xs, [cs]) | |
| 92 | _ -> throw "[unreachable]" | |
| 93 | ||
| 94 | -- Parse any fragment, deciding whether to parse it as a tag or a text chunk | |
| 95 | pFragment :: Parse Fragment | |
| 96 | pFragment s@('\\':c:_) | |
| 97 | | isSpecial c = pText s | |
| 98 | pFragment ('\\':xs) = pTag xs | |
| 99 | pFragment s = pText s | |
| 100 | ||
| 101 | -- Parse multiple fragments, ending when it encounters a }, or |, or end-of-file. | |
| 102 | pFragments :: Parse Document | |
| 103 | pFragments "" = return ("", []) | |
| 104 | pFragments ('{':s) = bind (pFragments s) $ \case | |
| 105 | ('}':xs, cs) -> bind (pFragments xs) $ \(xs', cs') -> return (xs', cs ++ cs') | |
| 106 | (x:_, _) -> throw ("unexpected " ++ show x ++ "; expected '}'") | |
| 107 | ([], _) -> throw ("unexpected end-of-document while parsing block") | |
| 108 | pFragments s@(x:_) | |
| 109 | | x `elem` "}|" = return (s, []) | |
| 110 | | otherwise = | |
| 111 | bind (pFragment s) $ \case | |
| 112 | (s', c) -> (c:) `over` pFragments s' | |
| 113 | ||
| 114 | -- | Parse a string into a @TeLML@ 'Fragment'. | |
| 115 | parse :: String -> Either String Document | |
| 116 | parse str = case pFragments str of | |
| 117 | Right ("", r) -> return r | |
| 118 | Right ('}':_, _) -> throw ("Found unmatched '}' in document") | |
| 119 | Right (s, _) -> throw ("expected end of document but found " ++ show s) | |
| 120 | Left err -> throw err |
| 1 | {-# LANGUAGE DeriveDataTypeable #-} | |
| 2 | ||
| 3 | module Data.TeLML.Type (Document, Fragment(..)) where | |
| 4 | ||
| 5 | import Control.DeepSeq (NFData(..)) | |
| 6 | import Data.Data (Data) | |
| 7 | import Data.Typeable (Typeable) | |
| 8 | import Data.String (IsString(..)) | |
| 9 | ||
| 10 | -- | A 'Document' is zero or more 'Fragment's. | |
| 11 | type Document = [Fragment] | |
| 12 | ||
| 13 | -- | A 'Fragment' is either a snippet of text (as indicated by the | |
| 14 | -- 'Text' constructor) or a tag (as indicated by the 'Tag' | |
| 15 | -- constructor). The former is a raw string, and the latter consists | |
| 16 | -- of a name followed by zero or more 'Document's. | |
| 17 | data Fragment | |
| 18 | = Text String | |
| 19 | | Tag String [Document] | |
| 20 | deriving (Eq, Show, Typeable, Data) | |
| 21 | ||
| 22 | instance IsString Fragment where | |
| 23 | fromString = Text | |
| 24 | ||
| 25 | instance NFData Fragment where | |
| 26 | rnf (Text s) = rnf s | |
| 27 | rnf (Tag s l) = rnf s `seq` rnf l |
| 1 | Copyright (c) 2014, Getty Ritter | |
| 2 | ||
| 3 | All rights reserved. | |
| 4 | ||
| 5 | Redistribution and use in source and binary forms, with or without | |
| 6 | modification, are permitted provided that the following conditions are met: | |
| 7 | ||
| 8 | * Redistributions of source code must retain the above copyright | |
| 9 | notice, this list of conditions and the following disclaimer. | |
| 10 | ||
| 11 | * Redistributions in binary form must reproduce the above | |
| 12 | copyright notice, this list of conditions and the following | |
| 13 | disclaimer in the documentation and/or other materials provided | |
| 14 | with the distribution. | |
| 15 | ||
| 16 | * Neither the name of Getty Ritter nor the names of other | |
| 17 | contributors may be used to endorse or promote products derived | |
| 18 | from this software without specific prior written permission. | |
| 19 | ||
| 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 1 | # TeLML | |
| 2 | ||
| 3 | A Tex-Like Markup Language—which is a bit of a misnomer, because | |
| 4 | as presented, it's more of a structured data language optimized for | |
| 5 | text documents than an actual markup language. A markup-oriented | |
| 6 | use of this library can be found in the | |
| 7 | [`telml-markup`](https://github.com/aisamanra/telml-markup) | |
| 8 | package. | |
| 9 | ||
| 10 | A _TeLML Document_ is a sequence of textual | |
| 11 | fragments interspersed with "tags", which have the format | |
| 12 | ||
| 13 | ~~~~ | |
| 14 | \tagname{ document | document | ... } | |
| 15 | ~~~~ | |
| 16 | ||
| 17 | Whitespace is _not_ allowed between the backslash and the tag | |
| 18 | name, but _is_ allowed between the tag name and the following | |
| 19 | block. This means that the following is allowed: | |
| 20 | ||
| 21 | ~~~~ | |
| 22 | \tag | |
| 23 | { element_1 | |
| 24 | | element_2 | |
| 25 | | ... | |
| 26 | | element_n | |
| 27 | } | |
| 28 | ~~~~ | |
| 29 | ||
| 30 | In contrast to TeX, the block is _obligatory_, i.e. | |
| 31 | the following is **not** a valid document: | |
| 32 | ||
| 33 | ~~~~ | |
| 34 | \p{this \br and that} | |
| 35 | ~~~~ | |
| 36 | ||
| 37 | Whereas this is: | |
| 38 | ||
| 39 | ~~~~ | |
| 40 | \p{this \br{} and that} | |
| 41 | ~~~~ | |
| 42 | ||
| 43 | (This restriction might at some point be lifted based on experience | |
| 44 | with the format.) | |
| 45 | ||
| 46 | The intended use for TeLML is as a building-block for specific | |
| 47 | markup formats in which you might want to have arbitrary new | |
| 48 | tags, but don't want to use an XML-based solution. | |
| 49 | ||
| 50 | # Formal Grammar | |
| 51 | ||
| 52 | ~~~~ | |
| 53 | <document> ::= <fragment>* | |
| 54 | <fragment> ::= <tag> | <text> | "{" <document> "}" | |
| 55 | ||
| 56 | <text> ::= /([^\]|[\][\{}|])*/ | |
| 57 | ||
| 58 | <tag> ::= "\" <tagname> <spaces> "{" <arglist> "}" | |
| 59 | <tagname> ::= /[A-Za-z][A-Za-z0-9_-]*/ | |
| 60 | <arglist> ::= <document> ("|" <document>)* | |
| 61 | <spaces> ::= /[ \t\r\n]*/ | |
| 62 | ~~~~ | |
| 63 | ||
| 64 | # Possible Future Modifications | |
| 65 | ||
| 66 | Variations on this theme that might be possible: | |
| 67 | ||
| 68 | - It might (as stated above) be worthwhile to attempt to lift the | |
| 69 | restriction that every tag has an argument block, if nullary | |
| 70 | tags are commonly used. | |
| 71 | - It might be nice to optionally allow `\begin{...}` and `\end{...}` | |
| 72 | tags to be parsed as a special case of certain delimiters. | |
| 73 | - Maybe a special case for named arguments of the form | |
| 74 | `\tag{arg=stuff|arg=stuff}`, although this could be | |
| 75 | handled at present by using nested tags as in | |
| 76 | `\tag{\arg{stuff}|\arg{stuff}}`. |
| 1 | name: telml | |
| 2 | version: 0.1.0.0 | |
| 3 | synopsis: A lightweight TeX-like markup format. | |
| 4 | description: A lightweight TeX-like markup format. | |
| 5 | license: BSD3 | |
| 6 | license-file: LICENSE | |
| 7 | author: Getty Ritter | |
| 8 | maintainer: gdritter@galois.com | |
| 9 | copyright: ©2015 Getty Ritter | |
| 10 | category: Data | |
| 11 | build-type: Simple | |
| 12 | cabal-version: >=1.10 | |
| 13 | ||
| 14 | library | |
| 15 | exposed-modules: Data.TeLML | |
| 16 | ghc-options: -Wall | |
| 17 | other-modules: Data.TeLML.Parser, Data.TeLML.Type | |
| 18 | build-depends: base >=4.7 && <4.9, | |
| 19 | deepseq >=1.4 && <2 | |
| 20 | default-language: Haskell2010 |
| 1 | {-# LANGUAGE LambdaCase #-} | |
| 2 | ||
| 3 | module Data.TeLML.Markup where | |
| 4 | ||
| 5 | import Control.Monad (void) | |
| 6 | import Data.TeLML | |
| 7 | import Text.Blaze.Html | |
| 8 | import Text.Blaze.Html5 hiding (map, head, html) | |
| 9 | import Text.Blaze.Html5.Attributes hiding (name) | |
| 10 | ||
| 11 | -- | Render a TeLML document with an extra set of possible tags. | |
| 12 | renderWith :: [(String, Renderer)] -> Document -> Either String Html | |
| 13 | renderWith rs = | |
| 14 | fmap (void . sequence) . mapM (renderPara (basicTags ++ rs)) . gatherPara | |
| 15 | ||
| 16 | -- | Render a TeLML document with the default set of tags. | |
| 17 | render :: Document -> Either String Html | |
| 18 | render = renderWith [] | |
| 19 | ||
| 20 | -- This is a gross function, but I'm not sure how to decompose it any | |
| 21 | -- other way. It takes a Document---i.e. a set of Fragments---and | |
| 22 | -- splits it apart whenever it comes across double newlines. | |
| 23 | gatherPara :: Document -> [Document] | |
| 24 | gatherPara = reverse . map reverse . go [[]] | |
| 25 | where go rs [] = rs | |
| 26 | go (r:rs) (t@Tag {}:ts) = go ((t:r):rs) ts | |
| 27 | go (r:rs) (Text s:ts) = case splitString s of | |
| 28 | [] -> go (r:rs) ts | |
| 29 | [x] -> go ((Text x:r):rs) ts | |
| 30 | xs -> go (map ((:[]) . Text) (tail xs) ++ | |
| 31 | ((Text (head xs):r) : rs)) ts | |
| 32 | go _ _ = error "[unreachable]" | |
| 33 | ||
| 34 | -- Split a string at double-newlines. | |
| 35 | splitString :: String -> [String] | |
| 36 | splitString = filter (/= "") . go | |
| 37 | where go ('\n':'\n':xs) = "\n":go xs | |
| 38 | go (x:xs) = let r:rs = go xs in ((x:r):rs) | |
| 39 | go "" = [""] | |
| 40 | ||
| 41 | -- This is just to make type signatures shorter | |
| 42 | type HtmlE = Either String Html | |
| 43 | ||
| 44 | type Renderer = (Fragment -> HtmlE, [Document]) -> HtmlE | |
| 45 | ||
| 46 | -- The built-in set of tags (subject to change) | |
| 47 | basicTags :: [(String, Renderer)] | |
| 48 | basicTags = | |
| 49 | [ ("em" | |
| 50 | , \case (f,[rs]) -> fmap (em . sequence_) (mapM f rs) | |
| 51 | _ -> Left "wrong arity for em/1" | |
| 52 | ) | |
| 53 | , ("strong" | |
| 54 | , \case (f,[rs]) -> fmap (strong . sequence_) (mapM f rs) | |
| 55 | _ -> Left "wrong arity for strong/1" | |
| 56 | ) | |
| 57 | , ("code" | |
| 58 | , \case (f,[rs]) -> fmap (code . sequence_) (mapM f rs) | |
| 59 | _ -> Left "wrong arity for code/1" | |
| 60 | ) | |
| 61 | , ("link" | |
| 62 | , \case (f,[[Text l],r]) -> let go h = a ! href (stringValue l) $ h | |
| 63 | in fmap (go . sequence_) (mapM f r) | |
| 64 | (_,[_,_]) -> Left "link target should be string" | |
| 65 | _ -> Left "wrong arity for link/1" | |
| 66 | ) | |
| 67 | ] | |
| 68 | ||
| 69 | -- render a single paragraph | |
| 70 | renderPara :: [(String, Renderer)] -> Document -> Either String Html | |
| 71 | renderPara taglist ds = fmap (p . sequence_) (mapM go ds) | |
| 72 | where go (Text ts) = Right (toMarkup ts) | |
| 73 | go (Tag tx rs) = exec tx rs taglist | |
| 74 | exec name args ((tag, func):tags) | |
| 75 | | name == tag = case func (go, args) of | |
| 76 | Right html -> Right html | |
| 77 | Left {} -> exec name args tags | |
| 78 | exec name args (_:tags) = exec name args tags | |
| 79 | exec name args [] = Left $ | |
| 80 | "Error: no match for tag " ++ name ++ "/" ++ show (length args) |
| 1 | Copyright (c) 2015, Getty Ritter | |
| 2 | ||
| 3 | All rights reserved. | |
| 4 | ||
| 5 | Redistribution and use in source and binary forms, with or without | |
| 6 | modification, are permitted provided that the following conditions are met: | |
| 7 | ||
| 8 | * Redistributions of source code must retain the above copyright | |
| 9 | notice, this list of conditions and the following disclaimer. | |
| 10 | ||
| 11 | * Redistributions in binary form must reproduce the above | |
| 12 | copyright notice, this list of conditions and the following | |
| 13 | disclaimer in the documentation and/or other materials provided | |
| 14 | with the distribution. | |
| 15 | ||
| 16 | * Neither the name of Getty Ritter nor the names of other | |
| 17 | contributors may be used to endorse or promote products derived | |
| 18 | from this software without specific prior written permission. | |
| 19 | ||
| 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 1 | # telml-markup | |
| 2 | ||
| 3 | The `telml-markup` package provides a simple, LaTeX-inspired markup | |
| 4 | language with the possibility of writing extensible instructions. | |
| 5 | ||
| 6 | **This is in early phases, and so should not be relied upon for any | |
| 7 | kind of stability at this stage.** In particular, the set of tags | |
| 8 | and how they are understood could change radically during development. | |
| 9 | ||
| 10 | ## Sample Document | |
| 11 | ||
| 12 | ~~~~ | |
| 13 | \code{telml-markup} uses a mostly HTML-like set of names, so we create | |
| 14 | \em{emphasis} with \code{\\em}, \strong{bolding} with \em{\\strong}, | |
| 15 | and \code{fixed-width text} with \code{\\code}. | |
| 16 | ||
| 17 | One major difference is the \code{\\link} tag, which takes the form | |
| 18 | \code{\\link\{url\|text\}}, | |
| 19 | \link{https://github.com/aisamanra/telml-markup|like this}. | |
| 20 | ~~~~ | |
| 21 | ||
| 22 | This produces the following rendered HTML: | |
| 23 | ||
| 24 | > <p><code>telml-markup</code> uses an HTML-like set of names, so we create | |
| 25 | > <em>emphasis</em> with <code>\em</code>, <strong>bolding</strong> with <code>\strong</code>, | |
| 26 | > and <code>fixed-width text</code> with <code>\code</code>. | |
| 27 | > </p><p>One major difference is the <code>\link</code> tag, which takes the form | |
| 28 | > <code>\link{url|text}</code>, | |
| 29 | > <a href="https://github.com/aisamanra/telml-markup">like this</a>. | |
| 30 | > </p> | |
| 31 | ||
| 32 | ## Basic Usage | |
| 33 | ||
| 34 | The `render` function takes a [`telml`](https://github.com/aisamanra/telml) | |
| 35 | document and renders it into the | |
| 36 | [`blaze-html`](http://hackage.haskell.org/package/blaze-html-0.8.0.2) | |
| 37 | [`Html`](http://hackage.haskell.org/package/blaze-html-0.8.0.2/docs/Text-Blaze-Html.html#t:Html) | |
| 38 | type, which can then be rendered into HTML. For example, the following | |
| 39 | is a minimal program which interprets input on `stdin` and prints the | |
| 40 | rendered HTML (or an error message) to `stdout`. | |
| 41 | ||
| 42 | ~~~~{.haskell} | |
| 43 | module Main | |
| 44 | ||
| 45 | import Control.Monad ((>=>)) | |
| 46 | import Data.TeLML (parse) | |
| 47 | import Data.TeLML.Markup (render) | |
| 48 | import System.Exit (exitFailure) | |
| 49 | import Text.Blaze.Renderer.String (renderMarkup) | |
| 50 | ||
| 51 | main :: IO () | |
| 52 | main = do | |
| 53 | str <- getContents | |
| 54 | case (parse >=> render) str of | |
| 55 | Left err -> putStrLn err >> exitFailure | |
| 56 | Right doc -> putStrLn (renderMarkup doc) | |
| 57 | ~~~~ | |
| 58 | ||
| 59 | We could invoke it at the command line like so: | |
| 60 | ||
| 61 | ~~~~ | |
| 62 | $ ./telml-markup-test <<EOF | |
| 63 | > This should be \em{emphasized}. | |
| 64 | > | |
| 65 | > This, on the other hand, is \strong{bold}. | |
| 66 | > EOF | |
| 67 | <p>This should be<em>emphasized</em>. | |
| 68 | </p><p>This, on the other hand, is <strong>bold</strong>. | |
| 69 | </p> | |
| 70 | ~~~~ | |
| 71 | ||
| 72 | If we give it an unknown tag, or a tag with the wrong arity, it will | |
| 73 | give us an error: | |
| 74 | ||
| 75 | ~~~~ | |
| 76 | $ ./telml-markup-test <<EOF | |
| 77 | > This is a \fake{tag}. | |
| 78 | > EOF | |
| 79 | Error: no match for tag fake/1 | |
| 80 | $ ./telml-markup-test <<EOF | |
| 81 | > This is a tag with \em{too|many|arguments}. | |
| 82 | > EOF | |
| 83 | Error: no match for tag em/3 | |
| 84 | ~~~~ | |
| 85 | ||
| 86 | ## Extended Usage | |
| 87 | ||
| 88 | The `renderWith` function takes a list of additional tags and their | |
| 89 | denotations (in the form of functions from `telml` fragments to | |
| 90 | `blaze-html` fragments.) This allows you to add new tags to the | |
| 91 | markup for particular purposes. | |
| 92 | ||
| 93 | For example, here we add a tag so that `\hello{...}` will render out to | |
| 94 | the HTML string `<strong>Hello, ...!</strong>`: | |
| 95 | ||
| 96 | ~~~~{.haskell} | |
| 97 | module Main where | |
| 98 | ||
| 99 | import Control.Monad ((>=>)) | |
| 100 | import Data.TeLML (parse) | |
| 101 | import Data.TeLML.Markup (Renderer, renderWith) | |
| 102 | import System.Exit (exitFailure) | |
| 103 | import Text.Blaze.Html5 (strong, toMarkup) | |
| 104 | import Text.Blaze.Renderer.String (renderMarkup) | |
| 105 | ||
| 106 | myTags :: [(String, Renderer)] | |
| 107 | myTags = | |
| 108 | [ ("hello", \ c -> case c of | |
| 109 | (render, [name]) -> do | |
| 110 | rName <- mapM render name | |
| 111 | return $ strong $ do | |
| 112 | toMarkup "Hello, " | |
| 113 | sequence_ rName | |
| 114 | toMarkup "!" | |
| 115 | (_, args) -> Left ("Did not match hello/" ++ show (length args)) | |
| 116 | ) | |
| 117 | ] | |
| 118 | ||
| 119 | main :: IO () | |
| 120 | main = do | |
| 121 | str <- getContents | |
| 122 | case (parse >=> renderWith myTags) str of | |
| 123 | Left err -> putStrLn err >> exitFailure | |
| 124 | Right doc -> putStrLn (renderMarkup doc) | |
| 125 | ~~~~ | |
| 126 | ||
| 127 | We can execute this to test it: | |
| 128 | ||
| 129 | ~~~~ | |
| 130 | $ ./telml-markup-extended-test <<EOF | |
| 131 | > Now we can do this: \hello{friend}. | |
| 132 | > EOF | |
| 133 | <p>Now we can do this: <strong>Hello, friend!</strong>. | |
| 134 | </p> | |
| 135 | ~~~~ |
| 1 | Name: telml-markup | |
| 2 | Version: 0.0.0 | |
| 3 | Author: Getty Ritter <gettylefou@gmail.com> | |
| 4 | Maintainer: Getty Ritter <gettylefou@gmail.com> | |
| 5 | License: BSD3 | |
| 6 | License-File: LICENSE | |
| 7 | -- Synopsis: | |
| 8 | -- Description: | |
| 9 | Cabal-Version: >= 1.10 | |
| 10 | Build-Type: Simple | |
| 11 | Extra-Source-Files: README.md, ChangeLog.md | |
| 12 | ||
| 13 | Library | |
| 14 | Default-Language: Haskell2010 | |
| 15 | GHC-Options: -Wall | |
| 16 | Exposed-Modules: Data.TeLML.Markup | |
| 17 | Build-Depends: base >= 4 && < 5, telml, blaze-markup, blaze-html, mtl | |
| 18 | ||
| 19 | -- Test-Suite spec | |
| 20 | -- Type: exitcode-stdio-1.0 | |
| 21 | -- Default-Language: Haskell2010 | |
| 22 | -- Hs-Source-Dirs: test | |
| 23 | -- Ghc-Options: -Wall | |
| 24 | -- Main-Is: Spec.hs | |
| 25 | -- Build-Depends: base | |
| 26 | -- , hspec | |
| 27 | ||
| 28 | Source-Repository head | |
| 29 | Type: git | |
| 30 | -- Location: |
| 1 | name: telml | |
| 2 | version: 0.1.0.0 | |
| 3 | synopsis: A lightweight TeX-like markup format. | |
| 4 | description: A lightweight TeX-like markup format. | |
| 5 | license: BSD3 | |
| 6 | license-file: LICENSE | |
| 7 | author: Getty Ritter | |
| 8 | maintainer: gdritter@galois.com | |
| 9 | copyright: ©2015 Getty Ritter | |
| 10 | category: Data | |
| 11 | build-type: Simple | |
| 12 | cabal-version: >=1.10 | |
| 13 | ||
| 14 | library | |
| 15 | exposed-modules: Data.TeLML | |
| 16 | ghc-options: -Wall | |
| 17 | other-modules: Data.TeLML.Parser, Data.TeLML.Type | |
| 18 | build-depends: base >=4.7 && <4.9, | |
| 19 | deepseq >=1.4 && <2 | |
| 20 | default-language: Haskell2010 |