Major repo change: moved telml-markup into this repo and started using cabal.project format
Getty Ritter
8 years ago
1 | {-# LANGUAGE LambdaCase #-} | |
2 | ||
3 | module Data.TeLML.Parser (Fragment(..), Document, parse) where | |
4 | ||
5 | import Data.Char (isAlpha, isAlphaNum, isSpace) | |
6 | import Data.TeLML.Type | |
7 | ||
8 | type Result a = Either String (String, a) | |
9 | type Parse a = String -> Result a | |
10 | ||
11 | -- All of these characters are the ones which need escaping if used | |
12 | -- inside a document. | |
13 | isSpecial :: Char -> Bool | |
14 | isSpecial c = c `elem` "\\{}|" | |
15 | ||
16 | -- This is here for, uh, aesthetic reasons. | |
17 | throw :: a -> Either a b | |
18 | throw = Left | |
19 | ||
20 | -- This is 'fmap' named in such a way that it does not conflict with | |
21 | -- 'fmap'. | |
22 | over :: (a -> b) -> Result a -> Result b | |
23 | over _ (Left err) = Left err | |
24 | over f (Right (s, x)) = Right (s, f x) | |
25 | ||
26 | {- And this is a monadic bind. You'll note that this basically has the | |
27 | - same type as almost any other monadic parser combinator library, so | |
28 | - you might be wondering why I'm not just using parsec. On the other | |
29 | - hand, this is literally the only function I actually need there, so | |
30 | - why have another dependency I need to keep up-to-date when I could | |
31 | - just define this trivial function here? | |
32 | - | |
33 | - I'm also not defining this for `Monad` itself because then I'd have | |
34 | - to deal with newtype wrappers and defining methods for the whole | |
35 | - Functor=>Applicative=>Monad hierarchy, when all I really need is | |
36 | - this. | |
37 | -} | |
38 | bind :: Result a -> ((String, a) -> Result b) -> Result b | |
39 | bind (Left err) _ = Left err | |
40 | bind (Right a) f = f a | |
41 | ||
42 | -- Parse a text fragment, handling escapes. This will end as soon as it | |
43 | -- sees any non-escaped special character. | |
44 | pText :: Parse Fragment | |
45 | pText = over Text . go | |
46 | where go ('\\':x:xs) | |
47 | | isSpecial x = (x:) `over` go xs | |
48 | go i@(x:xs) | |
49 | | isSpecial x = return (i, "") | |
50 | | otherwise = (x:) `over` go xs | |
51 | go "" = return ("", "") | |
52 | ||
53 | -- Parse a tag name of length >= 0. | |
54 | pTagName :: Parse String | |
55 | pTagName s = go s `bind` ensureName | |
56 | where go i@(x:xs) | |
57 | | isAlphaNum x = (x:) `over` go xs | |
58 | | elem x "-_" = (x:) `over` go xs | |
59 | | otherwise = return (i, "") | |
60 | go [] = throw "unexpected end-of-document while parsing tag" | |
61 | ensureName (xs, name) | |
62 | | length name == 0 = | |
63 | throw "expected tag name after `\\'" | |
64 | | not (isAlpha (head name)) = | |
65 | throw "tag names must begin with an alphabetic character" | |
66 | | otherwise = return (xs, name) | |
67 | ||
68 | -- Skip any space charaters, returning () for the first non-space | |
69 | -- character (including EOF). | |
70 | skipSpace :: Parse () | |
71 | skipSpace i@(x:xs) | |
72 | | isSpace x = skipSpace xs | |
73 | | otherwise = return (i, ()) | |
74 | skipSpace _ = return ("", ()) | |
75 | ||
76 | -- Parse a tag assuming that a backslash has already been encountered. | |
77 | pTag :: Parse Fragment | |
78 | pTag i = | |
79 | bind (pTagName i) $ \ (i', name) -> | |
80 | bind (skipSpace i') $ \case | |
81 | ('{':i'', ()) -> Tag name `over` pArgs i'' | |
82 | ("",_) -> throw "unexpected end-of-document while parsing tag" | |
83 | _ -> throw "expected start of block" | |
84 | ||
85 | -- Parse the vertical-bar-separated arguments to a tag, ending when a | |
86 | -- right curly brace is encountered. | |
87 | pArgs :: Parse [Document] | |
88 | pArgs ('}':xs) = return (xs, []) | |
89 | pArgs s = bind (pFragments s) $ \case | |
90 | ('|':xs, cs) -> (cs:) `over` pArgs xs | |
91 | ('}':xs, cs) -> return (xs, [cs]) | |
92 | _ -> throw "[unreachable]" | |
93 | ||
94 | -- Parse any fragment, deciding whether to parse it as a tag or a text chunk | |
95 | pFragment :: Parse Fragment | |
96 | pFragment s@('\\':c:_) | |
97 | | isSpecial c = pText s | |
98 | pFragment ('\\':xs) = pTag xs | |
99 | pFragment s = pText s | |
100 | ||
101 | -- Parse multiple fragments, ending when it encounters a }, or |, or end-of-file. | |
102 | pFragments :: Parse Document | |
103 | pFragments "" = return ("", []) | |
104 | pFragments ('{':s) = bind (pFragments s) $ \case | |
105 | ('}':xs, cs) -> bind (pFragments xs) $ \(xs', cs') -> return (xs', cs ++ cs') | |
106 | (x:_, _) -> throw ("unexpected " ++ show x ++ "; expected '}'") | |
107 | ([], _) -> throw ("unexpected end-of-document while parsing block") | |
108 | pFragments s@(x:_) | |
109 | | x `elem` "}|" = return (s, []) | |
110 | | otherwise = | |
111 | bind (pFragment s) $ \case | |
112 | (s', c) -> (c:) `over` pFragments s' | |
113 | ||
114 | -- | Parse a string into a @TeLML@ 'Fragment'. | |
115 | parse :: String -> Either String Document | |
116 | parse str = case pFragments str of | |
117 | Right ("", r) -> return r | |
118 | Right ('}':_, _) -> throw ("Found unmatched '}' in document") | |
119 | Right (s, _) -> throw ("expected end of document but found " ++ show s) | |
120 | Left err -> throw err |
1 | {-# LANGUAGE DeriveDataTypeable #-} | |
2 | ||
3 | module Data.TeLML.Type (Document, Fragment(..)) where | |
4 | ||
5 | import Control.DeepSeq (NFData(..)) | |
6 | import Data.Data (Data) | |
7 | import Data.Typeable (Typeable) | |
8 | import Data.String (IsString(..)) | |
9 | ||
10 | -- | A 'Document' is zero or more 'Fragment's. | |
11 | type Document = [Fragment] | |
12 | ||
13 | -- | A 'Fragment' is either a snippet of text (as indicated by the | |
14 | -- 'Text' constructor) or a tag (as indicated by the 'Tag' | |
15 | -- constructor). The former is a raw string, and the latter consists | |
16 | -- of a name followed by zero or more 'Document's. | |
17 | data Fragment | |
18 | = Text String | |
19 | | Tag String [Document] | |
20 | deriving (Eq, Show, Typeable, Data) | |
21 | ||
22 | instance IsString Fragment where | |
23 | fromString = Text | |
24 | ||
25 | instance NFData Fragment where | |
26 | rnf (Text s) = rnf s | |
27 | rnf (Tag s l) = rnf s `seq` rnf l |
1 | Copyright (c) 2014, Getty Ritter | |
2 | ||
3 | All rights reserved. | |
4 | ||
5 | Redistribution and use in source and binary forms, with or without | |
6 | modification, are permitted provided that the following conditions are met: | |
7 | ||
8 | * Redistributions of source code must retain the above copyright | |
9 | notice, this list of conditions and the following disclaimer. | |
10 | ||
11 | * Redistributions in binary form must reproduce the above | |
12 | copyright notice, this list of conditions and the following | |
13 | disclaimer in the documentation and/or other materials provided | |
14 | with the distribution. | |
15 | ||
16 | * Neither the name of Getty Ritter nor the names of other | |
17 | contributors may be used to endorse or promote products derived | |
18 | from this software without specific prior written permission. | |
19 | ||
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
1 |
# |
|
1 | # telml | |
2 | 2 | |
3 | A Tex-Like Markup Language—which is a bit of a misnomer, because | |
4 | as presented, it's more of a structured data language optimized for | |
5 | text documents than an actual markup language. A markup-oriented | |
6 | use of this library can be found in the | |
3 | This is the repository for both the `telml` and the `telml-markup` | |
4 | packages. The former implements a TeX-inspired data format, while | |
5 | the latter uses that data format to implement a lightweight, | |
6 | extensible markup format. | |
7 | ||
8 | This repository contains a | |
9 | [cabal.project file](http://blog.ezyang.com/2016/05/announcing-cabal-new-build-nix-style-local-builds/), which facilitates building multiple packages together; | |
10 | this repository had previously been used for just the `telml` package, | |
11 | and the corresponding | |
7 | 12 | [`telml-markup`](https://github.com/aisamanra/telml-markup) |
8 | package. | |
9 | ||
10 | A _TeLML Document_ is a sequence of textual | |
11 | fragments interspersed with "tags", which have the format | |
12 | ||
13 | ~~~~ | |
14 | \tagname{ document | document | ... } | |
15 | ~~~~ | |
16 | ||
17 | Whitespace is _not_ allowed between the backslash and the tag | |
18 | name, but _is_ allowed between the tag name and the following | |
19 | block. This means that the following is allowed: | |
20 | ||
21 | ~~~~ | |
22 | \tag | |
23 | { element_1 | |
24 | | element_2 | |
25 | | ... | |
26 | | element_n | |
27 | } | |
28 | ~~~~ | |
29 | ||
30 | In contrast to TeX, the block is _obligatory_, i.e. | |
31 | the following is **not** a valid document: | |
32 | ||
33 | ~~~~ | |
34 | \p{this \br and that} | |
35 | ~~~~ | |
36 | ||
37 | Whereas this is: | |
38 | ||
39 | ~~~~ | |
40 | \p{this \br{} and that} | |
41 | ~~~~ | |
42 | ||
43 | (This restriction might at some point be lifted based on experience | |
44 | with the format.) | |
45 | ||
46 | The intended use for TeLML is as a building-block for specific | |
47 | markup formats in which you might want to have arbitrary new | |
48 | tags, but don't want to use an XML-based solution. | |
49 | ||
50 | # Formal Grammar | |
51 | ||
52 | ~~~~ | |
53 | <document> ::= <fragment>* | |
54 | <fragment> ::= <tag> | <text> | "{" <document> "}" | |
55 | ||
56 | <text> ::= /([^\]|[\][\{}|])*/ | |
57 | ||
58 | <tag> ::= "\" <tagname> <spaces> "{" <arglist> "}" | |
59 | <tagname> ::= /[A-Za-z][A-Za-z0-9_-]*/ | |
60 | <arglist> ::= <document> ("|" <document>)* | |
61 | <spaces> ::= /[ \t\r\n]*/ | |
62 | ~~~~ | |
63 | ||
64 | # Possible Future Modifications | |
65 | ||
66 | Variations on this theme that might be possible: | |
67 | ||
68 | - It might (as stated above) be worthwhile to attempt to lift the | |
69 | restriction that every tag has an argument block, if nullary | |
70 | tags are commonly used. | |
71 | - It might be nice to optionally allow `\begin{...}` and `\end{...}` | |
72 | tags to be parsed as a special case of certain delimiters. | |
73 | - Maybe a special case for named arguments of the form | |
74 | `\tag{arg=stuff|arg=stuff}`, although this could be | |
75 | handled at present by using nested tags as in | |
76 |
|
|
13 | repository is left but will not be updated. |
1 | packages: telml/telml.cabal, telml-markup/telml-markup.cabal |
1 | {-# LANGUAGE LambdaCase #-} | |
2 | ||
3 | module Data.TeLML.Parser (Fragment(..), Document, parse) where | |
4 | ||
5 | import Data.Char (isAlpha, isAlphaNum, isSpace) | |
6 | import Data.TeLML.Type | |
7 | ||
8 | type Result a = Either String (String, a) | |
9 | type Parse a = String -> Result a | |
10 | ||
11 | -- All of these characters are the ones which need escaping if used | |
12 | -- inside a document. | |
13 | isSpecial :: Char -> Bool | |
14 | isSpecial c = c `elem` "\\{}|" | |
15 | ||
16 | -- This is here for, uh, aesthetic reasons. | |
17 | throw :: a -> Either a b | |
18 | throw = Left | |
19 | ||
20 | -- This is 'fmap' named in such a way that it does not conflict with | |
21 | -- 'fmap'. | |
22 | over :: (a -> b) -> Result a -> Result b | |
23 | over _ (Left err) = Left err | |
24 | over f (Right (s, x)) = Right (s, f x) | |
25 | ||
26 | {- And this is a monadic bind. You'll note that this basically has the | |
27 | - same type as almost any other monadic parser combinator library, so | |
28 | - you might be wondering why I'm not just using parsec. On the other | |
29 | - hand, this is literally the only function I actually need there, so | |
30 | - why have another dependency I need to keep up-to-date when I could | |
31 | - just define this trivial function here? | |
32 | - | |
33 | - I'm also not defining this for `Monad` itself because then I'd have | |
34 | - to deal with newtype wrappers and defining methods for the whole | |
35 | - Functor=>Applicative=>Monad hierarchy, when all I really need is | |
36 | - this. | |
37 | -} | |
38 | bind :: Result a -> ((String, a) -> Result b) -> Result b | |
39 | bind (Left err) _ = Left err | |
40 | bind (Right a) f = f a | |
41 | ||
42 | -- Parse a text fragment, handling escapes. This will end as soon as it | |
43 | -- sees any non-escaped special character. | |
44 | pText :: Parse Fragment | |
45 | pText = over Text . go | |
46 | where go ('\\':x:xs) | |
47 | | isSpecial x = (x:) `over` go xs | |
48 | go i@(x:xs) | |
49 | | isSpecial x = return (i, "") | |
50 | | otherwise = (x:) `over` go xs | |
51 | go "" = return ("", "") | |
52 | ||
53 | -- Parse a tag name of length >= 0. | |
54 | pTagName :: Parse String | |
55 | pTagName s = go s `bind` ensureName | |
56 | where go i@(x:xs) | |
57 | | isAlphaNum x = (x:) `over` go xs | |
58 | | elem x "-_" = (x:) `over` go xs | |
59 | | otherwise = return (i, "") | |
60 | go [] = throw "unexpected end-of-document while parsing tag" | |
61 | ensureName (xs, name) | |
62 | | length name == 0 = | |
63 | throw "expected tag name after `\\'" | |
64 | | not (isAlpha (head name)) = | |
65 | throw "tag names must begin with an alphabetic character" | |
66 | | otherwise = return (xs, name) | |
67 | ||
68 | -- Skip any space charaters, returning () for the first non-space | |
69 | -- character (including EOF). | |
70 | skipSpace :: Parse () | |
71 | skipSpace i@(x:xs) | |
72 | | isSpace x = skipSpace xs | |
73 | | otherwise = return (i, ()) | |
74 | skipSpace _ = return ("", ()) | |
75 | ||
76 | -- Parse a tag assuming that a backslash has already been encountered. | |
77 | pTag :: Parse Fragment | |
78 | pTag i = | |
79 | bind (pTagName i) $ \ (i', name) -> | |
80 | bind (skipSpace i') $ \case | |
81 | ('{':i'', ()) -> Tag name `over` pArgs i'' | |
82 | ("",_) -> throw "unexpected end-of-document while parsing tag" | |
83 | _ -> throw "expected start of block" | |
84 | ||
85 | -- Parse the vertical-bar-separated arguments to a tag, ending when a | |
86 | -- right curly brace is encountered. | |
87 | pArgs :: Parse [Document] | |
88 | pArgs ('}':xs) = return (xs, []) | |
89 | pArgs s = bind (pFragments s) $ \case | |
90 | ('|':xs, cs) -> (cs:) `over` pArgs xs | |
91 | ('}':xs, cs) -> return (xs, [cs]) | |
92 | _ -> throw "[unreachable]" | |
93 | ||
94 | -- Parse any fragment, deciding whether to parse it as a tag or a text chunk | |
95 | pFragment :: Parse Fragment | |
96 | pFragment s@('\\':c:_) | |
97 | | isSpecial c = pText s | |
98 | pFragment ('\\':xs) = pTag xs | |
99 | pFragment s = pText s | |
100 | ||
101 | -- Parse multiple fragments, ending when it encounters a }, or |, or end-of-file. | |
102 | pFragments :: Parse Document | |
103 | pFragments "" = return ("", []) | |
104 | pFragments ('{':s) = bind (pFragments s) $ \case | |
105 | ('}':xs, cs) -> bind (pFragments xs) $ \(xs', cs') -> return (xs', cs ++ cs') | |
106 | (x:_, _) -> throw ("unexpected " ++ show x ++ "; expected '}'") | |
107 | ([], _) -> throw ("unexpected end-of-document while parsing block") | |
108 | pFragments s@(x:_) | |
109 | | x `elem` "}|" = return (s, []) | |
110 | | otherwise = | |
111 | bind (pFragment s) $ \case | |
112 | (s', c) -> (c:) `over` pFragments s' | |
113 | ||
114 | -- | Parse a string into a @TeLML@ 'Fragment'. | |
115 | parse :: String -> Either String Document | |
116 | parse str = case pFragments str of | |
117 | Right ("", r) -> return r | |
118 | Right ('}':_, _) -> throw ("Found unmatched '}' in document") | |
119 | Right (s, _) -> throw ("expected end of document but found " ++ show s) | |
120 | Left err -> throw err |
1 | {-# LANGUAGE DeriveDataTypeable #-} | |
2 | ||
3 | module Data.TeLML.Type (Document, Fragment(..)) where | |
4 | ||
5 | import Control.DeepSeq (NFData(..)) | |
6 | import Data.Data (Data) | |
7 | import Data.Typeable (Typeable) | |
8 | import Data.String (IsString(..)) | |
9 | ||
10 | -- | A 'Document' is zero or more 'Fragment's. | |
11 | type Document = [Fragment] | |
12 | ||
13 | -- | A 'Fragment' is either a snippet of text (as indicated by the | |
14 | -- 'Text' constructor) or a tag (as indicated by the 'Tag' | |
15 | -- constructor). The former is a raw string, and the latter consists | |
16 | -- of a name followed by zero or more 'Document's. | |
17 | data Fragment | |
18 | = Text String | |
19 | | Tag String [Document] | |
20 | deriving (Eq, Show, Typeable, Data) | |
21 | ||
22 | instance IsString Fragment where | |
23 | fromString = Text | |
24 | ||
25 | instance NFData Fragment where | |
26 | rnf (Text s) = rnf s | |
27 | rnf (Tag s l) = rnf s `seq` rnf l |
1 | Copyright (c) 2014, Getty Ritter | |
2 | ||
3 | All rights reserved. | |
4 | ||
5 | Redistribution and use in source and binary forms, with or without | |
6 | modification, are permitted provided that the following conditions are met: | |
7 | ||
8 | * Redistributions of source code must retain the above copyright | |
9 | notice, this list of conditions and the following disclaimer. | |
10 | ||
11 | * Redistributions in binary form must reproduce the above | |
12 | copyright notice, this list of conditions and the following | |
13 | disclaimer in the documentation and/or other materials provided | |
14 | with the distribution. | |
15 | ||
16 | * Neither the name of Getty Ritter nor the names of other | |
17 | contributors may be used to endorse or promote products derived | |
18 | from this software without specific prior written permission. | |
19 | ||
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
1 | # TeLML | |
2 | ||
3 | A Tex-Like Markup Language—which is a bit of a misnomer, because | |
4 | as presented, it's more of a structured data language optimized for | |
5 | text documents than an actual markup language. A markup-oriented | |
6 | use of this library can be found in the | |
7 | [`telml-markup`](https://github.com/aisamanra/telml-markup) | |
8 | package. | |
9 | ||
10 | A _TeLML Document_ is a sequence of textual | |
11 | fragments interspersed with "tags", which have the format | |
12 | ||
13 | ~~~~ | |
14 | \tagname{ document | document | ... } | |
15 | ~~~~ | |
16 | ||
17 | Whitespace is _not_ allowed between the backslash and the tag | |
18 | name, but _is_ allowed between the tag name and the following | |
19 | block. This means that the following is allowed: | |
20 | ||
21 | ~~~~ | |
22 | \tag | |
23 | { element_1 | |
24 | | element_2 | |
25 | | ... | |
26 | | element_n | |
27 | } | |
28 | ~~~~ | |
29 | ||
30 | In contrast to TeX, the block is _obligatory_, i.e. | |
31 | the following is **not** a valid document: | |
32 | ||
33 | ~~~~ | |
34 | \p{this \br and that} | |
35 | ~~~~ | |
36 | ||
37 | Whereas this is: | |
38 | ||
39 | ~~~~ | |
40 | \p{this \br{} and that} | |
41 | ~~~~ | |
42 | ||
43 | (This restriction might at some point be lifted based on experience | |
44 | with the format.) | |
45 | ||
46 | The intended use for TeLML is as a building-block for specific | |
47 | markup formats in which you might want to have arbitrary new | |
48 | tags, but don't want to use an XML-based solution. | |
49 | ||
50 | # Formal Grammar | |
51 | ||
52 | ~~~~ | |
53 | <document> ::= <fragment>* | |
54 | <fragment> ::= <tag> | <text> | "{" <document> "}" | |
55 | ||
56 | <text> ::= /([^\]|[\][\{}|])*/ | |
57 | ||
58 | <tag> ::= "\" <tagname> <spaces> "{" <arglist> "}" | |
59 | <tagname> ::= /[A-Za-z][A-Za-z0-9_-]*/ | |
60 | <arglist> ::= <document> ("|" <document>)* | |
61 | <spaces> ::= /[ \t\r\n]*/ | |
62 | ~~~~ | |
63 | ||
64 | # Possible Future Modifications | |
65 | ||
66 | Variations on this theme that might be possible: | |
67 | ||
68 | - It might (as stated above) be worthwhile to attempt to lift the | |
69 | restriction that every tag has an argument block, if nullary | |
70 | tags are commonly used. | |
71 | - It might be nice to optionally allow `\begin{...}` and `\end{...}` | |
72 | tags to be parsed as a special case of certain delimiters. | |
73 | - Maybe a special case for named arguments of the form | |
74 | `\tag{arg=stuff|arg=stuff}`, although this could be | |
75 | handled at present by using nested tags as in | |
76 | `\tag{\arg{stuff}|\arg{stuff}}`. |
1 | name: telml | |
2 | version: 0.1.0.0 | |
3 | synopsis: A lightweight TeX-like markup format. | |
4 | description: A lightweight TeX-like markup format. | |
5 | license: BSD3 | |
6 | license-file: LICENSE | |
7 | author: Getty Ritter | |
8 | maintainer: gdritter@galois.com | |
9 | copyright: ©2015 Getty Ritter | |
10 | category: Data | |
11 | build-type: Simple | |
12 | cabal-version: >=1.10 | |
13 | ||
14 | library | |
15 | exposed-modules: Data.TeLML | |
16 | ghc-options: -Wall | |
17 | other-modules: Data.TeLML.Parser, Data.TeLML.Type | |
18 | build-depends: base >=4.7 && <4.9, | |
19 | deepseq >=1.4 && <2 | |
20 | default-language: Haskell2010 |
1 | {-# LANGUAGE LambdaCase #-} | |
2 | ||
3 | module Data.TeLML.Markup where | |
4 | ||
5 | import Control.Monad (void) | |
6 | import Data.TeLML | |
7 | import Text.Blaze.Html | |
8 | import Text.Blaze.Html5 hiding (map, head, html) | |
9 | import Text.Blaze.Html5.Attributes hiding (name) | |
10 | ||
11 | -- | Render a TeLML document with an extra set of possible tags. | |
12 | renderWith :: [(String, Renderer)] -> Document -> Either String Html | |
13 | renderWith rs = | |
14 | fmap (void . sequence) . mapM (renderPara (basicTags ++ rs)) . gatherPara | |
15 | ||
16 | -- | Render a TeLML document with the default set of tags. | |
17 | render :: Document -> Either String Html | |
18 | render = renderWith [] | |
19 | ||
20 | -- This is a gross function, but I'm not sure how to decompose it any | |
21 | -- other way. It takes a Document---i.e. a set of Fragments---and | |
22 | -- splits it apart whenever it comes across double newlines. | |
23 | gatherPara :: Document -> [Document] | |
24 | gatherPara = reverse . map reverse . go [[]] | |
25 | where go rs [] = rs | |
26 | go (r:rs) (t@Tag {}:ts) = go ((t:r):rs) ts | |
27 | go (r:rs) (Text s:ts) = case splitString s of | |
28 | [] -> go (r:rs) ts | |
29 | [x] -> go ((Text x:r):rs) ts | |
30 | xs -> go (map ((:[]) . Text) (tail xs) ++ | |
31 | ((Text (head xs):r) : rs)) ts | |
32 | go _ _ = error "[unreachable]" | |
33 | ||
34 | -- Split a string at double-newlines. | |
35 | splitString :: String -> [String] | |
36 | splitString = filter (/= "") . go | |
37 | where go ('\n':'\n':xs) = "\n":go xs | |
38 | go (x:xs) = let r:rs = go xs in ((x:r):rs) | |
39 | go "" = [""] | |
40 | ||
41 | -- This is just to make type signatures shorter | |
42 | type HtmlE = Either String Html | |
43 | ||
44 | type Renderer = (Fragment -> HtmlE, [Document]) -> HtmlE | |
45 | ||
46 | -- The built-in set of tags (subject to change) | |
47 | basicTags :: [(String, Renderer)] | |
48 | basicTags = | |
49 | [ ("em" | |
50 | , \case (f,[rs]) -> fmap (em . sequence_) (mapM f rs) | |
51 | _ -> Left "wrong arity for em/1" | |
52 | ) | |
53 | , ("strong" | |
54 | , \case (f,[rs]) -> fmap (strong . sequence_) (mapM f rs) | |
55 | _ -> Left "wrong arity for strong/1" | |
56 | ) | |
57 | , ("code" | |
58 | , \case (f,[rs]) -> fmap (code . sequence_) (mapM f rs) | |
59 | _ -> Left "wrong arity for code/1" | |
60 | ) | |
61 | , ("link" | |
62 | , \case (f,[[Text l],r]) -> let go h = a ! href (stringValue l) $ h | |
63 | in fmap (go . sequence_) (mapM f r) | |
64 | (_,[_,_]) -> Left "link target should be string" | |
65 | _ -> Left "wrong arity for link/1" | |
66 | ) | |
67 | ] | |
68 | ||
69 | -- render a single paragraph | |
70 | renderPara :: [(String, Renderer)] -> Document -> Either String Html | |
71 | renderPara taglist ds = fmap (p . sequence_) (mapM go ds) | |
72 | where go (Text ts) = Right (toMarkup ts) | |
73 | go (Tag tx rs) = exec tx rs taglist | |
74 | exec name args ((tag, func):tags) | |
75 | | name == tag = case func (go, args) of | |
76 | Right html -> Right html | |
77 | Left {} -> exec name args tags | |
78 | exec name args (_:tags) = exec name args tags | |
79 | exec name args [] = Left $ | |
80 | "Error: no match for tag " ++ name ++ "/" ++ show (length args) |
1 | Copyright (c) 2015, Getty Ritter | |
2 | ||
3 | All rights reserved. | |
4 | ||
5 | Redistribution and use in source and binary forms, with or without | |
6 | modification, are permitted provided that the following conditions are met: | |
7 | ||
8 | * Redistributions of source code must retain the above copyright | |
9 | notice, this list of conditions and the following disclaimer. | |
10 | ||
11 | * Redistributions in binary form must reproduce the above | |
12 | copyright notice, this list of conditions and the following | |
13 | disclaimer in the documentation and/or other materials provided | |
14 | with the distribution. | |
15 | ||
16 | * Neither the name of Getty Ritter nor the names of other | |
17 | contributors may be used to endorse or promote products derived | |
18 | from this software without specific prior written permission. | |
19 | ||
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
1 | # telml-markup | |
2 | ||
3 | The `telml-markup` package provides a simple, LaTeX-inspired markup | |
4 | language with the possibility of writing extensible instructions. | |
5 | ||
6 | **This is in early phases, and so should not be relied upon for any | |
7 | kind of stability at this stage.** In particular, the set of tags | |
8 | and how they are understood could change radically during development. | |
9 | ||
10 | ## Sample Document | |
11 | ||
12 | ~~~~ | |
13 | \code{telml-markup} uses a mostly HTML-like set of names, so we create | |
14 | \em{emphasis} with \code{\\em}, \strong{bolding} with \em{\\strong}, | |
15 | and \code{fixed-width text} with \code{\\code}. | |
16 | ||
17 | One major difference is the \code{\\link} tag, which takes the form | |
18 | \code{\\link\{url\|text\}}, | |
19 | \link{https://github.com/aisamanra/telml-markup|like this}. | |
20 | ~~~~ | |
21 | ||
22 | This produces the following rendered HTML: | |
23 | ||
24 | > <p><code>telml-markup</code> uses an HTML-like set of names, so we create | |
25 | > <em>emphasis</em> with <code>\em</code>, <strong>bolding</strong> with <code>\strong</code>, | |
26 | > and <code>fixed-width text</code> with <code>\code</code>. | |
27 | > </p><p>One major difference is the <code>\link</code> tag, which takes the form | |
28 | > <code>\link{url|text}</code>, | |
29 | > <a href="https://github.com/aisamanra/telml-markup">like this</a>. | |
30 | > </p> | |
31 | ||
32 | ## Basic Usage | |
33 | ||
34 | The `render` function takes a [`telml`](https://github.com/aisamanra/telml) | |
35 | document and renders it into the | |
36 | [`blaze-html`](http://hackage.haskell.org/package/blaze-html-0.8.0.2) | |
37 | [`Html`](http://hackage.haskell.org/package/blaze-html-0.8.0.2/docs/Text-Blaze-Html.html#t:Html) | |
38 | type, which can then be rendered into HTML. For example, the following | |
39 | is a minimal program which interprets input on `stdin` and prints the | |
40 | rendered HTML (or an error message) to `stdout`. | |
41 | ||
42 | ~~~~{.haskell} | |
43 | module Main | |
44 | ||
45 | import Control.Monad ((>=>)) | |
46 | import Data.TeLML (parse) | |
47 | import Data.TeLML.Markup (render) | |
48 | import System.Exit (exitFailure) | |
49 | import Text.Blaze.Renderer.String (renderMarkup) | |
50 | ||
51 | main :: IO () | |
52 | main = do | |
53 | str <- getContents | |
54 | case (parse >=> render) str of | |
55 | Left err -> putStrLn err >> exitFailure | |
56 | Right doc -> putStrLn (renderMarkup doc) | |
57 | ~~~~ | |
58 | ||
59 | We could invoke it at the command line like so: | |
60 | ||
61 | ~~~~ | |
62 | $ ./telml-markup-test <<EOF | |
63 | > This should be \em{emphasized}. | |
64 | > | |
65 | > This, on the other hand, is \strong{bold}. | |
66 | > EOF | |
67 | <p>This should be<em>emphasized</em>. | |
68 | </p><p>This, on the other hand, is <strong>bold</strong>. | |
69 | </p> | |
70 | ~~~~ | |
71 | ||
72 | If we give it an unknown tag, or a tag with the wrong arity, it will | |
73 | give us an error: | |
74 | ||
75 | ~~~~ | |
76 | $ ./telml-markup-test <<EOF | |
77 | > This is a \fake{tag}. | |
78 | > EOF | |
79 | Error: no match for tag fake/1 | |
80 | $ ./telml-markup-test <<EOF | |
81 | > This is a tag with \em{too|many|arguments}. | |
82 | > EOF | |
83 | Error: no match for tag em/3 | |
84 | ~~~~ | |
85 | ||
86 | ## Extended Usage | |
87 | ||
88 | The `renderWith` function takes a list of additional tags and their | |
89 | denotations (in the form of functions from `telml` fragments to | |
90 | `blaze-html` fragments.) This allows you to add new tags to the | |
91 | markup for particular purposes. | |
92 | ||
93 | For example, here we add a tag so that `\hello{...}` will render out to | |
94 | the HTML string `<strong>Hello, ...!</strong>`: | |
95 | ||
96 | ~~~~{.haskell} | |
97 | module Main where | |
98 | ||
99 | import Control.Monad ((>=>)) | |
100 | import Data.TeLML (parse) | |
101 | import Data.TeLML.Markup (Renderer, renderWith) | |
102 | import System.Exit (exitFailure) | |
103 | import Text.Blaze.Html5 (strong, toMarkup) | |
104 | import Text.Blaze.Renderer.String (renderMarkup) | |
105 | ||
106 | myTags :: [(String, Renderer)] | |
107 | myTags = | |
108 | [ ("hello", \ c -> case c of | |
109 | (render, [name]) -> do | |
110 | rName <- mapM render name | |
111 | return $ strong $ do | |
112 | toMarkup "Hello, " | |
113 | sequence_ rName | |
114 | toMarkup "!" | |
115 | (_, args) -> Left ("Did not match hello/" ++ show (length args)) | |
116 | ) | |
117 | ] | |
118 | ||
119 | main :: IO () | |
120 | main = do | |
121 | str <- getContents | |
122 | case (parse >=> renderWith myTags) str of | |
123 | Left err -> putStrLn err >> exitFailure | |
124 | Right doc -> putStrLn (renderMarkup doc) | |
125 | ~~~~ | |
126 | ||
127 | We can execute this to test it: | |
128 | ||
129 | ~~~~ | |
130 | $ ./telml-markup-extended-test <<EOF | |
131 | > Now we can do this: \hello{friend}. | |
132 | > EOF | |
133 | <p>Now we can do this: <strong>Hello, friend!</strong>. | |
134 | </p> | |
135 | ~~~~ |
1 | Name: telml-markup | |
2 | Version: 0.0.0 | |
3 | Author: Getty Ritter <gettylefou@gmail.com> | |
4 | Maintainer: Getty Ritter <gettylefou@gmail.com> | |
5 | License: BSD3 | |
6 | License-File: LICENSE | |
7 | -- Synopsis: | |
8 | -- Description: | |
9 | Cabal-Version: >= 1.10 | |
10 | Build-Type: Simple | |
11 | Extra-Source-Files: README.md, ChangeLog.md | |
12 | ||
13 | Library | |
14 | Default-Language: Haskell2010 | |
15 | GHC-Options: -Wall | |
16 | Exposed-Modules: Data.TeLML.Markup | |
17 | Build-Depends: base >= 4 && < 5, telml, blaze-markup, blaze-html, mtl | |
18 | ||
19 | -- Test-Suite spec | |
20 | -- Type: exitcode-stdio-1.0 | |
21 | -- Default-Language: Haskell2010 | |
22 | -- Hs-Source-Dirs: test | |
23 | -- Ghc-Options: -Wall | |
24 | -- Main-Is: Spec.hs | |
25 | -- Build-Depends: base | |
26 | -- , hspec | |
27 | ||
28 | Source-Repository head | |
29 | Type: git | |
30 | -- Location: |
1 | name: telml | |
2 | version: 0.1.0.0 | |
3 | synopsis: A lightweight TeX-like markup format. | |
4 | description: A lightweight TeX-like markup format. | |
5 | license: BSD3 | |
6 | license-file: LICENSE | |
7 | author: Getty Ritter | |
8 | maintainer: gdritter@galois.com | |
9 | copyright: ©2015 Getty Ritter | |
10 | category: Data | |
11 | build-type: Simple | |
12 | cabal-version: >=1.10 | |
13 | ||
14 | library | |
15 | exposed-modules: Data.TeLML | |
16 | ghc-options: -Wall | |
17 | other-modules: Data.TeLML.Parser, Data.TeLML.Type | |
18 | build-depends: base >=4.7 && <4.9, | |
19 | deepseq >=1.4 && <2 | |
20 | default-language: Haskell2010 |