gdritter repos s-cargot / 883e058
Cleared up Common docs + added dozen parsers for no reason Getty Ritter 9 years ago
1 changed file(s) with 66 addition(s) and 30 deletion(s). Collapse all Expand all
1 module Data.SCargot.Common ( number
1 module Data.SCargot.Common ( -- * Numeric Literal Parsers
2 binNumber
3 , signedBinNumber
4 , octNumber
5 , signedOctNumber
26 , decNumber
7 , signedDecNumber
8 , dozNumber
9 , signedDozNumber
310 , hexNumber
4 , octNumber
5 , sign
11 , signedHexNumber
12 , signed
613 -- * Lisp Identifier Syntaxes
714 , parseR5RSIdent
815 , parseR6RSIdent
3441
3542 -- | Parse an identifier according to the R6RS Scheme standard. An
3643 -- R6RS identifier may include inline hexadecimal escape sequences
37 -- so that, for example, 'foo' is equivalent to 'f\x6f;o', and is
44 -- so that, for example, @foo@ is equivalent to @f\x6f;o@, and is
3845 -- more liberal than R5RS as to which Unicode characters it may
3946 -- accept.
4047 parseR6RSIdent :: Parser Text
6976 -- R7RS identifier, in addition to a typical identifier format,
7077 -- can also be a chunk of text surrounded by vertical bars that
7178 -- can contain spaces and other characters. Unlike R6RS, it does
72 -- not allow escapes to be included in identifiers that are not
73 -- surrounded by vertical bars.
79 -- not allow escapes to be included in identifiers unless those
80 -- identifiers are surrounded by vertical bars.
7481 parseR7RSIdent :: Parser Text
7582 parseR7RSIdent = T.pack <$>
7683 ( (:) <$> initial <*> many subsequent
118125 | c == 'e' || c == 'E' = 0xe
119126 | c == 'f' || c == 'F' = 0xf
120127 | c >= '0' && c <= '9' = fromEnum c - fromEnum '0'
128 | c == '\x218a' = 0xa
129 | c == '\x218b' = 0xb
121130 | otherwise = error ("Unknown letter in number: " ++ show c)
122131
123 -- | A parser for bare binary numbers
124 binNumber :: Parser Integer
125 binNumber = number 2 (char '0' <|> char '1')
126
127 -- | A parser for bare octal numbers
128 octNumber :: Parser Integer
129 octNumber = number 8 digit
130
131 -- | A parser for bare decimal numbers
132 decNumber :: Parser Integer
133 decNumber = number 10 digit
134
135 -- | A parser for bare hexadecimal numbers
136 hexNumber :: Parser Integer
137 hexNumber = number 16 hexDigit
138
-- | Parse an optional numeric sign and return it as a function on numbers.
--   A leading @+@ yields 'id', a leading @-@ yields 'negate', and when
--   neither character is present the parser returns 'id' without consuming
--   any input. Apply the result to an unsigned literal to get a signed one:
--
-- > myNum = sign <*> decNumber
sign :: Num a => Parser (a -> a)
sign = plus <|> minus <|> unsigned
  where
    plus     = char '+' *> pure id
    minus    = char '-' *> pure negate
    unsigned = pure id
136
-- | Wrap a parser for an unsigned numeric literal so that it also accepts
--   an optional leading @+@ or @-@. A leading @-@ negates the parsed value;
--   a leading @+@, or no sign at all, leaves it unchanged.
signed :: Num a => Parser a -> Parser a
signed unsignedLit = sign <*> unsignedLit
142
-- | A parser for unsigned binary numbers, built from 'number' with radix 2
--   and a digit parser that accepts only @0@ and @1@.
binNumber :: Parser Integer
binNumber = number 2 bit
  where
    bit = char '0' <|> char '1'
146
-- | A parser for binary numbers that may carry a leading @+@ or @-@ sign.
signedBinNumber :: Parser Integer
signedBinNumber = signed binNumber
150
-- | A parser for unsigned octal numbers, built from 'number' with radix 8
--   and a digit parser that accepts only @0@ through @7@.
octNumber :: Parser Integer
octNumber = number 8 octalDigit
  where
    octalDigit = oneOf ['0' .. '7']
154
-- | A parser for octal numbers that may carry a leading @+@ or @-@ sign.
signedOctNumber :: Parser Integer
signedOctNumber = signed octNumber
158
-- | A parser for unsigned decimal numbers, built from 'number' with radix 10
--   and the @digit@ character parser.
decNumber :: Parser Integer
decNumber = number radix digit
  where
    radix = 10
162
-- | A parser for decimal numbers that may carry a leading @+@ or @-@ sign.
signedDecNumber :: Parser Integer
signedDecNumber = signed decNumber
166
-- | A parser for a single duodecimal (dozenal) digit: any decimal digit, the
--   ASCII letters @a@ or @b@ in either case, or the Unicode characters
--   @'\x218a'@ (↊) and @'\x218b'@ (↋).
dozDigit :: Parser Char
dozDigit = digit <|> oneOf "AaBb\x218a\x218b"
168
-- | A parser for non-signed duodecimal (dozenal) numbers. This understands both
--   the ASCII characters @'a'@ and @'b'@ and the Unicode characters @'\x218a'@ (↊)
--   and @'\x218b'@ (↋) as digits with the decimal values @10@ and @11@
--   respectively.
dozNumber :: Parser Integer
dozNumber = number 12 dozDigit  -- duodecimal is base 12, not 16
175
-- | A parser for signed duodecimal (dozenal) numbers, with an optional
--   leading @+@ or @-@.
signedDozNumber :: Parser Integer
signedDozNumber = signed dozNumber
179
-- | A parser for unsigned hexadecimal numbers, built from 'number' with
--   radix 16 and the @hexDigit@ character parser.
hexNumber :: Parser Integer
hexNumber = number radix hexDigit
  where
    radix = 16
183
-- | A parser for hexadecimal numbers that may carry a leading @+@ or @-@ sign.
signedHexNumber :: Parser Integer
signedHexNumber = signed hexNumber