Added a few new identifier syntaxes
Getty Ritter
8 years ago
1 | 1 | module Data.SCargot.Common ( -- $intro |
2 |
-- * |
|
2 | -- * Identifier Syntaxes | |
3 | 3 | parseR5RSIdent |
4 | 4 | , parseR6RSIdent |
5 | 5 | , parseR7RSIdent |
6 | , parseXIDIdentStrict | |
7 | , parseXIDIdentGeneral | |
8 | , parseHaskellIdent | |
9 | , parseHaskellVariable | |
10 | , parseHaskellConstructor | |
6 | 11 | -- * Numeric Literal Parsers |
7 | 12 | , signed |
8 | 13 | , prefixedNumber |
116 | 121 | cons2 a b cs = a : b : cs |
117 | 122 | cons3 a b c ds = a : b : c : ds |
118 | 123 | |
-- | Parse a Haskell variable identifier. The first character must be
-- a lower-case letter; every following character may be a lower-case
-- letter, an upper-case letter, a digit, a single quote, or an
-- underscore.
parseHaskellVariable :: Parser Text
parseHaskellVariable = T.pack <$> ((:) <$> firstChar <*> many restChar)
  where
    -- A variable name has to open with a lower-case letter.
    firstChar = satisfy isLower
    -- Alternatives are tried left to right, in this order.
    restChar  =  satisfy isLower
             <|> satisfy isUpper
             <|> satisfy isDigit
             <|> char '\''
             <|> char '_'
137 | ||
-- | Parse a Haskell constructor name. The first character must be an
-- upper-case letter; every following character may be a lower-case
-- letter, an upper-case letter, a digit, a single quote, or an
-- underscore.
parseHaskellConstructor :: Parser Text
parseHaskellConstructor = T.pack <$> ((:) <$> firstChar <*> many restChar)
  where
    -- A constructor name has to open with an upper-case letter.
    firstChar = satisfy isUpper
    -- Alternatives are tried left to right, in this order.
    restChar  =  satisfy isLower
             <|> satisfy isUpper
             <|> satisfy isDigit
             <|> char '\''
             <|> char '_'
151 | ||
-- | Parse a Haskell identifier. The first character must be an
-- upper-case or lower-case letter; every following character may be a
-- letter of either case, a digit, a single quote, or an underscore.
-- This accepts both variable names and constructor names.
parseHaskellIdent :: Parser Text
parseHaskellIdent = T.pack <$> ((:) <$> firstChar <*> many restChar)
  where
    -- Either case is allowed up front; upper-case is tried first.
    firstChar = satisfy isUpper <|> satisfy isLower
    -- Alternatives are tried left to right, in this order.
    restChar  =  satisfy isLower
             <|> satisfy isUpper
             <|> satisfy isDigit
             <|> char '\''
             <|> char '_'
166 | ||
-- Parse a single character, accepting it only when 'hasCategory'
-- holds for that character and the supplied list of categories.
hasCat :: [GeneralCategory] -> Parser Char
hasCat cats = satisfy (`hasCategory` cats)
170 | ||
-- The general categories accepted for the leading character of an
-- XID-style identifier (see 'parseXIDIdentStrict').
xidStart :: [GeneralCategory]
xidStart =
  [ UppercaseLetter  -- Lu
  , LowercaseLetter  -- Ll
  , TitlecaseLetter  -- Lt
  , ModifierLetter   -- Lm
  , OtherLetter      -- Lo
  , LetterNumber     -- Nl
  ]
179 | ||
-- The general categories accepted for the non-leading characters of
-- an XID-style identifier: everything in 'xidStart', followed by the
-- mark, decimal-number and connector-punctuation categories.
xidContinue :: [GeneralCategory]
xidContinue = xidStart ++ continueOnly
  where
    -- Categories allowed after the first character but not as it.
    continueOnly =
      [ NonSpacingMark        -- Mn
      , SpacingCombiningMark  -- Mc
      , DecimalNumber         -- Nd
      , ConnectorPunctuation  -- Pc
      ]
186 | ||
-- | Parse a Unicode identifier of the shape
-- @<XID_Start> <XID_Continue>*@, which closely matches the
-- identifiers of most C-like languages. The underscore is not part
-- of the @XID_Start@ category, so @__foo@ is rejected by this
-- parser. Use 'parseXIDIdentGeneral' when identifiers with a leading
-- underscore must be accepted.
parseXIDIdentStrict :: Parser Text
parseXIDIdentStrict = T.pack <$> ((:) <$> startChar <*> many continueChar)
  where
    startChar    = hasCat xidStart
    continueChar = hasCat xidContinue
197 | ||
-- | Parse a Unicode identifier of the shape
-- @(<XID_Start> | '_') <XID_Continue>*@, which closely matches the
-- identifiers of most C-like languages. In contrast to
-- 'parseXIDIdentStrict', an underscore is also accepted as the
-- leading character. That is closer to what languages such as C and
-- Java allow, but it departs somewhat from the
-- <http://unicode.org/reports/tr31/ Unicode Identifier and
-- Pattern Syntax standard>.
parseXIDIdentGeneral :: Parser Text
parseXIDIdentGeneral = T.pack <$> ((:) <$> startChar <*> many continueChar)
  where
    -- An @XID_Start@ character is tried before the underscore.
    startChar    = hasCat xidStart <|> char '_'
    continueChar = hasCat xidContinue
210 | ||
119 | 211 | -- | A helper function for defining parsers for arbitrary-base integers. |
120 | 212 | -- The first argument will be the base, and the second will be the |
121 | 213 | -- parser for the individual digits. |