gdritter repos s-cargot / 2520c10
Added a few new identifier syntaxes Getty Ritter 8 years ago
1 changed file(s) with 93 addition(s) and 1 deletion(s). Collapse all Expand all
11 module Data.SCargot.Common ( -- $intro
2 -- * Lisp Identifier Syntaxes
2 -- * Identifier Syntaxes
33 parseR5RSIdent
44 , parseR6RSIdent
55 , parseR7RSIdent
6 , parseXIDIdentStrict
7 , parseXIDIdentGeneral
8 , parseHaskellIdent
9 , parseHaskellVariable
10 , parseHaskellConstructor
611 -- * Numeric Literal Parsers
712 , signed
813 , prefixedNumber
116121 cons2 a b cs = a : b : cs
117122 cons3 a b c ds = a : b : c : ds
118123
-- | Parse a Haskell-style variable name. The first character must
--   satisfy 'isLower'; every following character may be a lower-case
--   letter, an upper-case letter, a digit, a single quote, or an
--   underscore. Note that an underscore is only accepted after the
--   first character, never as the leading one.
parseHaskellVariable :: Parser Text
parseHaskellVariable = do
  firstChar <- satisfy isLower
  restChars <- many identTail
  return (T.pack (firstChar : restChars))
  where
    -- Characters permitted after the leading one; the alternatives are
    -- tried left to right.
    identTail = satisfy isLower
            <|> satisfy isUpper
            <|> satisfy isDigit
            <|> char '\''
            <|> char '_'
137
-- | Parse a Haskell-style constructor name. The first character must
--   satisfy 'isUpper'; every following character may be a lower-case
--   letter, an upper-case letter, a digit, a single quote, or an
--   underscore.
parseHaskellConstructor :: Parser Text
parseHaskellConstructor = do
  firstChar <- satisfy isUpper
  restChars <- many identTail
  return (T.pack (firstChar : restChars))
  where
    -- Characters permitted after the leading one; the alternatives are
    -- tried left to right.
    identTail = satisfy isLower
            <|> satisfy isUpper
            <|> satisfy isDigit
            <|> char '\''
            <|> char '_'
151
-- | Parse a Haskell-style identifier of either kind: the first
--   character must be an upper-case or a lower-case letter, and every
--   following character may be a lower-case letter, an upper-case
--   letter, a digit, a single quote, or an underscore. This accepts
--   both variable names and constructor names.
parseHaskellIdent :: Parser Text
parseHaskellIdent = do
  firstChar <- upperChar <|> lowerChar
  restChars <- many identTail
  return (T.pack (firstChar : restChars))
  where
    lowerChar = satisfy isLower
    upperChar = satisfy isUpper
    -- Characters permitted after the leading one; the alternatives are
    -- tried left to right.
    identTail = lowerChar
            <|> upperChar
            <|> satisfy isDigit
            <|> char '\''
            <|> char '_'
166
-- Parser for a single character that 'hasCategory' accepts when
-- checked against the given list of Unicode general categories.
hasCat :: [GeneralCategory] -> Parser Char
hasCat cats = satisfy inCats
  where
    inCats c = hasCategory c cats
170
-- Unicode general categories accepted for the first character of an
-- XID-style identifier: the five letter categories plus letter-numbers.
xidStart :: [GeneralCategory]
xidStart =
  [ UppercaseLetter, LowercaseLetter, TitlecaseLetter
  , ModifierLetter, OtherLetter
  , LetterNumber
  ]
179
-- Unicode general categories accepted for the non-leading characters of
-- an XID-style identifier: everything in 'xidStart', followed by the
-- mark, decimal-number and connector-punctuation categories.
xidContinue :: [GeneralCategory]
xidContinue = xidStart ++ continueOnly
  where
    -- Categories that may continue an identifier but not start one.
    continueOnly =
      [ NonSpacingMark
      , SpacingCombiningMark
      , DecimalNumber
      , ConnectorPunctuation
      ]
186
-- | Parse a Unicode identifier shaped like
--   @<XID_Start> <XID_Continue>*@, the form used by most C-like
--   languages. The leading character must belong to one of the
--   'xidStart' categories, which do not include the underscore, so
--   @__foo@ is rejected. Use 'parseXIDIdentGeneral' when leading
--   underscores should be accepted.
parseXIDIdentStrict :: Parser Text
parseXIDIdentStrict = do
  startChar <- hasCat xidStart
  contChars <- many (hasCat xidContinue)
  return (T.pack (startChar : contChars))
197
-- | Parse a Unicode identifier shaped like
--   @(<XID_Start> | '_') <XID_Continue>*@. In contrast to
--   'parseXIDIdentStrict', the leading character may also be an
--   underscore, which matches languages such as C and Java more
--   closely but departs slightly from the
--   <http://unicode.org/reports/tr31/ Unicode Identifier and
--   Pattern Syntax standard>.
parseXIDIdentGeneral :: Parser Text
parseXIDIdentGeneral = do
  startChar <- hasCat xidStart <|> char '_'
  contChars <- many (hasCat xidContinue)
  return (T.pack (startChar : contChars))
210
119211 -- | A helper function for defining parsers for arbitrary-base integers.
120212 -- The first argument will be the base, and the second will be the
121213 -- parser for the individual digits.