Added a few new identifier syntaxes
Getty Ritter
9 years ago
| 1 | 1 | module Data.SCargot.Common ( -- $intro |
| 2 |
-- * |
|
| 2 | -- * Identifier Syntaxes | |
| 3 | 3 | parseR5RSIdent |
| 4 | 4 | , parseR6RSIdent |
| 5 | 5 | , parseR7RSIdent |
| 6 | , parseXIDIdentStrict | |
| 7 | , parseXIDIdentGeneral | |
| 8 | , parseHaskellIdent | |
| 9 | , parseHaskellVariable | |
| 10 | , parseHaskellConstructor | |
| 6 | 11 | -- * Numeric Literal Parsers |
| 7 | 12 | , signed |
| 8 | 13 | , prefixedNumber |
| 116 | 121 | cons2 a b cs = a : b : cs |
| 117 | 122 | cons3 a b c ds = a : b : c : ds |
| 118 | 123 | |
| 124 | -- | Parse a Haskell variable identifier: one lower-case letter followed by | |
| 125 | -- any number of letters or digits (as judged by 'isLower', 'isUpper', and | |
| 126 | -- 'isDigit'), underscores, or single quotes. A leading underscore is rejected. | |
| 127 | parseHaskellVariable :: Parser Text | |
| 128 | parseHaskellVariable = | |
| 129 | T.pack <$> ((:) <$> small <*> many (small <|> | |
| 130 | large <|> | |
| 131 | digit' <|> | |
| 132 | char '\'' <|> | |
| 133 | char '_')) | |
| 134 | where small = satisfy isLower | |
| 135 | large = satisfy isUpper | |
| 136 | digit' = satisfy isDigit | |
| 137 | ||
| 138 | -- | Parse a Haskell constructor name: one upper-case letter followed by | |
| 139 | -- any number of letters or digits (as judged by 'isLower', 'isUpper', and | |
| 140 | -- 'isDigit'), underscores, or single quotes. | |
| 141 | parseHaskellConstructor :: Parser Text | |
| 142 | parseHaskellConstructor = | |
| 143 | T.pack <$> ((:) <$> large <*> many (small <|> | |
| 144 | large <|> | |
| 145 | digit' <|> | |
| 146 | char '\'' <|> | |
| 147 | char '_')) | |
| 148 | where small = satisfy isLower | |
| 149 | large = satisfy isUpper | |
| 150 | digit' = satisfy isDigit | |
| 151 | ||
| 152 | -- | Parse a Haskell identifier: one upper- or lower-case letter followed by | |
| 153 | -- any number of letters, digits, underscores, or single quotes. This matches | |
| 154 | -- both variable and constructor names. | |
| 155 | parseHaskellIdent :: Parser Text | |
| 156 | parseHaskellIdent = | |
| 157 | T.pack <$> ((:) <$> (large <|> small) | |
| 158 | <*> many (small <|> | |
| 159 | large <|> | |
| 160 | digit' <|> | |
| 161 | char '\'' <|> | |
| 162 | char '_')) | |
| 163 | where small = satisfy isLower | |
| 164 | large = satisfy isUpper | |
| 165 | digit' = satisfy isDigit | |
| 166 | ||
| 167 | -- Parse a single character that 'hasCategory' accepts for the given list of Unicode general categories | |
| 168 | hasCat :: [GeneralCategory] -> Parser Char | |
| 169 | hasCat cats = satisfy (flip hasCategory cats) | |
| 170 | ||
-- Unicode general categories accepted for the first character of an XID
-- identifier: the letter categories plus letter-numbers. Used by
-- 'parseXIDIdentStrict' and 'parseXIDIdentGeneral'.
| 171 | xidStart :: [GeneralCategory] | |
| 172 | xidStart = [ UppercaseLetter | |
| 173 | , LowercaseLetter | |
| 174 | , TitlecaseLetter | |
| 175 | , ModifierLetter | |
| 176 | , OtherLetter | |
| 177 | , LetterNumber | |
| 178 | ] | |
| 179 | ||
-- Unicode general categories accepted for every character after the first in
-- an XID identifier: everything in 'xidStart' plus non-spacing and spacing
-- combining marks, decimal numbers, and connector punctuation.
| 180 | xidContinue :: [GeneralCategory] | |
| 181 | xidContinue = xidStart ++ [ NonSpacingMark | |
| 182 | , SpacingCombiningMark | |
| 183 | , DecimalNumber | |
| 184 | , ConnectorPunctuation | |
| 185 | ] | |
| 186 | ||
| 187 | -- | Parse an identifier of Unicode characters of the form | |
| 188 | -- @<XID_Start> <XID_Continue>*@, which closely matches the | |
| 189 | -- identifiers found in most C-like languages. Note that | |
| 190 | -- the @XID_Start@ set does not include the underscore, | |
| 191 | -- so @__foo@ is not a valid XID identifier. To parse | |
| 192 | -- identifiers that may include leading underscores, use | |
| 193 | -- 'parseXIDIdentGeneral'. | |
| 194 | parseXIDIdentStrict :: Parser Text | |
| 195 | parseXIDIdentStrict = T.pack <$> ((:) <$> hasCat xidStart | |
| 196 | <*> many (hasCat xidContinue)) | |
| 197 | ||
| 198 | -- | Parse an identifier of Unicode characters of the form | |
| 199 | -- @(<XID_Start> | '_') <XID_Continue>*@, which closely matches | |
| 200 | -- the identifiers found in most C-like languages. | |
| 201 | -- Unlike 'parseXIDIdentStrict', this also accepts an | |
| 202 | -- underscore as the leading character, which corresponds more | |
| 203 | -- closely to programming languages like C and Java, but | |
| 204 | -- deviates somewhat from the | |
| 205 | -- <http://unicode.org/reports/tr31/ Unicode Identifier and | |
| 206 | -- Pattern Syntax standard>. | |
| 207 | parseXIDIdentGeneral :: Parser Text | |
| 208 | parseXIDIdentGeneral = T.pack <$> ((:) <$> (hasCat xidStart <|> char '_') | |
| 209 | <*> many (hasCat xidContinue)) | |
| 210 | ||
| 119 | 211 | -- | A helper function for defining parsers for arbitrary-base integers. |
| 120 | 212 | -- The first argument will be the base, and the second will be the |
| 121 | 213 | -- parser for the individual digits. |