Improved width-constrained pretty-printer performance + exposed lazy Text versions
Getty Ritter
7 years ago
| 6 | 6 | ( -- * Pretty-Printing |
| 7 | 7 | encodeOne |
| 8 | 8 | , encode |
| 9 | , encodeOneLazy | |
| 10 | , encodeLazy | |
| 9 | 11 | -- * Pretty-Printing Control |
| 10 | 12 | , SExprPrinter |
| 11 | 13 | , Indent(..) |
| 125 | 127 | , indentPrint = True |
| 126 | 128 | } |
| 127 | 129 | |
| 130 | data Size = Size | |
| 131 | { sizeSum :: !Int | |
| 132 | , sizeMax :: !Int | |
| 133 | } deriving (Show) | |
| 134 | ||
| 128 | 135 | -- | This is an intermediate representation which is like (but not |
| 129 | 136 | -- identical to) a RichSExpr representation. In particular, it has a |
| 130 | 137 | -- special case for empty lists, and it also keeps a single piece of |
| 131 | 138 | -- indent information around for each list |
| 132 | 139 | data Intermediate |
| 133 | 140 | = IAtom Text |
| 134 |
|
|
| 141 | -- ^ An atom, already serialized | |
| 142 | | IList Indent Size Intermediate (Seq.Seq Intermediate) (Maybe Text) | |
| 143 | -- ^ A (possibly-improper) list, with the intended indentation | |
| 144 | -- strategy, the head of the list, the main set of elements, and the | |
| 145 | -- final improper element (if it exists) | |
| 135 | 146 | | IEmpty |
| 136 | ||
| 147 | -- ^ An empty list | |
| 148 | ||
| 149 | sizeOf :: Intermediate -> Size | |
| 150 | sizeOf IEmpty = Size 2 2 | |
| 151 | sizeOf (IAtom t) = Size len len where len = T.length t | |
| 152 | sizeOf (IList _ s _ _ _) = s | |
| 153 | ||
| 154 | concatSize :: Size -> Size -> Size | |
| 155 | concatSize l r = Size | |
| 156 | { sizeSum = sizeSum l + 1 + sizeSum r | |
| 157 | , sizeMax = sizeMax l `max` sizeMax r | |
| 158 | } | |
| 137 | 159 | |
| 138 | 160 | toIntermediate :: SExprPrinter a (SExpr a) -> SExpr a -> Intermediate |
| 139 | 161 | toIntermediate |
| 144 | 166 | headOf (SAtom a) = IAtom (printAtom a) |
| 145 | 167 | headOf SNil = IEmpty |
| 146 | 168 | headOf (SCons x xs) = |
| 147 | gather (swing x) (headOf x) (Seq.empty) xs | |
| 148 | gather sw hd rs SNil = | |
| 149 | IList sw hd rs Nothing | |
| 150 | gather sw hd rs (SAtom a) = | |
| 151 | IList sw hd rs (Just (printAtom a)) | |
| 152 | gather sw hd rs (SCons x xs) = | |
| 153 |
gather |
|
| 169 | gather (swing x) hd Seq.empty xs (sizeOf hd) where hd = headOf x | |
| 170 | gather sw hd rs SNil sz = | |
| 171 | IList sw sz hd rs Nothing | |
| 172 | gather sw hd rs (SAtom a) sz = | |
| 173 | IList sw (sz `concatSize` aSize) hd rs (Just aStr) | |
| 174 | where aSize = Size (T.length aStr) (T.length aStr) | |
| 175 | aStr = printAtom a | |
| 176 | gather sw hd rs (SCons x xs) sz = | |
| 177 | gather sw hd (rs Seq.|> x') xs (sz `concatSize` sizeOf x') | |
| 178 | where x' = headOf x | |
| 154 | 179 | |
| 155 | 180 | |
| 156 | 181 | unboundIndentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text |
| 157 | 182 | unboundIndentPrintSExpr spec = finalize . go . toIntermediate spec |
| 158 | 183 | where |
| 159 |
finalize = B.toLazyText . |
|
| 184 | finalize = B.toLazyText . joinLinesS | |
| 160 | 185 | |
| 161 | 186 | go :: Intermediate -> Seq.Seq B.Builder |
| 162 | 187 | go (IAtom t) = Seq.singleton (B.fromText t) |
| 164 | 189 | -- this case should never be called with an empty argument to |
| 165 | 190 | -- @values@, as that should have been translated to @IEmpty@ |
| 166 | 191 | -- instead. |
| 167 |
go (IList iv |
|
| 192 | go (IList iv _ initial values rest) | |
| 168 | 193 | -- if we're looking at an s-expression that has no nested |
| 169 | 194 | -- s-expressions, then we might as well consider it flat and let |
| 170 | 195 | -- it take the whole line |
| 207 | 232 | let hd = B.fromString "(" <> buildUnwords (F.foldMap go (Seq.fromList [initial, y])) |
| 208 | 233 | butLast = hd Seq.<| fmap (doIndentOf (fromIntegral len)) (F.foldMap go ys) |
| 209 | 234 | in handleTail rest butLast |
| 210 | -- B.fromString "(" <> buildUnwords (F.foldMap go (Seq.fromList [x, y])) | |
| 211 | -- Seq.<| fmap (doIndentOf (fromIntegral len)) (handleTail rest (F.foldMap go ys)) | |
| 212 | 235 | |
| 213 | 236 | doIndent :: B.Builder -> B.Builder |
| 214 | 237 | doIndent = doIndentOf (indentAmount spec) |
| 224 | 247 | handleTail :: Maybe Text -> Seq.Seq B.Builder -> Seq.Seq B.Builder |
| 225 | 248 | handleTail Nothing = insertCloseParen |
| 226 | 249 | handleTail (Just t) = |
| 227 |
(Seq.|> (B.fromString " |
|
| 250 | (Seq.|> (B.fromString " . " <> B.fromText t <> B.fromString ")")) | |
| 228 | 251 | |
| 229 | 252 | insertCloseParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder |
| 230 | 253 | insertCloseParen s = case Seq.viewr s of |
| 237 | 260 | t Seq.:< ts -> t <> F.foldMap (\ x -> B.fromString " " <> x) ts |
| 238 | 261 | |
| 239 | 262 | pTail Nothing = B.fromString ")" |
| 240 |
pTail (Just t) = B.fromString " |
|
| 263 | pTail (Just t) = B.fromString " . " <> B.fromText t <> B.fromString ")" | |
| 241 | 264 | |
| 242 | 265 | ppBasic (IAtom t) = Just (B.fromText t) |
| 243 | 266 | ppBasic (IEmpty) = Just (B.fromString "()") |
| 302 | 325 | |
| 303 | 326 | |
| 304 | 327 | -- Indents a line by n spaces |
| 305 | indent :: Int -> Text -> Text | |
| 306 | indent n ts = T.replicate n " " <> ts | |
| 328 | indent :: Int -> B.Builder -> B.Builder | |
| 329 | indent n ts = B.fromText (T.replicate n " ") <> ts | |
| 307 | 330 | |
| 308 | 331 | |
| 309 | 332 | -- Sort of like 'unlines' but without the trailing newline |
| 310 |
joinLinesS :: Seq.Seq |
|
| 333 | joinLinesS :: Seq.Seq B.Builder -> B.Builder | |
| 311 | 334 | joinLinesS s = case Seq.viewl s of |
| 312 | 335 | Seq.EmptyL -> "" |
| 313 | 336 | t Seq.:< ts |
| 314 | 337 | | F.null ts -> t |
| 315 |
| otherwise -> t <> |
|
| 338 | | otherwise -> t <> B.fromString "\n" <> joinLinesS ts | |
| 316 | 339 | |
| 317 | 340 | |
| 318 | 341 | -- Sort of like 'unlines' but without the trailing newline |
| 319 |
unwordsS :: Seq.Seq |
|
| 342 | unwordsS :: Seq.Seq B.Builder -> B.Builder | |
| 320 | 343 | unwordsS s = case Seq.viewl s of |
| 321 | 344 | Seq.EmptyL -> "" |
| 322 | 345 | t Seq.:< ts |
| 326 | 349 | |
| 327 | 350 | -- Indents every line n spaces, and adds a newline to the beginning |
| 328 | 351 | -- used in swung indents |
| 329 |
indentAllS :: Int -> Seq.Seq |
|
| 352 | indentAllS :: Int -> Seq.Seq B.Builder -> B.Builder | |
| 330 | 353 | indentAllS n = ("\n" <>) . joinLinesS . fmap (indent n) |
| 331 | 354 | |
| 332 | 355 | |
| 333 | 356 | -- Indents every line but the first by some amount |
| 334 | 357 | -- used in aligned indents |
| 335 |
indentSubsequentS :: Int -> Seq.Seq |
|
| 358 | indentSubsequentS :: Int -> Seq.Seq B.Builder -> B.Builder | |
| 336 | 359 | indentSubsequentS n s = case Seq.viewl s of |
| 337 | 360 | Seq.EmptyL -> "" |
| 338 | 361 | t Seq.:< ts |
| 339 | 362 | | F.null ts -> t |
| 340 | 363 | | otherwise -> joinLinesS (t Seq.<| fmap (indent n) ts) |
| 341 | -- where go = fmap (indent n) | |
| 342 | 364 | |
| 343 | 365 | |
| 344 | 366 | -- oh god this code is so disgusting |
| 348 | 370 | |
| 349 | 371 | -- | Pretty-print a 'SExpr' according to the options in a |
| 350 | 372 | -- 'LayoutOptions' value. |
| 351 |
prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> T |
|
| 373 | prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text | |
| 352 | 374 | prettyPrintSExpr pr@SExprPrinter { .. } expr = case maxWidth of |
| 353 | 375 | Nothing |
| 354 |
| indentPrint -> |
|
| 376 | | indentPrint -> unboundIndentPrintSExpr pr (fromCarrier expr) | |
| 355 | 377 | | otherwise -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr)) |
| 356 | Just _ -> indentPrintSExpr' pr expr | |
| 357 | ||
| 358 | ||
| 359 | indentPrintSExpr' :: SExprPrinter a (SExpr a) -> SExpr a -> Text | |
| 360 |
|
|
| 378 | Just w -> indentPrintSExpr' w pr expr | |
| 379 | ||
| 380 | ||
| 381 | indentPrintSExpr' :: Int -> SExprPrinter a (SExpr a) -> SExpr a -> TL.Text | |
| 382 | indentPrintSExpr' maxAmt pr@SExprPrinter { .. } = B.toLazyText . pp 0 . toIntermediate pr | |
| 361 | 383 | where |
| 362 | pp _ IEmpty = "()" | |
| 363 | pp _ (IAtom t) = t | |
| 364 |
pp |
|
| 384 | pp _ IEmpty = B.fromString "()" | |
| 385 | pp _ (IAtom t) = B.fromText t | |
| 386 | pp ind (IList i sz h values end) = | |
| 387 | -- we always are going to have a head, a (possibly empty) body, | |
| 388 | -- and a (possibly empty) tail in our list formats | |
| 389 | B.fromString "(" <> hd <> body <> tl <> B.fromString ")" | |
| 365 | 390 | where |
| 391 | -- the tail is either nothing, or the final dotted pair | |
| 366 | 392 | tl = case end of |
| 367 | Nothing -> "" | |
| 368 | Just x -> " . " <> x | |
| 393 | Nothing -> mempty | |
| 394 | Just x -> B.fromString " . " <> B.fromText x | |
| 395 | -- the head is the pretty-printed head, with an ambient | |
| 396 | -- indentation of +1 to account for the left paren | |
| 369 | 397 | hd = pp (ind+1) h |
| 370 | flat = unwordsS (fmap (pp (ind + 1)) values) | |
| 371 | headWidth = T.length hd + 1 | |
| 398 | headWidth = sizeSum (sizeOf h) | |
| 372 | 399 | indented = |
| 373 | 400 | case i of |
| 374 | 401 | SwingAfter n -> |
| 384 | 411 | indentSubsequentS (ind + headWidth + 1) |
| 385 | 412 | (fmap (pp (ind + headWidth + 1)) values) |
| 386 | 413 | body |
| 387 | | length values == 0 = "" | |
| 388 | | Just maxAmt <- maxWidth | |
| 389 | , T.length flat + ind > maxAmt = " " <> indented | |
| 390 | | otherwise = " " <> flat | |
| 414 | -- if there's nothing here, then we don't have anything to | |
| 415 | -- indent | |
| 416 | | length values == 0 = mempty | |
| 417 | -- if we can't fit the whole next s-expression on the same | |
| 418 | -- line, then we use the indented form | |
| 419 | | sizeSum sz + ind > maxAmt = B.fromString " " <> indented | |
| 420 | | otherwise = | |
| 421 | -- otherwise we print the whole thing on one line! | |
| 422 | B.fromString " " <> unwordsS (fmap (pp (ind + 1)) values) | |
| 391 | 423 | |
| 392 | 424 | |
| 393 | 425 | -- if we don't indent anything, then we can ignore a bunch of the |
| 394 | 426 | -- details above |
| 395 | flatPrintSExpr :: SExpr Text -> Text | |
| 396 | flatPrintSExpr = TL.toStrict . B.toLazyText . pHead | |
| 427 | flatPrintSExpr :: SExpr Text -> TL.Text | |
| 428 | flatPrintSExpr = B.toLazyText . pHead | |
| 397 | 429 | where |
| 398 | 430 | pHead (SCons x xs) = |
| 399 | 431 | B.fromString "(" <> pHead x <> pTail xs |
| 413 | 445 | -- 'SExprPrinter'. |
| 414 | 446 | encodeOne :: SExprPrinter atom carrier -> carrier -> Text |
| 415 | 447 | encodeOne s@(SExprPrinter { .. }) = |
| 416 |
|
|
| 448 | TL.toStrict . prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier | |
| 417 | 449 | |
| 418 | 450 | -- | Turn a list of s-expressions into a single string according to |
| 419 | 451 | -- a given 'SExprPrinter'. |
| 420 | 452 | encode :: SExprPrinter atom carrier -> [carrier] -> Text |
| 421 |
encode spec = |
|
| 453 | encode spec = | |
| 454 | T.intercalate "\n\n" . map (encodeOne spec) | |
| 455 | ||
| 456 | -- | Turn a single s-expression into a lazy 'Text' according to a given | |
| 457 | -- 'SExprPrinter'. | |
| 458 | encodeOneLazy :: SExprPrinter atom carrier -> carrier -> TL.Text | |
| 459 | encodeOneLazy s@(SExprPrinter { .. }) = | |
| 460 | prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier | |
| 461 | ||
| 462 | -- | Turn a list of s-expressions into a lazy 'Text' according to | |
| 463 | -- a given 'SExprPrinter'. | |
| 464 | encodeLazy :: SExprPrinter atom carrier -> [carrier] -> TL.Text | |
| 465 | encodeLazy spec = TL.intercalate "\n\n" . map (encodeOneLazy spec) | |