Improved width-constrained pretty-printer performance + exposed lazy Text versions
Getty Ritter
6 years ago
6 | 6 | ( -- * Pretty-Printing |
7 | 7 | encodeOne |
8 | 8 | , encode |
9 | , encodeOneLazy | |
10 | , encodeLazy | |
9 | 11 | -- * Pretty-Printing Control |
10 | 12 | , SExprPrinter |
11 | 13 | , Indent(..) |
125 | 127 | , indentPrint = True |
126 | 128 | } |
127 | 129 | |
130 | data Size = Size | |
131 | { sizeSum :: !Int | |
132 | , sizeMax :: !Int | |
133 | } deriving (Show) | |
134 | ||
128 | 135 | -- | This is an intermediate representation which is like (but not |
129 | 136 | -- identical to) a RichSExpr representation. In particular, it has a |
130 | 137 | -- special case for empty lists, and it also keeps a single piece of |
131 | 138 | -- indent information around for each list |
132 | 139 | data Intermediate |
133 | 140 | = IAtom Text |
134 |
|
|
141 | -- ^ An atom, already serialized | |
142 | | IList Indent Size Intermediate (Seq.Seq Intermediate) (Maybe Text) | |
143 | -- ^ A (possibly-improper) list, with the intended indentation | |
144 | -- strategy, the head of the list, the main set of elements, and the | |
145 | -- final improper element (if it exists) | |
135 | 146 | | IEmpty |
136 | ||
147 | -- ^ An empty list | |
148 | ||
149 | sizeOf :: Intermediate -> Size | |
150 | sizeOf IEmpty = Size 2 2 | |
151 | sizeOf (IAtom t) = Size len len where len = T.length t | |
152 | sizeOf (IList _ s _ _ _) = s | |
153 | ||
154 | concatSize :: Size -> Size -> Size | |
155 | concatSize l r = Size | |
156 | { sizeSum = sizeSum l + 1 + sizeSum r | |
157 | , sizeMax = sizeMax l `max` sizeMax r | |
158 | } | |
137 | 159 | |
138 | 160 | toIntermediate :: SExprPrinter a (SExpr a) -> SExpr a -> Intermediate |
139 | 161 | toIntermediate |
144 | 166 | headOf (SAtom a) = IAtom (printAtom a) |
145 | 167 | headOf SNil = IEmpty |
146 | 168 | headOf (SCons x xs) = |
147 | gather (swing x) (headOf x) (Seq.empty) xs | |
148 | gather sw hd rs SNil = | |
149 | IList sw hd rs Nothing | |
150 | gather sw hd rs (SAtom a) = | |
151 | IList sw hd rs (Just (printAtom a)) | |
152 | gather sw hd rs (SCons x xs) = | |
153 |
gather |
|
169 | gather (swing x) hd Seq.empty xs (sizeOf hd) where hd = headOf x | |
170 | gather sw hd rs SNil sz = | |
171 | IList sw sz hd rs Nothing | |
172 | gather sw hd rs (SAtom a) sz = | |
173 | IList sw (sz `concatSize` aSize) hd rs (Just aStr) | |
174 | where aSize = Size (T.length aStr) (T.length aStr) | |
175 | aStr = printAtom a | |
176 | gather sw hd rs (SCons x xs) sz = | |
177 | gather sw hd (rs Seq.|> x') xs (sz `concatSize` sizeOf x') | |
178 | where x' = headOf x | |
154 | 179 | |
155 | 180 | |
156 | 181 | unboundIndentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text |
157 | 182 | unboundIndentPrintSExpr spec = finalize . go . toIntermediate spec |
158 | 183 | where |
159 |
finalize = B.toLazyText . |
|
184 | finalize = B.toLazyText . joinLinesS | |
160 | 185 | |
161 | 186 | go :: Intermediate -> Seq.Seq B.Builder |
162 | 187 | go (IAtom t) = Seq.singleton (B.fromText t) |
164 | 189 | -- this case should never be called with an empty argument to |
165 | 190 | -- @values@, as that should have been translated to @IEmpty@ |
166 | 191 | -- instead. |
167 |
go (IList iv |
|
192 | go (IList iv _ initial values rest) | |
168 | 193 | -- if we're looking at an s-expression that has no nested |
169 | 194 | -- s-expressions, then we might as well consider it flat and let |
170 | 195 | -- it take the whole line |
207 | 232 | let hd = B.fromString "(" <> buildUnwords (F.foldMap go (Seq.fromList [initial, y])) |
208 | 233 | butLast = hd Seq.<| fmap (doIndentOf (fromIntegral len)) (F.foldMap go ys) |
209 | 234 | in handleTail rest butLast |
210 | -- B.fromString "(" <> buildUnwords (F.foldMap go (Seq.fromList [x, y])) | |
211 | -- Seq.<| fmap (doIndentOf (fromIntegral len)) (handleTail rest (F.foldMap go ys)) | |
212 | 235 | |
213 | 236 | doIndent :: B.Builder -> B.Builder |
214 | 237 | doIndent = doIndentOf (indentAmount spec) |
224 | 247 | handleTail :: Maybe Text -> Seq.Seq B.Builder -> Seq.Seq B.Builder |
225 | 248 | handleTail Nothing = insertCloseParen |
226 | 249 | handleTail (Just t) = |
227 |
(Seq.|> (B.fromString " |
|
250 | (Seq.|> (B.fromString " . " <> B.fromText t <> B.fromString ")")) | |
228 | 251 | |
229 | 252 | insertCloseParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder |
230 | 253 | insertCloseParen s = case Seq.viewr s of |
237 | 260 | t Seq.:< ts -> t <> F.foldMap (\ x -> B.fromString " " <> x) ts |
238 | 261 | |
239 | 262 | pTail Nothing = B.fromString ")" |
240 |
pTail (Just t) = B.fromString " |
|
263 | pTail (Just t) = B.fromString " . " <> B.fromText t <> B.fromString ")" | |
241 | 264 | |
242 | 265 | ppBasic (IAtom t) = Just (B.fromText t) |
243 | 266 | ppBasic (IEmpty) = Just (B.fromString "()") |
302 | 325 | |
303 | 326 | |
304 | 327 | -- Indents a line by n spaces |
305 | indent :: Int -> Text -> Text | |
306 | indent n ts = T.replicate n " " <> ts | |
328 | indent :: Int -> B.Builder -> B.Builder | |
329 | indent n ts = B.fromText (T.replicate n " ") <> ts | |
307 | 330 | |
308 | 331 | |
309 | 332 | -- Sort of like 'unlines' but without the trailing newline |
310 |
joinLinesS :: Seq.Seq |
|
333 | joinLinesS :: Seq.Seq B.Builder -> B.Builder | |
311 | 334 | joinLinesS s = case Seq.viewl s of |
312 | 335 | Seq.EmptyL -> "" |
313 | 336 | t Seq.:< ts |
314 | 337 | | F.null ts -> t |
315 |
| otherwise -> t <> |
|
338 | | otherwise -> t <> B.fromString "\n" <> joinLinesS ts | |
316 | 339 | |
317 | 340 | |
318 | 341 | -- Sort of like 'unlines' but without the trailing newline |
319 |
unwordsS :: Seq.Seq |
|
342 | unwordsS :: Seq.Seq B.Builder -> B.Builder | |
320 | 343 | unwordsS s = case Seq.viewl s of |
321 | 344 | Seq.EmptyL -> "" |
322 | 345 | t Seq.:< ts |
326 | 349 | |
327 | 350 | -- Indents every line n spaces, and adds a newline to the beginning |
328 | 351 | -- used in swung indents |
329 |
indentAllS :: Int -> Seq.Seq |
|
352 | indentAllS :: Int -> Seq.Seq B.Builder -> B.Builder | |
330 | 353 | indentAllS n = ("\n" <>) . joinLinesS . fmap (indent n) |
331 | 354 | |
332 | 355 | |
333 | 356 | -- Indents every line but the first by some amount |
334 | 357 | -- used in aligned indents |
335 |
indentSubsequentS :: Int -> Seq.Seq |
|
358 | indentSubsequentS :: Int -> Seq.Seq B.Builder -> B.Builder | |
336 | 359 | indentSubsequentS n s = case Seq.viewl s of |
337 | 360 | Seq.EmptyL -> "" |
338 | 361 | t Seq.:< ts |
339 | 362 | | F.null ts -> t |
340 | 363 | | otherwise -> joinLinesS (t Seq.<| fmap (indent n) ts) |
341 | -- where go = fmap (indent n) | |
342 | 364 | |
343 | 365 | |
344 | 366 | -- oh god this code is so disgusting |
348 | 370 | |
349 | 371 | -- | Pretty-print a 'SExpr' according to the options in a |
350 | 372 | -- 'LayoutOptions' value. |
351 |
prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> T |
|
373 | prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text | |
352 | 374 | prettyPrintSExpr pr@SExprPrinter { .. } expr = case maxWidth of |
353 | 375 | Nothing |
354 |
| indentPrint -> |
|
376 | | indentPrint -> unboundIndentPrintSExpr pr (fromCarrier expr) | |
355 | 377 | | otherwise -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr)) |
356 | Just _ -> indentPrintSExpr' pr expr | |
357 | ||
358 | ||
359 | indentPrintSExpr' :: SExprPrinter a (SExpr a) -> SExpr a -> Text | |
360 |
|
|
378 | Just w -> indentPrintSExpr' w pr expr | |
379 | ||
380 | ||
381 | indentPrintSExpr' :: Int -> SExprPrinter a (SExpr a) -> SExpr a -> TL.Text | |
382 | indentPrintSExpr' maxAmt pr@SExprPrinter { .. } = B.toLazyText . pp 0 . toIntermediate pr | |
361 | 383 | where |
362 | pp _ IEmpty = "()" | |
363 | pp _ (IAtom t) = t | |
364 |
pp |
|
384 | pp _ IEmpty = B.fromString "()" | |
385 | pp _ (IAtom t) = B.fromText t | |
386 | pp ind (IList i sz h values end) = | |
387 | -- we always are going to have a head, a (possibly empty) body, | |
388 | -- and a (possibly empty) tail in our list formats | |
389 | B.fromString "(" <> hd <> body <> tl <> B.fromString ")" | |
365 | 390 | where |
391 | -- the tail is either nothing, or the final dotted pair | |
366 | 392 | tl = case end of |
367 | Nothing -> "" | |
368 | Just x -> " . " <> x | |
393 | Nothing -> mempty | |
394 | Just x -> B.fromString " . " <> B.fromText x | |
395 | -- the head is the pretty-printed head, with an ambient | |
396 | -- indentation of +1 to account for the left paren | |
369 | 397 | hd = pp (ind+1) h |
370 | flat = unwordsS (fmap (pp (ind + 1)) values) | |
371 | headWidth = T.length hd + 1 | |
398 | headWidth = sizeSum (sizeOf h) | |
372 | 399 | indented = |
373 | 400 | case i of |
374 | 401 | SwingAfter n -> |
384 | 411 | indentSubsequentS (ind + headWidth + 1) |
385 | 412 | (fmap (pp (ind + headWidth + 1)) values) |
386 | 413 | body |
387 | | length values == 0 = "" | |
388 | | Just maxAmt <- maxWidth | |
389 | , T.length flat + ind > maxAmt = " " <> indented | |
390 | | otherwise = " " <> flat | |
414 | -- if there's nothing here, then we don't have anything to | |
415 | -- indent | |
416 | | length values == 0 = mempty | |
417 | -- if we can't fit the whole next s-expression on the same | |
418 | -- line, then we use the indented form | |
419 | | sizeSum sz + ind > maxAmt = B.fromString " " <> indented | |
420 | | otherwise = | |
421 | -- otherwise we print the whole thing on one line! | |
422 | B.fromString " " <> unwordsS (fmap (pp (ind + 1)) values) | |
391 | 423 | |
392 | 424 | |
393 | 425 | -- if we don't indent anything, then we can ignore a bunch of the |
394 | 426 | -- details above |
395 | flatPrintSExpr :: SExpr Text -> Text | |
396 | flatPrintSExpr = TL.toStrict . B.toLazyText . pHead | |
427 | flatPrintSExpr :: SExpr Text -> TL.Text | |
428 | flatPrintSExpr = B.toLazyText . pHead | |
397 | 429 | where |
398 | 430 | pHead (SCons x xs) = |
399 | 431 | B.fromString "(" <> pHead x <> pTail xs |
413 | 445 | -- 'SExprPrinter'. |
414 | 446 | encodeOne :: SExprPrinter atom carrier -> carrier -> Text |
415 | 447 | encodeOne s@(SExprPrinter { .. }) = |
416 |
|
|
448 | TL.toStrict . prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier | |
417 | 449 | |
418 | 450 | -- | Turn a list of s-expressions into a single string according to |
419 | 451 | -- a given 'SExprPrinter'. |
420 | 452 | encode :: SExprPrinter atom carrier -> [carrier] -> Text |
421 |
encode spec = |
|
453 | encode spec = | |
454 | T.intercalate "\n\n" . map (encodeOne spec) | |
455 | ||
456 | -- | Turn a single s-expression into a lazy 'Text' according to a given | |
457 | -- 'SExprPrinter'. | |
458 | encodeOneLazy :: SExprPrinter atom carrier -> carrier -> TL.Text | |
459 | encodeOneLazy s@(SExprPrinter { .. }) = | |
460 | prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier | |
461 | ||
462 | -- | Turn a list of s-expressions into a lazy 'Text' according to | |
463 | -- a given 'SExprPrinter'. | |
464 | encodeLazy :: SExprPrinter atom carrier -> [carrier] -> TL.Text | |
465 | encodeLazy spec = TL.intercalate "\n\n" . map (encodeOneLazy spec) |