gdritter repos s-cargot / 8611635
Improved width-constrained pretty-printer performance + exposed lazy Text versions Getty Ritter 6 years ago
1 changed file(s) with 89 addition(s) and 45 deletion(s). Collapse all Expand all
66 ( -- * Pretty-Printing
77 encodeOne
88 , encode
9 , encodeOneLazy
10 , encodeLazy
911 -- * Pretty-Printing Control
1012 , SExprPrinter
1113 , Indent(..)
125127 , indentPrint = True
126128 }
127129
130 data Size = Size
131 { sizeSum :: !Int
132 , sizeMax :: !Int
133 } deriving (Show)
134
128135 -- | This is an intermediate representation which is like (but not
129136 -- identical to) a RichSExpr representation. In particular, it has a
130137 -- special case for empty lists, and it also keeps a single piece of
131138 -- indent information around for each list
132139 data Intermediate
133140 = IAtom Text
134 | IList Indent Intermediate (Seq.Seq Intermediate) (Maybe Text)
141 -- ^ An atom, already serialized
142 | IList Indent Size Intermediate (Seq.Seq Intermediate) (Maybe Text)
143 -- ^ A (possibly-improper) list, with the intended indentation
144 -- strategy, the head of the list, the main set of elements, and the
145 -- final improper element (if it exists)
135146 | IEmpty
136
147 -- ^ An empty list
148
149 sizeOf :: Intermediate -> Size
150 sizeOf IEmpty = Size 2 2
151 sizeOf (IAtom t) = Size len len where len = T.length t
152 sizeOf (IList _ s _ _ _) = s
153
154 concatSize :: Size -> Size -> Size
155 concatSize l r = Size
156 { sizeSum = sizeSum l + 1 + sizeSum r
157 , sizeMax = sizeMax l `max` sizeMax r
158 }
137159
138160 toIntermediate :: SExprPrinter a (SExpr a) -> SExpr a -> Intermediate
139161 toIntermediate
144166 headOf (SAtom a) = IAtom (printAtom a)
145167 headOf SNil = IEmpty
146168 headOf (SCons x xs) =
147 gather (swing x) (headOf x) (Seq.empty) xs
148 gather sw hd rs SNil =
149 IList sw hd rs Nothing
150 gather sw hd rs (SAtom a) =
151 IList sw hd rs (Just (printAtom a))
152 gather sw hd rs (SCons x xs) =
153 gather sw hd (rs Seq.|> headOf x) xs
169 gather (swing x) hd Seq.empty xs (sizeOf hd) where hd = headOf x
170 gather sw hd rs SNil sz =
171 IList sw sz hd rs Nothing
172 gather sw hd rs (SAtom a) sz =
173 IList sw (sz `concatSize` aSize) hd rs (Just aStr)
174 where aSize = Size (T.length aStr) (T.length aStr)
175 aStr = printAtom a
176 gather sw hd rs (SCons x xs) sz =
177 gather sw hd (rs Seq.|> x') xs (sz `concatSize` sizeOf x')
178 where x' = headOf x
154179
155180
156181 unboundIndentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
157182 unboundIndentPrintSExpr spec = finalize . go . toIntermediate spec
158183 where
159 finalize = B.toLazyText . F.foldMap (<> B.fromString "\n")
184 finalize = B.toLazyText . joinLinesS
160185
161186 go :: Intermediate -> Seq.Seq B.Builder
162187 go (IAtom t) = Seq.singleton (B.fromText t)
164189 -- this case should never be called with an empty argument to
165190 -- @values@, as that should have been translated to @IEmpty@
166191 -- instead.
167 go (IList iv initial values rest)
192 go (IList iv _ initial values rest)
168193 -- if we're looking at an s-expression that has no nested
169194 -- s-expressions, then we might as well consider it flat and let
170195 -- it take the whole line
207232 let hd = B.fromString "(" <> buildUnwords (F.foldMap go (Seq.fromList [initial, y]))
208233 butLast = hd Seq.<| fmap (doIndentOf (fromIntegral len)) (F.foldMap go ys)
209234 in handleTail rest butLast
210 -- B.fromString "(" <> buildUnwords (F.foldMap go (Seq.fromList [x, y]))
211 -- Seq.<| fmap (doIndentOf (fromIntegral len)) (handleTail rest (F.foldMap go ys))
212235
213236 doIndent :: B.Builder -> B.Builder
214237 doIndent = doIndentOf (indentAmount spec)
224247 handleTail :: Maybe Text -> Seq.Seq B.Builder -> Seq.Seq B.Builder
225248 handleTail Nothing = insertCloseParen
226249 handleTail (Just t) =
227 (Seq.|> (B.fromString "." <> B.fromText t <> B.fromString ")"))
250 (Seq.|> (B.fromString " . " <> B.fromText t <> B.fromString ")"))
228251
229252 insertCloseParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder
230253 insertCloseParen s = case Seq.viewr s of
237260 t Seq.:< ts -> t <> F.foldMap (\ x -> B.fromString " " <> x) ts
238261
239262 pTail Nothing = B.fromString ")"
240 pTail (Just t) = B.fromString ". " <> B.fromText t <> B.fromString ")"
263 pTail (Just t) = B.fromString " . " <> B.fromText t <> B.fromString ")"
241264
242265 ppBasic (IAtom t) = Just (B.fromText t)
243266 ppBasic (IEmpty) = Just (B.fromString "()")
302325
303326
304327 -- Indents a line by n spaces
305 indent :: Int -> Text -> Text
306 indent n ts = T.replicate n " " <> ts
328 indent :: Int -> B.Builder -> B.Builder
329 indent n ts = B.fromText (T.replicate n " ") <> ts
307330
308331
309332 -- Sort of like 'unlines' but without the trailing newline
310 joinLinesS :: Seq.Seq Text -> Text
333 joinLinesS :: Seq.Seq B.Builder -> B.Builder
311334 joinLinesS s = case Seq.viewl s of
312335 Seq.EmptyL -> ""
313336 t Seq.:< ts
314337 | F.null ts -> t
315 | otherwise -> t <> "\n" <> joinLinesS ts
338 | otherwise -> t <> B.fromString "\n" <> joinLinesS ts
316339
317340
318341 -- Sort of like 'unlines' but without the trailing newline
319 unwordsS :: Seq.Seq Text -> Text
342 unwordsS :: Seq.Seq B.Builder -> B.Builder
320343 unwordsS s = case Seq.viewl s of
321344 Seq.EmptyL -> ""
322345 t Seq.:< ts
326349
327350 -- Indents every line n spaces, and adds a newline to the beginning
328351 -- used in swung indents
329 indentAllS :: Int -> Seq.Seq Text -> Text
352 indentAllS :: Int -> Seq.Seq B.Builder -> B.Builder
330353 indentAllS n = ("\n" <>) . joinLinesS . fmap (indent n)
331354
332355
333356 -- Indents every line but the first by some amount
334357 -- used in aligned indents
335 indentSubsequentS :: Int -> Seq.Seq Text -> Text
358 indentSubsequentS :: Int -> Seq.Seq B.Builder -> B.Builder
336359 indentSubsequentS n s = case Seq.viewl s of
337360 Seq.EmptyL -> ""
338361 t Seq.:< ts
339362 | F.null ts -> t
340363 | otherwise -> joinLinesS (t Seq.<| fmap (indent n) ts)
341 -- where go = fmap (indent n)
342364
343365
344366 -- oh god this code is so disgusting
348370
349371 -- | Pretty-print a 'SExpr' according to the options in a
350372 -- 'LayoutOptions' value.
351 prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text
373 prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
352374 prettyPrintSExpr pr@SExprPrinter { .. } expr = case maxWidth of
353375 Nothing
354 | indentPrint -> TL.toStrict (unboundIndentPrintSExpr pr (fromCarrier expr))
376 | indentPrint -> unboundIndentPrintSExpr pr (fromCarrier expr)
355377 | otherwise -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr))
356 Just _ -> indentPrintSExpr' pr expr
357
358
359 indentPrintSExpr' :: SExprPrinter a (SExpr a) -> SExpr a -> Text
360 indentPrintSExpr' pr@SExprPrinter { .. } = pp 0 . toIntermediate pr
378 Just w -> indentPrintSExpr' w pr expr
379
380
381 indentPrintSExpr' :: Int -> SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
382 indentPrintSExpr' maxAmt pr@SExprPrinter { .. } = B.toLazyText . pp 0 . toIntermediate pr
361383 where
362 pp _ IEmpty = "()"
363 pp _ (IAtom t) = t
364 pp ind (IList i h values end) = "(" <> hd <> body <> tl <> ")"
384 pp _ IEmpty = B.fromString "()"
385 pp _ (IAtom t) = B.fromText t
386 pp ind (IList i sz h values end) =
387 -- we always are going to have a head, a (possibly empty) body,
388 -- and a (possibly empty) tail in our list formats
389 B.fromString "(" <> hd <> body <> tl <> B.fromString ")"
365390 where
391 -- the tail is either nothing, or the final dotted pair
366392 tl = case end of
367 Nothing -> ""
368 Just x -> " . " <> x
393 Nothing -> mempty
394 Just x -> B.fromString " . " <> B.fromText x
395 -- the head is the pretty-printed head, with an ambient
396 -- indentation of +1 to account for the left paren
369397 hd = pp (ind+1) h
370 flat = unwordsS (fmap (pp (ind + 1)) values)
371 headWidth = T.length hd + 1
398 headWidth = sizeSum (sizeOf h)
372399 indented =
373400 case i of
374401 SwingAfter n ->
384411 indentSubsequentS (ind + headWidth + 1)
385412 (fmap (pp (ind + headWidth + 1)) values)
386413 body
387 | length values == 0 = ""
388 | Just maxAmt <- maxWidth
389 , T.length flat + ind > maxAmt = " " <> indented
390 | otherwise = " " <> flat
414 -- if there's nothing here, then we don't have anything to
415 -- indent
416 | length values == 0 = mempty
417 -- if we can't fit the whole next s-expression on the same
418 -- line, then we use the indented form
419 | sizeSum sz + ind > maxAmt = B.fromString " " <> indented
420 | otherwise =
421 -- otherwise we print the whole thing on one line!
422 B.fromString " " <> unwordsS (fmap (pp (ind + 1)) values)
391423
392424
393425 -- if we don't indent anything, then we can ignore a bunch of the
394426 -- details above
395 flatPrintSExpr :: SExpr Text -> Text
396 flatPrintSExpr = TL.toStrict . B.toLazyText . pHead
427 flatPrintSExpr :: SExpr Text -> TL.Text
428 flatPrintSExpr = B.toLazyText . pHead
397429 where
398430 pHead (SCons x xs) =
399431 B.fromString "(" <> pHead x <> pTail xs
413445 -- 'SExprPrinter'.
414446 encodeOne :: SExprPrinter atom carrier -> carrier -> Text
415447 encodeOne s@(SExprPrinter { .. }) =
416 prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier
448 TL.toStrict . prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier
417449
418450 -- | Turn a list of s-expressions into a single string according to
419451 -- a given 'SExprPrinter'.
420452 encode :: SExprPrinter atom carrier -> [carrier] -> Text
421 encode spec = T.intercalate "\n\n" . map (encodeOne spec)
453 encode spec =
454 T.intercalate "\n\n" . map (encodeOne spec)
455
456 -- | Turn a single s-expression into a lazy 'Text' according to a given
457 -- 'SExprPrinter'.
458 encodeOneLazy :: SExprPrinter atom carrier -> carrier -> TL.Text
459 encodeOneLazy s@(SExprPrinter { .. }) =
460 prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier
461
462 -- | Turn a list of s-expressions into a lazy 'Text' according to
463 -- a given 'SExprPrinter'.
464 encodeLazy :: SExprPrinter atom carrier -> [carrier] -> TL.Text
465 encodeLazy spec = TL.intercalate "\n\n" . map (encodeOneLazy spec)