gdritter repos s-cargot / ed9cd8d
New pretty-printer with greatly improved performance. The implementation is not pretty, and there is much more tedious explicitness than graceful eloquence in this code. The problem domain is difficult to solve, but the general approach here is: 1. Pre-calculate *unconstrained* widths of each element (possibly as a composition of its sub-elements. 2. For each multi-part sub-expression, consider the following (in order of preference): a. All elements on one line. b. If first element is an `(SCons (SAtom _) r)` then try to place the atom and the first element of `r` on the same line, lining up all subsequent elements of `r` beneath the first one (essentially the `Align` indentation). c. All elements are on their own line. Kevin Quick 6 years ago
1 changed file(s) with 204 addition(s) and 42 deletion(s). Collapse all Expand all
1919 , flatPrint
2020 ) where
2121
22 import Control.Applicative
2223 import Data.Monoid ((<>))
2324 import Data.Text (Text)
2425 import qualified Data.Text as T
180181 prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text
181182 prettyPrintSExpr pr@SExprPrinter { .. } expr = case maxWidth of
182183 Nothing -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr))
183 Just _ -> indentPrintSExpr pr expr
184
185 indentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text
186 indentPrintSExpr SExprPrinter { .. } = pHead 0
184 Just w -> indentPrintSExpr2 pr w expr
185
186 indentPrintSExpr :: SExprPrinter a (SExpr a) -> Int -> SExpr a -> Text
187 indentPrintSExpr SExprPrinter { .. } _ = pHead 0
187188 where
188189 pHead _ SNil = "()"
189190 pHead _ (SAtom a) = atomPrinter a
218219 , T.length flat + ind > maxAmt = " " <> indented
219220 | otherwise = " " <> flat
220221
221 -- where
222 -- -- this is the base-case that knows how to print empty lists and
223 -- -- atoms
224 -- pHead _ SNil = B.fromString "()"
225 -- pHead _ (SAtom a) = B.fromText a
226 -- pHead ind (SCons x xs) = gather ind x xs id 0
227
228 -- -- otherwise, we trawl through the list grabbing every element...
229 -- gather ind h (SCons x xs) k r = gather ind h xs (k . (x:)) (r + T.length x)
230 -- gather ind h end k r = B.fromString "(" <> hd <> body <> tl <> B.fromString ")"
231 -- where
232 -- tl = case end of
233 -- SNil -> mempty
234 -- SAtom a -> B.fromString " . " <> B.fromText a
235 -- SCons _ _ -> error "[unreachable]"
236 -- hd = indentSubsequent ind [pHead (ind+1) h]
237 -- lst = k []
238 -- flat = T.unwords (map (pHead (ind+1)) lst)
239 -- headWidth = T.length hd + 1
240 -- indented =
241 -- case swingIndent h of
242 -- SwingAfter n ->
243 -- let (l, ls) = splitAt n lst
244 -- t = T.unwords (map (pHead (ind+1)) l)
245 -- ts = indentAll (ind + indentAmount)
246 -- (map (pHead (ind + indentAmount)) ls)
247 -- in t <> ts
248 -- Swing ->
249 -- indentAll (ind + indentAmount)
250 -- (map (pHead (ind + indentAmount)) lst)
251 -- Align ->
252 -- indentSubsequent (ind + headWidth + 1)
253 -- (map (pHead (ind + headWidth + 1)) lst)
254 -- body
255 -- | length lst == 0 = B.fromString ""
256 -- | Just maxAmt <- maxWidth
257 -- , T.length flat + ind > maxAmt = B.fromString " " <> indented
258 -- | otherwise = B.fromString " " <> flat
222 -- | Pretty-printing for S-Expressions. The general strategy is that
223 -- an SCons tail should either all fit on the current line, or else
224 -- each tail item should be placed on its own line with indenting.
225 -- Note that a line must print something, so while subsequent elements
226 -- will be placed on following lines, it is possible that the first
227 -- thing on a line (plus its indentation) may exceed the maxwidth.
228
229 type IndentSpec = Int
230 type Indenting = Maybe IndentSpec
231
232 data PPS = PPS { indentWc :: IndentSpec
233 , remWidth :: Int
234 , numClose :: Int
235 }
236 deriving Show
237
238 data SElem = SText Int T.Text
239 | SPair Int SElem SElem
240 | SDecl Int SElem [SElem]
241 | SJoin Int [SElem]
242 deriving (Show, Eq)
243
244 sElemSize :: SElem -> Int
245 sElemSize (SText n _) = n
246 sElemSize (SPair n _ _) = n
247 sElemSize (SDecl n _ _) = n
248 sElemSize (SJoin n _) = n
249
250 indentPrintSExpr2 :: SExprPrinter a (SExpr a) -> Int -> SExpr a -> Text
251 indentPrintSExpr2 SExprPrinter { .. } maxW sexpr =
252 let atomTextTree = selems sexpr
253 pretty = fmap addIndent $ fst $ pHead (PPS 0 maxW 0) atomTextTree
254 -- prettyWithDebug = pretty <> ["", (T.pack $ show atomTextTree)]
255 in T.unlines pretty
256 where
257 -- selems converts the (SExpr a) into an SElem, converting
258 -- individual atoms to their text format but not concerned with
259 -- other text formatting. The resulting SElem tree will be
260 -- iterated over to determine the wrapping strategy to apply.
261 selems SNil = SText 2 "()"
262 selems (SAtom a) = let p = atomPrinter a in SText (T.length p) p
263 selems (SCons l r) =
264 let l' = selems l
265 lsz = sElemSize l'
266 r' = selems r
267 rsz = sElemSize r'
268 bsz = lsz + rsz
269 in case r of
270 SNil -> SJoin lsz [l']
271 SAtom _ -> SPair bsz l' r'
272 _ -> case l of
273 SAtom _ -> case r' of
274 SJoin _ rl' -> SDecl bsz l' rl'
275 SDecl _ d dl -> SDecl bsz l' (d:dl)
276 _ -> SDecl bsz l' [r']
277 _ -> SJoin bsz $ prefixOnto l' r'
278
279 prefixOnto e (SJoin _ l) = e:l
280 prefixOnto e (SDecl _ l r) = e:l:r
281 prefixOnto e r = [e,r]
282
283 addIndent (Nothing, t) = t
284 addIndent (Just n, t) = indent n t
285
286 nextIndent = incrIndent indentAmount
287 incrIndent v n = n + v
288
289 pHead :: PPS -> SElem -> ( [(Indenting, Text)], PPS )
290 pHead pps (SText _ t) = ( [(Nothing, t)]
291 , pps { remWidth = remWidth pps - T.length t})
292 pHead pps (SPair _ e1 e2) =
293 let (t1,pps1) = pHead pps e1
294 (t2,pps2) = pTail ppsNextLine e2
295 (t3,pps3) = pTail ppsSameLine e2 -- same line
296 ppsNextLine = pps { remWidth = remWidth pps - T.length sep }
297 ppsSameLine = pps1 { remWidth = remWidth pps1 - T.length sep }
298 sep = " . "
299 t1h = head t1
300 wrapJoin i l rs = wrapT i (snd l <> sep) rs
301 sameLine l r p = (wrapJoin (indentWc pps) l r, p)
302 separateLines l r p = (wrapTWith False "(" "" (indentWc pps) "" l ++
303 wrapTWith True sep ")" (indentWc pps) "" r, p)
304 in if length t1 > 1 || remWidth pps3 < numClose pps + 5
305 then separateLines t1 t2 pps2
306 else sameLine t1h t3 pps3
307 -- An SJoin is a sequence of elements at the same rank. They are
308 -- either all placed on a single line, or one on each line.
309 pHead pps (SJoin _ []) = ( [], pps )
310 pHead pps (SJoin els others) =
311 let (t1,_) = pHead pps $ head others
312 (t3,pps3) = foldl pTail' ([], pps) others
313 pTail' :: ([(Indenting, Text)], PPS)
314 -> SElem
315 -> ([(Indenting, Text)], PPS)
316 pTail' (rl,pp) ne = let (rt,pr) = pTail pp ne
317 hrl = head rl
318 hrt = head rt
319 in if length rt == 1
320 then case length rl of
321 0 -> (rt, pr)
322 1 -> ((fst hrl, snd hrl <> " " <> snd hrt):[], pr)
323 _ -> (rl <> rt, pr)
324 else (rl <> rt, pr)
325 sameLine parts pEnd = (wrapT (indentWc ppsSame) "" parts, pEnd)
326 ppsNext = pps { indentWc = nextIndent (indentWc pps)
327 , remWidth = remWidth pps - indentAmount
328 }
329 ppsSame = pps { indentWc = nextIndent (indentWc pps)
330 , remWidth = remWidth pps - indentAmount
331 }
332 ppsMulti = pps { indentWc = nextIndent (indentWc pps)
333 , remWidth = remWidth pps - indentAmount
334 }
335 pps3' = pps3
336 separateLines elems pEnd =
337 let lr = concatMap (fst . pTail pEnd) elems
338 in (wrapTWith False "(" ")" (indentWc ppsNext) "" lr, pEnd)
339 in if els > remWidth pps3 || length t1 > 1 || remWidth pps3 < numClose pps + 5
340 then separateLines others ppsMulti
341 else sameLine t3 pps3'
342 -- For an SDecl, always put the first element on the line. If
343 -- *all* other elements fit on the same line, do that, otherwise
344 -- all other elements should appear on subsequent lines with
345 -- indentation. This will produce left-biased wrapping: wrapping
346 -- will occur near the root of the SExp tree more than at the
347 -- leaves.
348 pHead pps (SDecl els first others) =
349 let (t1,pps1) = pHead pp2 first
350 (to1,_) = pTail pps1 (head others)
351 firstPlusFits = sElemSize first + sElemSize (head others) < (remWidth pps - 4)
352 allFits = els < (remWidth pps - length others - 3)
353 tryFirstArgSameLine = case swingIndent (SCons SNil (SCons SNil SNil)) of
354 Align -> True
355 _ -> False
356 pp2 = pps { indentWc = nextIndent (indentWc pps)
357 , remWidth = remWidth pps - 1 - indentAmount
358 , numClose = numClose pps + 1
359 }
360 pp2next = pp2
361 pp2solo = pp2
362 t1h = head t1
363 pps1' = pps1 { indentWc = incrIndent (T.length (snd t1h) + 1)
364 (indentWc pps1)
365 , remWidth = remWidth pps1 - T.length (snd t1h) - 1
366 }
367 tothers = concatMap (fst . pTail pp2next) others -- multiline
368 tothers' = concatMap (fst . pTail pps1') $ tail others -- multiline from 2nd
369 (others', ppone) = foldl foldPTail ([],pps1) others -- oneline
370 (others'', ppone') = foldl foldPTail ([],pps1') $ tail others -- multiline from 2nd
371 foldPTail (tf,ppf) o = let (ot,opp) = pTail ppf o
372 tf1 = head tf
373 tr = if length ot == 1
374 then case length tf of
375 0 -> ot
376 1 -> [(fst tf1, snd tf1 <> " " <> snd (head ot))]
377 _ -> tf ++ ot
378 else tf ++ ot
379 in (tr, opp)
380 separateLines l r p =
381 let wr = if null r then []
382 else wrapTWith True "" ")" (indentWc p) "" r
383 cl = if null r then ")" else ""
384 in (wrapTWith False "(" cl (indentWc pps) "" l <> wr, pp2)
385 maybeSameLine l (r1,p1) (rn,p2) =
386 if length r1 <= 1 && remWidth p1 > numClose p1
387 then (wrapT (indentWc pps) (snd l <> " ") r1, p1)
388 else separateLines [l] rn p2
389 in if allFits && length t1 < 2
390 then maybeSameLine t1h (others',ppone) (tothers,pp2solo)
391 else if (tryFirstArgSameLine && firstPlusFits &&
392 length t1 < 2 &&
393 length to1 < 2 &&
394 not (null to1) && not (null others))
395 then maybeSameLine (fst t1h,
396 snd t1h <> " " <> snd (head to1)) (others'',ppone') (tothers',pps1')
397 else separateLines t1 tothers pp2
398
399
400 pTail = pHead
401
402
403 wrapTWith :: Bool -> T.Text -> T.Text -> IndentSpec
404 -> T.Text
405 -> [(Indenting, T.Text)]
406 -> [(Indenting, T.Text)]
407 wrapTWith isContinued st en ind hstart ts =
408 let th = head ts
409 tt = last ts
410 tb = init $ tail ts
411 tp l = (fst l <|> Just ind, snd l)
412 fi = if isContinued then Just ind else Nothing
413 in if length ts > 1
414 then (((fi, st <> hstart <> snd th) : map tp tb) ++
415 [ tp $ (fst tt, snd tt <> en) ])
416 else [(fi, st <> hstart <> snd th <> en)]
417
418 wrapT :: IndentSpec -> T.Text -> [(Indenting, T.Text)] -> [(Indenting, T.Text)]
419 wrapT = wrapTWith False "(" ")"
420
259421
260422 -- if we don't indent anything, then we can ignore a bunch of the
261423 -- details above