gdritter repos s-cargot / 2b3a68d
Added width-insensitive efficient pretty-printer Getty Ritter 6 years ago
3 changed file(s) with 184 addition(s) and 6 deletion(s). Collapse all Expand all
1717 -- * Default Printing Strategies
1818 , basicPrint
1919 , flatPrint
20 , unboundIndentPrint
2021 ) where
2122
23 import qualified Data.Foldable as F
2224 import Data.Monoid ((<>))
25 import qualified Data.Sequence as Seq
2326 import Data.Text (Text)
2427 import qualified Data.Text as T
2528 import qualified Data.Text.Lazy as TL
2629 import qualified Data.Text.Lazy.Builder as B
30 import qualified Data.Traversable as T
2731
2832 import Data.SCargot.Repr
33
2934
3035 -- | The 'Indent' type is used to determine how to indent subsequent
3136 -- s-expressions in a list, after printing the head of the list.
5459 -- > quux)
5560 deriving (Eq, Show)
5661
62
5763 -- | A 'SExprPrinter' value describes how to print a given value as an
5864 -- s-expression. The @carrier@ type parameter indicates the value
5965 -- that will be printed, and the @atom@ parameter indicates the type
6975 , indentAmount :: Int
7076 -- ^ How much to indent after a swung indentation.
7177 , maxWidth :: Maybe Int
72 -- ^ The maximum width (if any) If this is 'None' then
73 -- the resulting s-expression will always be printed
74 -- on a single line.
78 -- ^ The maximum width (if any) If this is 'None' then the
79 -- resulting s-expression might be printed on one line (if
80 -- 'indentPrint' is 'False') and might be pretty-printed in
81 -- the most naive way possible (if 'indentPrint' is 'True').
82 , indentPrint :: Bool
83 -- ^ Whether to indent or not. This has been retrofitted onto
7584 }
7685
77 -- | A default 'LayoutOptions' struct that will always print a 'SExpr'
86
87 -- | A default 'SExprPrinter' struct that will always print a 'SExpr'
7888 -- as a single line.
7989 flatPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
8090 flatPrint printer = SExprPrinter
8393 , swingIndent = const Swing
8494 , indentAmount = 2
8595 , maxWidth = Nothing
96 , indentPrint = False
8697 }
8798
88 -- | A default 'LayoutOptions' struct that will always swing subsequent
99 -- | A default 'SExprPrinter' struct that will always swing subsequent
89100 -- expressions onto later lines if they're too long, indenting them
90101 -- by two spaces.
91102 basicPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
95106 , swingIndent = const Swing
96107 , indentAmount = 2
97108 , maxWidth = Just 80
109 , indentPrint = True
98110 }
111
112 unboundIndentPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
113 unboundIndentPrint printer = SExprPrinter
114 { atomPrinter = printer
115 , fromCarrier = id
116 , swingIndent = const Swing
117 , indentAmount = 2
118 , maxWidth = Nothing
119 , indentPrint = True
120 }
121
122 -- | This is an intermediate representation which is like (but not
123 -- identical to) a RichSExpr representation. In particular, it has a
124 -- special case for empty lists, and it also keeps a single piece of
125 -- indent information around for each list
126 data Intermediate
127 = IAtom Text
128 | IList Indent (Seq.Seq Intermediate) (Maybe Text)
129 | IEmpty
130
131
132 toIntermediate :: SExprPrinter a (SExpr a) -> SExpr a -> Intermediate
133 toIntermediate
134 SExprPrinter { atomPrinter = printAtom
135 , swingIndent = swing
136 } = headOf
137 where
138 headOf (SAtom a) = IAtom (printAtom a)
139 headOf SNil = IEmpty
140 headOf (SCons x xs) =
141 gather (swing x) (Seq.singleton (headOf x)) xs
142 gather sw rs SNil =
143 IList sw rs Nothing
144 gather sw rs (SAtom a) =
145 IList sw rs (Just (printAtom a))
146 gather sw rs (SCons x xs) =
147 gather sw (rs Seq.|> headOf x) xs
148
149
150 unboundIndentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
151 unboundIndentPrintSExpr spec = finalize . go . toIntermediate spec
152 where
153 finalize = B.toLazyText . F.foldMap (<> B.fromString "\n")
154
155 go :: Intermediate -> Seq.Seq B.Builder
156 go (IAtom t) = Seq.singleton (B.fromText t)
157 go IEmpty = Seq.singleton (B.fromString "()")
158 -- this case should never be called with an empty argument to
159 -- @values@, as that should have been translated to @IEmpty@
160 -- instead.
161 go (IList iv values rest)
162 -- if we're looking at an s-expression that has no nested
163 -- s-expressions, then we might as well consider it flat and let
164 -- it take the whole line
165 | Just strings <- T.traverse ppBasic values =
166 Seq.singleton (B.fromString "(" <> buildUnwords strings <> pTail rest)
167
168 -- it's not "flat", so we might want to swing after the first thing
169 | Swing <- iv =
170 -- if this match fails, then it means we've failed to
171 -- convert to an Intermediate correctly!
172 let x Seq.:< xs = Seq.viewl values
173 butLast = insertParen (go x) <> fmap doIndent (F.foldMap go xs)
174 in handleTail rest butLast
175
176 -- ...or after several things
177 | SwingAfter n <- iv =
178 let (hs, xs) = Seq.splitAt n values
179 hd = B.fromString "(" <> buildUnwords (F.foldMap go hs)
180 butLast = hd Seq.<| fmap doIndent (F.foldMap go xs)
181 in handleTail rest butLast
182
183 -- the 'align' choice is clunkier because we need to know how
184 -- deep to indent, so we have to force the first builder to grab its size
185 | otherwise =
186 let x Seq.:< xs = Seq.viewl values
187 -- so we grab that and figure out its length plus two (for
188 -- the leading paren and the following space). This uses a
189 -- max because it's possible the first thing is itself a
190 -- multi-line s-expression (in which case it seems like
191 -- using the Align strategy is a terrible idea, but who am
192 -- I to quarrel with the wild fruits upon the Tree of
193 -- Life?)
194 len = 2 + F.maximum (fmap (TL.length . B.toLazyText) (go x))
195 in case Seq.viewl xs of
196 -- if there's nothing after the head of the expression, then
197 -- we simply close it
198 Seq.EmptyL -> insertParen (insertCloseParen (go x))
199 -- otherwise, we put the first two things on the same line
200 -- with spaces and everything else gets indended the
201 -- forementioned length
202 y Seq.:< ys ->
203 let hd = B.fromString "(" <> buildUnwords (F.foldMap go (Seq.fromList [x, y]))
204 butLast = hd Seq.<| fmap (doIndentOf (fromIntegral len)) (F.foldMap go ys)
205 in handleTail rest butLast
206 -- B.fromString "(" <> buildUnwords (F.foldMap go (Seq.fromList [x, y]))
207 -- Seq.<| fmap (doIndentOf (fromIntegral len)) (handleTail rest (F.foldMap go ys))
208
209 doIndent :: B.Builder -> B.Builder
210 doIndent = doIndentOf (indentAmount spec)
211
212 doIndentOf :: Int -> B.Builder -> B.Builder
213 doIndentOf n b = B.fromText (T.replicate n " ") <> b
214
215 insertParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder
216 insertParen s = case Seq.viewl s of
217 Seq.EmptyL -> s
218 x Seq.:< xs -> (B.fromString "(" <> x) Seq.<| xs
219
220 handleTail :: Maybe Text -> Seq.Seq B.Builder -> Seq.Seq B.Builder
221 handleTail Nothing = insertCloseParen
222 handleTail (Just t) =
223 (Seq.|> (B.fromString "." <> B.fromText t <> B.fromString ")"))
224
225 insertCloseParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder
226 insertCloseParen s = case Seq.viewr s of
227 Seq.EmptyR -> Seq.singleton (B.fromString ")")
228 xs Seq.:> x -> xs Seq.|> (x <> B.fromString ")")
229
230 buildUnwords sq =
231 case Seq.viewl sq of
232 Seq.EmptyL -> mempty
233 t Seq.:< ts -> t <> F.foldMap (\ x -> B.fromString " " <> x) ts
234
235 pTail Nothing = B.fromString ")"
236 pTail (Just t) = B.fromString ". " <> B.fromText t <> B.fromString ")"
237
238 ppBasic (IAtom t) = Just (B.fromText t)
239 ppBasic (IEmpty) = Just (B.fromString "()")
240 ppBasic _ = Nothing
241
99242
100243 -- | Modify the carrier type of a 'SExprPrinter' by describing how
101244 -- to convert the new type back to the previous type. For example,
108251 setFromCarrier :: (c -> b) -> SExprPrinter a b -> SExprPrinter a c
109252 setFromCarrier fc pr = pr { fromCarrier = fromCarrier pr . fc }
110253
254
111255 -- | Dictate a maximum width for pretty-printed s-expressions.
112256 --
113257 -- >>> let printer = setMaxWidth 8 (basicPrint id)
115259 -- "(one \n two\n three)"
116260 setMaxWidth :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
117261 setMaxWidth n pr = pr { maxWidth = Just n }
262
118263
119264 -- | Allow the serialized s-expression to be arbitrarily wide. This
120265 -- makes all pretty-printing happen on a single line.
124269 -- "(one two three)"
125270 removeMaxWidth :: SExprPrinter atom carrier -> SExprPrinter atom carrier
126271 removeMaxWidth pr = pr { maxWidth = Nothing }
272
127273
128274 -- | Set the number of spaces that a subsequent line will be indented
129275 -- after a swing indentation.
135281 -- "(elephant \n pachyderm)"
136282 setIndentAmount :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
137283 setIndentAmount n pr = pr { indentAmount = n }
284
138285
139286 -- | Dictate how to indent subsequent lines based on the leading
140287 -- subexpression in an s-expression. For details on how this works,
149296 setIndentStrategy :: (SExpr atom -> Indent) -> SExprPrinter atom carrier -> SExprPrinter atom carrier
150297 setIndentStrategy st pr = pr { swingIndent = st }
151298
299
152300 -- Sort of like 'unlines' but without the trailing newline
153301 joinLines :: [Text] -> Text
154302 joinLines = T.intercalate "\n"
155303
304
156305 -- Indents a line by n spaces
157306 indent :: Int -> Text -> Text
158307 indent n ts = T.replicate n " " <> ts
308
159309
160310 -- Indents every line n spaces, and adds a newline to the beginning
161311 -- used in swung indents
162312 indentAll :: Int -> [Text] -> Text
163313 indentAll n = ("\n" <>) . joinLines . map (indent n)
314
164315
165316 -- Indents every line but the first by some amount
166317 -- used in aligned indents
170321 indentSubsequent n (t:ts) = joinLines (t : go ts)
171322 where go = map (indent n)
172323
324
173325 -- oh god this code is so disgusting
174326 -- i'm sorry to everyone i let down by writing this
175327 -- i swear i'll do better in the future i promise i have to
179331 -- 'LayoutOptions' value.
180332 prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text
181333 prettyPrintSExpr pr@SExprPrinter { .. } expr = case maxWidth of
182 Nothing -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr))
334 Nothing
335 | indentPrint -> TL.toStrict (unboundIndentPrintSExpr pr (fromCarrier expr))
336 | otherwise -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr))
183337 Just _ -> indentPrintSExpr pr expr
338
184339
185340 indentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text
186341 indentPrintSExpr SExprPrinter { .. } = pHead 0
2626 , Indent(..)
2727 , basicPrint
2828 , flatPrint
29 , unboundIndentPrint
2930 , setFromCarrier
3031 , setMaxWidth
3132 , removeMaxWidth
7373 prettyPrinter :: SExprPrinter () (SExpr ())
7474 prettyPrinter = basicPrint (const "X")
7575
76 widePrinter :: SExprPrinter () (SExpr ())
77 widePrinter = unboundIndentPrint (const "X")
78
7679
7780 richIso :: SExpr () -> Bool
7881 richIso s = fromRich (toRich s) == s
9699 encDecPretty :: SExpr () -> Bool
97100 encDecPretty s = decodeOne parser (encodeOne prettyPrinter s) == Right s
98101
102 encDecWide :: SExpr () -> Bool
103 encDecWide s = decodeOne parser (encodeOne widePrinter s) == Right s
104
99105 decEnc :: EncodedSExpr -> Bool
100106 decEnc s = decodeOne parser (encoding s) == Right (original s)
101107
109115 (encodeOne prettyPrinter (fromRich s))
110116 == Right s
111117
118 encDecRichWide :: RichSExpr () -> Bool
119 encDecRichWide s =
120 decodeOne (asRich parser)
121 (encodeOne widePrinter (fromRich s))
122 == Right s
123
112124 decEncRich :: EncodedSExpr -> Bool
113125 decEncRich s = decodeOne (asRich parser) (encoding s) == Right (toRich (original s))
114126
120132 encDecWFPretty :: WellFormedSExpr () -> Bool
121133 encDecWFPretty s =
122134 decodeOne (asWellFormed parser) (encodeOne prettyPrinter (fromWellFormed s))
135 == Right s
136
137 encDecWFWide :: WellFormedSExpr () -> Bool
138 encDecWFWide s =
139 decodeOne (asWellFormed parser) (encodeOne widePrinter (fromWellFormed s))
123140 == Right s
124141
125142 decEncWF :: EncodedSExpr -> Bool
170187 reallyQuickCheck encDecRichPretty
171188 reallyQuickCheck encDecWFPretty
172189
190 putStrLn "And it should be true if pretty-printed using the wide-format printer"
191 reallyQuickCheck encDecWide
192 reallyQuickCheck encDecRichWide
193 reallyQuickCheck encDecWFWide
194
173195 putStrLn "Comments should not affect parsing"
174196 reallyQuickCheck encDecLineComments
175197 reallyQuickCheck encDecBlockComments