gdritter repos fulcrum / master
Added some haddocks; removed Lens dependency from Analysis Getty Ritter 9 years ago
6 changed file(s) with 106 addition(s) and 75 deletion(s). Collapse all Expand all
11 module Data.Analysis.Fulcrum.Abstract
2 ( Typeable
3 , Abstract
2 ( Abstract
43 , makeAbs
54 , absShow
65 , absShowS
109 import Data.Monoid
1110 import Data.Typeable (Typeable)
1211
12 -- | An abstract value is a wrapper over Dynamic that supports
13 -- heterogeneous comparison, equality, and retains access to
14 -- the underlying 'show' implementation. This allows us to use
15 -- selectors over possibly heterogeneous values without
16 -- various kinds of type-wrangling.
1317 data Abstract = Abstract
1418 { absVal :: Dynamic
1519 , absEq :: Abstract -> Bool
1721 , absStr :: String
1822 }
1923
24 -- | Print out a comma-separated list of the values contained
25 -- in a list of 'Abstract' values.
2026 absShowS :: [Abstract] -> String
2127 absShowS [] = ""
2228 absShowS [x] = absShow x
2329 absShowS (x:xs) = absShow x ++ "," ++ absShowS xs
2430
31 -- | Get the string representation of the interior value of an
32 -- 'Abstract'. (Note that the 'Show' instance for 'Abstract'
33 -- will also print out the fact that it's an 'Abstract' value,
34 -- whereas this will omit it.)
2535 absShow :: Abstract -> String
2636 absShow = absStr
2737
3040 | Just x <- fromDynamic dyn = op x
3141 | otherwise = def
3242
33 -- | Oftentimes we want to compare two things without knowing that
34 -- they're even the same type. We can wrap them in this and
35 -- ensure that those operations are still possible.
43 -- | Create an 'Abstract' value. This does mean that relevant
44 -- values in your rows must have a 'Typeable' constraint.
3645 makeAbs :: (Typeable t, Ord t, Show t) => t -> Abstract
3746 makeAbs x = Abstract
3847 { absVal = toDyn x
4453 cmpTyp y = dynTypeRep (toDyn x) `compare` dynTypeRep y
4554
4655 instance Eq Abstract where a == b = a `absEq` b
47 instance Show Abstract where show a = "Abstract(" ++ absStr a ++ ")"
56 instance Show Abstract where show a = "makeAbs (" ++ absStr a ++ ")"
4857 instance Ord Abstract where a `compare` b = a `absCmp` b
1 {-# LANGUAGE ExistentialQuantification #-}
2 {-# LANGUAGE TemplateHaskell #-}
3 {-# LANGUAGE RankNTypes #-}
4 {-# LANGUAGE DeriveFunctor #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE ScopedTypeVariables #-}
1 {-# LANGUAGE RecordWildCards #-}
72
83 module Data.Analysis.Fulcrum.Analysis
94 ( module Data.Analysis.Fulcrum.Abstract
5 -- * Plans
6 , Plan(..)
7 , runPlanToMap
8 , defaultPlan
109
11 , runPlanToMap
12
10 -- * Helper Types
1311 , Merge
14 , Plan(..)
15 , defaultPlan
1612 , Select
13 , AbstractTuple
14 , select
1715 , slName
1816 , Result
1917
20 , select
18 -- * Utility Functions
2119 , getUniqVals
2220 , getAxisValues
2321 , getLineValues
2422 , validRowValue
2523
26 , opBy
27 , divBy
24 -- Reduction functions
25 -- , opBy
26 -- , divBy
2827 ) where
2928
3029 import Control.Applicative ((<$>))
3130 import Control.Arrow (second)
32 import Control.Lens
3331 import Data.Dynamic
3432 import Data.Maybe (catMaybes)
3533 import Data.Monoid (Monoid(..), (<>))
3634 import Data.List (nub, sort, find)
3735 import Data.Map.Strict (Map, fromList)
36 import Math.Statistics (average)
3837
3938 import Data.Analysis.Fulcrum.Abstract
4039
41 type AVal = [Abstract]
40 -- | The value extracted by a 'Select' may be the result of
41 -- several combined 'Select'ors, and so is represented
42 -- as a list of 'Abstract' values.
43 type AbstractTuple = [Abstract]
4244
43 -- | A merge function is anything that takes several
44 -- rows and merges them to a single value.
45 -- | A 'Merge' function is anything that takes several
46 -- rows and merges them to a single row.
4547 type Merge r = [r] -> r
4648
47 -- | A selector returns a list of abstract values, and is used
48 -- for extracting from heterogeneous data.
49 -- | A 'Select' encapsulates the ability to extract a list of abstract
50 -- values as well as the human-readable name for the field it is
51 -- extracting. The 'Monoid' instance for 'Select' allows us to take
52 -- two possibly heterogeneous 'Select's and combine them.
4953 data Select r = Select
50 { _slLens :: Getter r AVal
54 { _slLens :: r -> AbstractTuple
5155 , _slName :: String
5256 }
5357
58 -- | The human-readable name of a 'Select'.
5459 slName :: Select r -> String
5560 slName = _slName
5661
57 -- | Turn a getter for a specific type into a getter of a generic
58 -- value (so we can compare possibly across types)
59 select :: (Typeable a, Ord a, Show a) => Getter r a -> String -> Select r
60 select l s = Select (l.to go) s
62 -- | Turn an accessor function for a specific type, together with its
63 -- human-readable name, into a 'Select' of generic ('Abstract') values.
64 select :: (Typeable a, Ord a, Show a) => (r -> a) -> String -> Select r
65 select l s = Select (go . l) s
6166 where go x = [makeAbs x]
6267
6368 -- | A convenience function to combine strings with commas
6873
6974 instance Monoid (Select r) where
7075 mappend (Select l ln) (Select r rn) =
71 Select (to (\ x -> x^.l <> x^.r)) (comma ln rn)
72 mempty = Select (to (const mempty)) ""
76 Select (\ x -> l x <> r x) (comma ln rn)
77 mempty = Select (const mempty) ""
7378
7479 -- | A convenience function to run a selector over data
75 extract :: Select r -> r -> AVal
76 extract (Select l _) r = (r^.l)
80 extract :: Select r -> r -> AbstractTuple
81 extract (Select l _) r = (l r)
7782
78 -- | A plan describes how to go from a dataset of type [row]
79 -- and arrive at a Result map
83 -- | A plan describes how to go from a dataset of type @[row]@
84 -- and arrive at a @Result@ map
8085 data Plan row focus = Plan
81 { planFocus :: Getter row focus -- ^ The final dependent variable
86 { planFocus :: row -> focus -- ^ The final dependent variable
8287 , planFCName :: String -- ^ The name of the dependent variable
8388 , planName :: String -- ^ The name of the plan
8489 , planFilters :: row -> Bool -- ^ select which rows to keep
8994 , planLinAxis :: Bool -- ^ true if the axis is linear
9095 }
9196
92 defaultPlan :: Plan a a
97 -- | The default plan has sensible defaults where possible, and
98 -- uses an 'error' placeholder where there is none (e.g. 'planFocus').
99 defaultPlan :: (Floating b) => Plan a b
93100 defaultPlan = Plan
94101 { planFocus = error "undefined focus"
95102 , planFCName = "undefined"
98105 , planMaps = id
99106 , planAxis = mempty
100107 , planLines = mempty
101 , planMerge = error "undefined merge"
108 , planMerge = average
102109 , planLinAxis = True
103110 }
104111
105112 -- | The result of a plan is always going to be a map from
106113 -- (abstract but comparable) keys to some kind of result
107114 -- value.
108 type Result r = Map (AVal, AVal) r
115 type Result r = Map (AbstractTuple, AbstractTuple) r
109116
110117 -- | Given a selector and a list of rows, groups into sub-lists that
111118 -- share a common value for the selector.
114121 where go v r = extract l r == v
115122
116123 -- | Extract every unique value that appears in the data
117 getUniqVals :: (Select r) -> [r] -> [AVal]
124 getUniqVals :: (Select r) -> [r] -> [AbstractTuple]
118125 getUniqVals l rs = sort (nub (map (extract l) rs))
119126
120127 -- | Merge the focused value using the merge function
121 combine :: Getter r f -> Merge f -> [r] -> f
122 combine focus merge = merge . map (^.focus)
128 combine :: (r -> f) -> Merge f -> [r] -> f
129 combine focus merge = merge . map focus
123130
124131 -- | Group into a map based on the axis values and lines
125 group :: Select r -> Select r -> [r] -> [((AVal, AVal), [r])]
132 group :: Select r -> Select r -> [r] -> [((AbstractTuple, AbstractTuple), [r])]
126133 group l a rs =
127134 [ ((extract a (head r), extract l (head r)), r)
128135 | r <- groupOn (l <> a) rs
129136 ]
130137
131 -- | Execute a given plan on a set of data
138 -- | Execute a given 'Plan' on a set of data and return the 'Result' set.
132139 runPlanToMap :: Plan r f -> [r] -> Result f
133140 runPlanToMap p@(Plan { .. }) rawData =
134141 fromList $ map (second (combine planFocus planMerge))
138145 $ rawData
139146
140147 -- | A convenience function for writing maps over rows
141 opBy :: (a -> a -> a) -> Setter' r a -> Getter r a -> Getter r a -> r -> r
142 opBy op t l r x = set t ((x^.l) `op` (x^.r)) x
148 opBy :: (a -> a -> a) -> (a -> r -> r) -> (r -> a) -> (r -> a) -> r -> r
149 opBy op set l r x = set ((l x) `op` (r x)) x
143150
144151 -- | A specialized convenience function for normalizing a given value
145152 -- by another by division
146 divBy :: Lens' r Double -> Getter r Double -> r -> r
147 divBy l = opBy (/) l l
153 -- divBy :: Lens' r Double -> (r -> Double) -> r -> r
154 -- divBy l = opBy (/) l (^.l)
148155
149156 -- | A specialized convenience function for normalizing a given value
150157 -- by another using an nth root
151 rootBy :: Lens' r Double -> Getter r Double -> r -> r
152 rootBy l = undefined
158 -- rootBy :: Lens' r Double -> (r -> Double) -> r -> r
159 -- rootBy l = undefined
153160
154161 -- | Get the unique points to graph along the X axis
155 getAxisValues :: Plan r f -> [r] -> [AVal]
162 getAxisValues :: Plan r f -> [r] -> [AbstractTuple]
156163 getAxisValues (Plan { .. }) rows =
157164 getUniqVals planAxis (filter planFilters rows)
158165
161168 getAxisName = slName . planAxis
162169
163170 -- | Get the unique field values which determine the set of lines to produce
164 getLineValues :: Plan r f -> [r] -> [AVal]
171 getLineValues :: Plan r f -> [r] -> [AbstractTuple]
165172 getLineValues (Plan { .. }) rows =
166173 getUniqVals planLines (filter planFilters rows)
167174
170177 getLineName = slName . planLines
171178
172179 -- | Determine whether a given (abstract) value appears in the data set
173 validRowValue :: Select r -> [r] -> AVal -> Bool
180 validRowValue :: Select r -> [r] -> AbstractTuple -> Bool
174181 validRowValue l rs a = any (== a) (getUniqVals l rs)
44
55 module Data.Analysis.Fulcrum.Plot (doPlot, doLogPlot) where
66
7 import Control.Lens ((^.), (.~), (&), set, Lens', to)
7 import Control.Lens((.~))
88 import Data.Colour (opaque, Colour)
99 import Data.Colour.Names
1010 import Data.Default.Class (def)
1515 import Math.Statistics (stddev, average)
1616
1717 import Data.Analysis.Fulcrum.Analysis
18
19 type AVal = [Abstract]
2018
2119 colors :: [Colour Double]
2220 colors = cycle [ blue, red, green, orange, purple, yellow ]
3432 where axisNames = getAxisValues plan rows
3533
3634 plotLines :: (RealFloat f, PlotValue g) =>
37 Bool -> (Result f) -> String -> [(Int, AVal)]
38 -> AVal -> Colour Double -> (f -> g) -> [PlotLines Int g]
35 Bool -> (Result f) -> String -> [(Int, AbstractTuple)]
36 -> AbstractTuple -> Colour Double -> (f -> g) -> [PlotLines Int g]
3937 plotLines True vals axisTitle axisNames ln c conv =
4038 [ plot_lines_style .~ lineStyle 4 c
4139 $ plot_lines_values .~
4846 ]
4947 plotLines False _ _ _ _ _ _ = []
5048
51 plotDots :: (RealFloat f, PlotValue g) => Result f -> [(Int, AVal)] -> AVal
49 plotDots :: (RealFloat f, PlotValue g) => Result f -> [(Int, AbstractTuple)] -> AbstractTuple
5250 -> Colour Double -> (f -> g) -> PlotPoints Int g
5351 plotDots vals axisNames ln color conv
5452 = plot_points_style .~ filledCircles 5 (opaque color)
6058 $ def
6159
6260 plotStds :: (RealFloat f) => Result f -> Result f ->
63 [(Int, AVal)] -> AVal -> Colour Double -> PlotErrBars Int f
61 [(Int, AbstractTuple)] -> AbstractTuple -> Colour Double -> PlotErrBars Int f
6462 plotStds avgs stds axisNames ln color
6563 = plot_errbars_line_style .~ lineStyle 1 color
6664 $ plot_errbars_values .~
7169 , not (isNaN (avgs ! (ax, ln))) ]
7270 $ def
7371
72 -- | Given a 'Plan' and a list of rows in which the focus variable is
73 -- numeric and plottable, produce a log-scale plot of the relevant
74 -- values with error bars. The 'doLogPlot' function will rerun your plan
75 -- taking the 'average' of your focus variables, so custom 'planMerge'
76 -- values will be replaced.
7477 doLogPlot :: Plan r Double -> [r] -> Renderable ()
7578 doLogPlot plan@(Plan { .. }) rows =
7679 toRenderable $ layout_plots .~ map toPlot (concat plots) <>
9093 avgs = runPlan average rows
9194 axisNums = zip [(0::Int)..] axisNames
9295
96 -- | Given a 'Plan' and a list of rows in which the focus variable is
97 -- numeric and plottable, produce a (linear-scale) plot of the relevant
98 -- values with error bars. The 'doPlot' function will rerun your plan
99 -- taking the 'average' and the 'stddev' of your focus variables, so
100 -- custom 'planMerge' values will be replaced.
93101 doPlot :: (RealFloat f, PlotValue f) => Plan r f -> [r] -> Renderable ()
94102 doPlot plan@(Plan { .. }) rows =
95103 toRenderable $ layout_plots .~ map toPlot bars <>
1 module Data.Analysis.Fulcrum.Pretty (showResults) where
1 module Data.Analysis.Fulcrum.Pretty
2 ( showResults
3 ) where
24
3 import Data.List (nub)
5 import Data.List (nub, intersperse)
46 import Data.Map.Strict (Map)
57 import qualified Data.Map.Strict as M
68
79 import Data.Analysis.Fulcrum.Analysis
810
11 -- | Produce a pretty, tabular representation of the results of a 'Plan'
912 showResults :: Show a => Result a -> String
1013 showResults = showTable . tabulate
1114
1316 tabulate res =
1417 [ [""] ++ map absShowS cols ] ++
1518 [ absShowS r :
16 [ maybe "" show (M.lookup (c,r) res)
19 [ maybe "" show $ M.lookup (c,r) res
1720 | c <- cols
1821 ]
1922 | r <- rows
2023 ]
21 where rows = nub $ map snd (M.keys res)
22 cols = nub $ map fst (M.keys res)
23
24 strJoin :: String -> [String] -> String
25 strJoin c [] = []
26 strJoin c (x:[]) = x
27 strJoin c (x:xs) = x ++ c ++ strJoin c xs
24 where rows = nub $ map snd $ M.keys res
25 cols = nub $ map fst $ M.keys res
2826
2927 rowJoin :: [String] -> String
30 rowJoin = strJoin " | "
28 rowJoin = concat . intersperse " | "
3129
3230 showTable :: [[String]] -> String
3331 showTable tab = unlines $ map rowJoin $ map padRow tab
34 where sizes :: [Int]
35 sizes = [ maximum (map (length . (!! n)) tab)
32 where sizes = [ maximum (map (length . (!! n)) tab)
3633 | n <- [0..(length (head tab))-1]
3734 ]
3835 pad len str =
3936 let diff = len - length str
4037 in str ++ (take diff $ repeat ' ')
41 padRow :: [String] -> [String]
4238 padRow = zipWith pad sizes
1 {-|
2 "fulcrum" is a simple way of doing quick-and-dirty pivot table-style
3 data analysis with utility functions to produce both machine- and
4 human-readable output formats, both graphical and textual, for
5 quick data analysis and visualization.
6
7 "fulcrum" relies on "lens" for accessors.
8 -}
9
110 module Data.Analysis.Fulcrum
211 ( module Data.Analysis.Fulcrum.Analysis
312 , module Data.Analysis.Fulcrum.Pretty
55 maintainer: gdritter@galois.com
66 build-type: Simple
77 cabal-version: >=1.10
8 description: "fulcrum" is intended for simple, scriptable, convenient
9 data analysis tasks in the style of pivot tables.
810
911 library
1012 exposed-modules: Data.Analysis.Fulcrum,