Added some haddocks; removed Lens dependency from Analysis
Getty Ritter
10 years ago
1 | 1 | module Data.Analysis.Fulcrum.Abstract |
2 | ( Typeable | |
3 | , Abstract | |
2 | ( Abstract | |
4 | 3 | , makeAbs |
5 | 4 | , absShow |
6 | 5 | , absShowS |
10 | 9 | import Data.Monoid |
11 | 10 | import Data.Typeable (Typeable) |
12 | 11 | |
12 | -- | An abstract value is a wrapper over Dynamic that supports | |
13 | -- heterogeneous comparison, equality, and retains access to | |
14 | -- the underlying 'show' implementation. This allows us to use | |
15 | -- selectors over possibly heterogeneous values without | |
16 | -- various kinds of type-wrangling. | |
13 | 17 | data Abstract = Abstract |
14 | 18 | { absVal :: Dynamic |
15 | 19 | , absEq :: Abstract -> Bool |
17 | 21 | , absStr :: String |
18 | 22 | } |
19 | 23 | |
24 | -- | Print out a comma-separated list of the values contained | |
25 | -- in a list of 'Abstract' values. | |
20 | 26 | absShowS :: [Abstract] -> String |
21 | 27 | absShowS [] = "" |
22 | 28 | absShowS [x] = absShow x |
23 | 29 | absShowS (x:xs) = absShow x ++ "," ++ absShowS xs |
24 | 30 | |
31 | -- | Get the string representation of the interior value of an | |
32 | -- 'Abstract'. (Note that the 'Show' instance for 'Abstract' | |
33 | -- will also print out the fact that it's an 'Abstract' value, | |
34 | -- whereas this will omit it.) | |
25 | 35 | absShow :: Abstract -> String |
26 | 36 | absShow = absStr |
27 | 37 | |
30 | 40 | | Just x <- fromDynamic dyn = op x |
31 | 41 | | otherwise = def |
32 | 42 | |
33 | -- | Oftentimes we want to compare two things without knowing that | |
34 | -- they're even the same type. We can wrap them in this and | |
35 |
-- |
|
43 | -- | Create an 'Abstract' value. This does mean that relevant | |
44 | -- values in your rows must have a 'Typeable' constraint. | |
36 | 45 | makeAbs :: (Typeable t, Ord t, Show t) => t -> Abstract |
37 | 46 | makeAbs x = Abstract |
38 | 47 | { absVal = toDyn x |
44 | 53 | cmpTyp y = dynTypeRep (toDyn x) `compare` dynTypeRep y |
45 | 54 | |
46 | 55 | instance Eq Abstract where a == b = a `absEq` b |
47 |
instance Show Abstract where show a = " |
|
56 | instance Show Abstract where show a = "makeAbs (" ++ absStr a ++ ")" | |
48 | 57 | instance Ord Abstract where a `compare` b = a `absCmp` b |
1 | {-# LANGUAGE ExistentialQuantification #-} | |
2 | {-# LANGUAGE TemplateHaskell #-} | |
3 | {-# LANGUAGE RankNTypes #-} | |
4 | {-# LANGUAGE DeriveFunctor #-} | |
5 | {-# LANGUAGE RecordWildCards #-} | |
6 | {-# LANGUAGE ScopedTypeVariables #-} | |
1 | {-# LANGUAGE RecordWildCards #-} | |
7 | 2 | |
8 | 3 | module Data.Analysis.Fulcrum.Analysis |
9 | 4 | ( module Data.Analysis.Fulcrum.Abstract |
5 | -- * Plans | |
6 | , Plan(..) | |
7 | , runPlanToMap | |
8 | , defaultPlan | |
10 | 9 | |
11 | , runPlanToMap | |
12 | ||
10 | -- * Helper Types | |
13 | 11 | , Merge |
14 | , Plan(..) | |
15 | , defaultPlan | |
16 | 12 | , Select |
13 | , AbstractTuple | |
14 | , select | |
17 | 15 | , slName |
18 | 16 | , Result |
19 | 17 | |
20 |
|
|
18 | -- * Utility Functions | |
21 | 19 | , getUniqVals |
22 | 20 | , getAxisValues |
23 | 21 | , getLineValues |
24 | 22 | , validRowValue |
25 | 23 | |
26 | , opBy | |
27 | , divBy | |
24 | -- Reduction functions | |
25 | -- , opBy | |
26 | -- , divBy | |
28 | 27 | ) where |
29 | 28 | |
30 | 29 | import Control.Applicative ((<$>)) |
31 | 30 | import Control.Arrow (second) |
32 | import Control.Lens | |
33 | 31 | import Data.Dynamic |
34 | 32 | import Data.Maybe (catMaybes) |
35 | 33 | import Data.Monoid (Monoid(..), (<>)) |
36 | 34 | import Data.List (nub, sort, find) |
37 | 35 | import Data.Map.Strict (Map, fromList) |
36 | import Math.Statistics (average) | |
38 | 37 | |
39 | 38 | import Data.Analysis.Fulcrum.Abstract |
40 | 39 | |
41 | type AVal = [Abstract] | |
40 | -- | The value extracted by a 'Select' may be the result of | |
41 | -- several combined 'Select'ors, and so is represented | |
42 | -- as a list of 'Abstract' values. | |
43 | type AbstractTuple = [Abstract] | |
42 | 44 | |
43 | -- | A merge function is anything that takes several | |
44 | -- rows and merges them to a single value. | |
45 | -- | A 'Merge' function is anything that takes several | |
46 | -- rows and merges them to a single row. | |
45 | 47 | type Merge r = [r] -> r |
46 | 48 | |
47 | -- | A selector returns a list of abstract values, and is used | |
48 | -- for extracting from heterogeneous data. | |
49 | -- | A 'Select' encapsulates the ability to extracts a list of abstract | |
50 | -- values as well as the human-readable name for the field it is | |
51 | -- extracting. The 'Monoid' instance for 'Select' allows us to take | |
52 | -- two possibly heterogeneous 'Getter's and combine them. | |
49 | 53 | data Select r = Select |
50 |
{ _slLens :: |
|
54 | { _slLens :: r -> AbstractTuple | |
51 | 55 | , _slName :: String |
52 | 56 | } |
53 | 57 | |
58 | -- | The human-readable name of a 'Select'. | |
54 | 59 | slName :: Select r -> String |
55 | 60 | slName = _slName |
56 | 61 | |
57 | -- | Turn a getter for a specific type into a getter of a generic | |
58 | -- value (so we can compare possibly across types) | |
59 | select :: (Typeable a, Ord a, Show a) => Getter r a -> String -> Select r | |
60 | select l s = Select (l.to go) s | |
62 | -- | Turn a 'Getter' for a specific type as well as its human-readable | |
63 | -- equivalent into a 'Getter' of a generic value. | |
64 | select :: (Typeable a, Ord a, Show a) => (r -> a) -> String -> Select r | |
65 | select l s = Select (go . l) s | |
61 | 66 | where go x = [makeAbs x] |
62 | 67 | |
63 | 68 | -- | A convenience function to combine strings with commas |
68 | 73 | |
69 | 74 | instance Monoid (Select r) where |
70 | 75 | mappend (Select l ln) (Select r rn) = |
71 | Select (to (\ x -> x^.l <> x^.r)) (comma ln rn) | |
72 | mempty = Select (to (const mempty)) "" | |
76 | Select (\ x -> l x <> r x) (comma ln rn) | |
77 | mempty = Select (const mempty) "" | |
73 | 78 | |
74 | 79 | -- | A convenience function to run a selector over data |
75 | extract :: Select r -> r -> AVal | |
76 | extract (Select l _) r = (r^.l) | |
80 | extract :: Select r -> r -> AbstractTuple | |
81 | extract (Select l _) r = (l r) | |
77 | 82 | |
78 | -- | A plan describes how to go from a dataset of type [row] | |
79 | -- and arrive at a Result map | |
83 | -- | A plan describes how to go from a dataset of type @[row]@ | |
84 | -- and arrive at a @Result@ map | |
80 | 85 | data Plan row focus = Plan |
81 |
{ planFocus :: |
|
86 | { planFocus :: row -> focus -- ^ The final dependent variable | |
82 | 87 | , planFCName :: String -- ^ The name of the dependent variable |
83 | 88 | , planName :: String -- ^ The name of the plan |
84 | 89 | , planFilters :: row -> Bool -- ^ select which rows to keep |
89 | 94 | , planLinAxis :: Bool -- ^ true if the axis is linear |
90 | 95 | } |
91 | 96 | |
92 | defaultPlan :: Plan a a | |
97 | -- | The default plan has sensible defaults where possible, and | |
98 | -- is filled in with undefined functions | |
99 | defaultPlan :: (Floating b) => Plan a b | |
93 | 100 | defaultPlan = Plan |
94 | 101 | { planFocus = error "undefined focus" |
95 | 102 | , planFCName = "undefined" |
98 | 105 | , planMaps = id |
99 | 106 | , planAxis = mempty |
100 | 107 | , planLines = mempty |
101 |
, planMerge = |
|
108 | , planMerge = average | |
102 | 109 | , planLinAxis = True |
103 | 110 | } |
104 | 111 | |
105 | 112 | -- | The result of a plan is always going to be a map from |
106 | 113 | -- (abstract but comparable) keys to some kind of result |
107 | 114 | -- value. |
108 |
type Result r = Map (A |
|
115 | type Result r = Map (AbstractTuple, AbstractTuple) r | |
109 | 116 | |
110 | 117 | -- | Given a selector and a list of rows, groups into sub-lists that |
111 | 118 | -- share a common value for the selector. |
114 | 121 | where go v r = extract l r == v |
115 | 122 | |
116 | 123 | -- | Extract every unique value that appears in the data |
117 |
getUniqVals :: (Select r) -> [r] -> [A |
|
124 | getUniqVals :: (Select r) -> [r] -> [AbstractTuple] | |
118 | 125 | getUniqVals l rs = sort (nub (map (extract l) rs)) |
119 | 126 | |
120 | 127 | -- | Merge the focused value using the merge function |
121 | combine :: Getter r f -> Merge f -> [r] -> f | |
122 | combine focus merge = merge . map (^.focus) | |
128 | combine :: (r -> f) -> Merge f -> [r] -> f | |
129 | combine focus merge = merge . map focus | |
123 | 130 | |
124 | 131 | -- | Group into a map based on the axis values and lines |
125 |
group :: Select r -> Select r -> [r] -> [((A |
|
132 | group :: Select r -> Select r -> [r] -> [((AbstractTuple, AbstractTuple), [r])] | |
126 | 133 | group l a rs = |
127 | 134 | [ ((extract a (head r), extract l (head r)), r) |
128 | 135 | | r <- groupOn (l <> a) rs |
129 | 136 | ] |
130 | 137 | |
131 |
-- | Execute a given |
|
138 | -- | Execute a given 'Plan' on a set of data and return the 'Result' set. | |
132 | 139 | runPlanToMap :: Plan r f -> [r] -> Result f |
133 | 140 | runPlanToMap p@(Plan { .. }) rawData = |
134 | 141 | fromList $ map (second (combine planFocus planMerge)) |
138 | 145 | $ rawData |
139 | 146 | |
140 | 147 | -- | A convenience function for writing maps over rows |
141 | opBy :: (a -> a -> a) -> Setter' r a -> Getter r a -> Getter r a -> r -> r | |
142 | opBy op t l r x = set t ((x^.l) `op` (x^.r)) x | |
148 | opBy :: (a -> a -> a) -> (a -> r -> r) -> (r -> a) -> (r -> a) -> r -> r | |
149 | opBy op set l r x = set ((l x) `op` (r x)) x | |
143 | 150 | |
144 | 151 | -- | A specialized convenience function for normalizing a given value |
145 | 152 | -- by another by division |
146 | divBy :: Lens' r Double -> Getter r Double -> r -> r | |
147 | divBy l = opBy (/) l l | |
153 | -- divBy :: Lens' r Double -> (r -> Double) -> r -> r | |
154 | -- divBy l = opBy (/) l (^.l) | |
148 | 155 | |
149 | 156 | -- | A specialized convenience function for normalizing a given value |
150 | 157 | -- by another using an nth root |
151 | rootBy :: Lens' r Double -> Getter r Double -> r -> r | |
152 | rootBy l = undefined | |
158 | -- rootBy :: Lens' r Double -> (r -> Double) -> r -> r | |
159 | -- rootBy l = undefined | |
153 | 160 | |
154 | 161 | -- | Get the unique points to graph along the X axis |
155 |
getAxisValues :: Plan r f -> [r] -> [A |
|
162 | getAxisValues :: Plan r f -> [r] -> [AbstractTuple] | |
156 | 163 | getAxisValues (Plan { .. }) rows = |
157 | 164 | getUniqVals planAxis (filter planFilters rows) |
158 | 165 | |
161 | 168 | getAxisName = slName . planAxis |
162 | 169 | |
163 | 170 | -- | Get the unique field values which determine the set of lines to produce |
164 |
getLineValues :: Plan r f -> [r] -> [A |
|
171 | getLineValues :: Plan r f -> [r] -> [AbstractTuple] | |
165 | 172 | getLineValues (Plan { .. }) rows = |
166 | 173 | getUniqVals planLines (filter planFilters rows) |
167 | 174 | |
170 | 177 | getLineName = slName . planLines |
171 | 178 | |
172 | 179 | -- | Determine whether a given (abstract) value appears in the data set |
173 |
validRowValue :: Select r -> [r] -> A |
|
180 | validRowValue :: Select r -> [r] -> AbstractTuple -> Bool | |
174 | 181 | validRowValue l rs a = any (== a) (getUniqVals l rs) |
4 | 4 | |
5 | 5 | module Data.Analysis.Fulcrum.Plot (doPlot, doLogPlot) where |
6 | 6 | |
7 |
import Control.Lens |
|
7 | import Control.Lens((.~)) | |
8 | 8 | import Data.Colour (opaque, Colour) |
9 | 9 | import Data.Colour.Names |
10 | 10 | import Data.Default.Class (def) |
15 | 15 | import Math.Statistics (stddev, average) |
16 | 16 | |
17 | 17 | import Data.Analysis.Fulcrum.Analysis |
18 | ||
19 | type AVal = [Abstract] | |
20 | 18 | |
21 | 19 | colors :: [Colour Double] |
22 | 20 | colors = cycle [ blue, red, green, orange, purple, yellow ] |
34 | 32 | where axisNames = getAxisValues plan rows |
35 | 33 | |
36 | 34 | plotLines :: (RealFloat f, PlotValue g) => |
37 | Bool -> (Result f) -> String -> [(Int, AVal)] | |
38 | -> AVal -> Colour Double -> (f -> g) -> [PlotLines Int g] | |
35 | Bool -> (Result f) -> String -> [(Int, AbstractTuple)] | |
36 | -> AbstractTuple -> Colour Double -> (f -> g) -> [PlotLines Int g] | |
39 | 37 | plotLines True vals axisTitle axisNames ln c conv = |
40 | 38 | [ plot_lines_style .~ lineStyle 4 c |
41 | 39 | $ plot_lines_values .~ |
48 | 46 | ] |
49 | 47 | plotLines False _ _ _ _ _ _ = [] |
50 | 48 | |
51 |
plotDots :: (RealFloat f, PlotValue g) => Result f -> [(Int, A |
|
49 | plotDots :: (RealFloat f, PlotValue g) => Result f -> [(Int, AbstractTuple)] -> AbstractTuple | |
52 | 50 | -> Colour Double -> (f -> g) -> PlotPoints Int g |
53 | 51 | plotDots vals axisNames ln color conv |
54 | 52 | = plot_points_style .~ filledCircles 5 (opaque color) |
60 | 58 | $ def |
61 | 59 | |
62 | 60 | plotStds :: (RealFloat f) => Result f -> Result f -> |
63 |
[(Int, A |
|
61 | [(Int, AbstractTuple)] -> AbstractTuple -> Colour Double -> PlotErrBars Int f | |
64 | 62 | plotStds avgs stds axisNames ln color |
65 | 63 | = plot_errbars_line_style .~ lineStyle 1 color |
66 | 64 | $ plot_errbars_values .~ |
71 | 69 | , not (isNaN (avgs ! (ax, ln))) ] |
72 | 70 | $ def |
73 | 71 | |
72 | -- | Given a 'Plan' and a list of rows in which the focus variable is | |
73 | -- numeric and plottable, produce a log-scale plot of the relevant | |
74 | -- values with error bars. The 'doPlot' function will rerun your plan | |
75 | -- taking the 'average' of your focus variables, so custom 'planMerge' | |
76 | -- values will be replaced. | |
74 | 77 | doLogPlot :: Plan r Double -> [r] -> Renderable () |
75 | 78 | doLogPlot plan@(Plan { .. }) rows = |
76 | 79 | toRenderable $ layout_plots .~ map toPlot (concat plots) <> |
90 | 93 | avgs = runPlan average rows |
91 | 94 | axisNums = zip [(0::Int)..] axisNames |
92 | 95 | |
96 | -- | Given a 'Plan' and a list of rows in which the focus variable is | |
97 | -- numeric and plottable, produce a (linear-scale) plot of the relevant | |
98 | -- values with error bars. The 'doPlot' function will rerun your plan | |
99 | -- taking the 'average' and the 'stddev' of your focus variables, so | |
100 | -- custom 'planMerge' values will be replaced. | |
93 | 101 | doPlot :: (RealFloat f, PlotValue f) => Plan r f -> [r] -> Renderable () |
94 | 102 | doPlot plan@(Plan { .. }) rows = |
95 | 103 | toRenderable $ layout_plots .~ map toPlot bars <> |
1 |
module Data.Analysis.Fulcrum.Pretty |
|
1 | module Data.Analysis.Fulcrum.Pretty | |
2 | ( showResults | |
3 | ) where | |
2 | 4 | |
3 |
import Data.List (nub |
|
5 | import Data.List (nub, intersperse) | |
4 | 6 | import Data.Map.Strict (Map) |
5 | 7 | import qualified Data.Map.Strict as M |
6 | 8 | |
7 | 9 | import Data.Analysis.Fulcrum.Analysis |
8 | 10 | |
11 | -- | Produce a pretty, tabular representation of the results of a 'Plan' | |
9 | 12 | showResults :: Show a => Result a -> String |
10 | 13 | showResults = showTable . tabulate |
11 | 14 | |
13 | 16 | tabulate res = |
14 | 17 | [ [""] ++ map absShowS cols ] ++ |
15 | 18 | [ absShowS r : |
16 |
[ maybe "" show |
|
19 | [ maybe "" show $ M.lookup (c,r) res | |
17 | 20 | | c <- cols |
18 | 21 | ] |
19 | 22 | | r <- rows |
20 | 23 | ] |
21 | where rows = nub $ map snd (M.keys res) | |
22 | cols = nub $ map fst (M.keys res) | |
23 | ||
24 | strJoin :: String -> [String] -> String | |
25 | strJoin c [] = [] | |
26 | strJoin c (x:[]) = x | |
27 |
|
|
24 | where rows = nub $ map snd $ M.keys res | |
25 | cols = nub $ map fst $ M.keys res | |
28 | 26 | |
29 | 27 | rowJoin :: [String] -> String |
30 |
rowJoin = |
|
28 | rowJoin = concat . intersperse " | " | |
31 | 29 | |
32 | 30 | showTable :: [[String]] -> String |
33 | 31 | showTable tab = unlines $ map rowJoin $ map padRow tab |
34 | where sizes :: [Int] | |
35 | sizes = [ maximum (map (length . (!! n)) tab) | |
32 | where sizes = [ maximum (map (length . (!! n)) tab) | |
36 | 33 | | n <- [0..(length (head tab))-1] |
37 | 34 | ] |
38 | 35 | pad len str = |
39 | 36 | let diff = len - length str |
40 | 37 | in str ++ (take diff $ repeat ' ') |
41 | padRow :: [String] -> [String] | |
42 | 38 | padRow = zipWith pad sizes |
1 | {-| | |
2 | "fulcrum" is a simple way of doing quick-and-dirty pivot table-style | |
3 | data analysis with utility functions to produce both machine- and | |
4 | human-readable output formats, both graphical and textual, for | |
5 | quick data analysis and visualization. | |
6 | ||
7 | "fulcrum" relies on "lens" for accessors. | |
8 | -} | |
9 | ||
1 | 10 | module Data.Analysis.Fulcrum |
2 | 11 | ( module Data.Analysis.Fulcrum.Analysis |
3 | 12 | , module Data.Analysis.Fulcrum.Pretty |