| 1 |  | {-# LANGUAGE ExistentialQuantification #-} | 
| 2 |  | {-# LANGUAGE TemplateHaskell           #-} | 
| 3 |  | {-# LANGUAGE RankNTypes                #-} | 
| 4 |  | {-# LANGUAGE DeriveFunctor             #-} | 
| 5 |  | {-# LANGUAGE RecordWildCards           #-} | 
| 6 |  | {-# LANGUAGE ScopedTypeVariables       #-} | 
|  | 1 | {-# LANGUAGE RecordWildCards #-} | 
| 7 | 2 |  | 
| 8 | 3 | module Data.Analysis.Fulcrum.Analysis | 
| 9 | 4 | ( module Data.Analysis.Fulcrum.Abstract | 
|  | 5 | -- * Plans | 
|  | 6 | , Plan(..) | 
|  | 7 | , runPlanToMap | 
|  | 8 | , defaultPlan | 
| 10 | 9 |  | 
| 11 |  | , runPlanToMap | 
| 12 |  |  | 
|  | 10 | -- * Helper Types | 
| 13 | 11 | , Merge | 
| 14 |  | , Plan(..) | 
| 15 |  | , defaultPlan | 
| 16 | 12 | , Select | 
|  | 13 | , AbstractTuple | 
|  | 14 | , select | 
| 17 | 15 | , slName | 
| 18 | 16 | , Result | 
| 19 | 17 |  | 
| 20 |  | , select | 
|  | 18 | -- * Utility Functions | 
| 21 | 19 | , getUniqVals | 
| 22 | 20 | , getAxisValues | 
| 23 | 21 | , getLineValues | 
| 24 | 22 | , validRowValue | 
| 25 | 23 |  | 
| 26 |  | , opBy | 
| 27 |  | , divBy | 
|  | 24 | -- Reduction functions | 
|  | 25 | --  , opBy | 
|  | 26 | --  , divBy | 
| 28 | 27 | ) where | 
| 29 | 28 |  | 
| 30 | 29 | import Control.Applicative ((<$>)) | 
| 31 | 30 | import Control.Arrow (second) | 
| 32 |  | import Control.Lens | 
| 33 | 31 | import Data.Dynamic | 
| 34 | 32 | import Data.Maybe (catMaybes) | 
| 35 | 33 | import Data.Monoid (Monoid(..), (<>)) | 
| 36 | 34 | import Data.List (nub, sort, find) | 
| 37 | 35 | import Data.Map.Strict (Map, fromList) | 
|  | 36 | import Math.Statistics (average) | 
| 38 | 37 |  | 
| 39 | 38 | import Data.Analysis.Fulcrum.Abstract | 
| 40 | 39 |  | 
| 41 |  | type AVal = [Abstract] | 
|  | 40 | -- | The value extracted by a 'Select' may be the result of | 
|  | 41 | --   several combined 'Select'ors, and so is represented | 
|  | 42 | --   as a list of 'Abstract' values. | 
|  | 43 | type AbstractTuple = [Abstract] | 
| 42 | 44 |  | 
| 43 |  | -- | A merge function is anything that takes several | 
| 44 |  | --   rows and merges them to a single value. | 
|  | 45 | -- | A 'Merge' function is anything that takes several | 
|  | 46 | --   rows and merges them to a single row. | 
| 45 | 47 | type Merge r = [r] -> r | 
| 46 | 48 |  | 
| 47 |  | -- | A selector returns a list of abstract values, and is used | 
| 48 |  | --   for extracting from heterogeneous data. | 
|  | 49 | -- | A 'Select' encapsulates the ability to extracts a list of abstract | 
|  | 50 | --   values as well as the human-readable name for the field it is | 
|  | 51 | --   extracting. The 'Monoid' instance for 'Select' allows us to take | 
|  | 52 | --   two possibly heterogeneous 'Getter's and combine them. | 
| 49 | 53 | data Select r = Select | 
| 50 |  | { _slLens :: Getter r AVal | 
|  | 54 | { _slLens :: r -> AbstractTuple | 
| 51 | 55 | , _slName :: String | 
| 52 | 56 | } | 
| 53 | 57 |  | 
|  | 58 | -- | The human-readable name of a 'Select'. | 
| 54 | 59 | slName :: Select r -> String | 
| 55 | 60 | slName = _slName | 
| 56 | 61 |  | 
| 57 |  | -- | Turn a getter for a specific type into a getter of a generic | 
| 58 |  | --   value (so we can compare possibly across types) | 
| 59 |  | select :: (Typeable a, Ord a, Show a) => Getter r a -> String -> Select r | 
| 60 |  | select l s = Select (l.to go) s | 
|  | 62 | -- | Turn a 'Getter' for a specific type as well as its human-readable | 
|  | 63 | --   equivalent into a 'Getter' of a generic value. | 
|  | 64 | select :: (Typeable a, Ord a, Show a) => (r -> a) -> String -> Select r | 
|  | 65 | select l s = Select (go . l) s | 
| 61 | 66 | where go x = [makeAbs x] | 
| 62 | 67 |  | 
| 63 | 68 | -- | A convenience function to combine strings with commas | 
            
              
                |  | 
            
          | 68 | 73 |  | 
| 69 | 74 | instance Monoid (Select r) where | 
| 70 | 75 | mappend (Select l ln) (Select r rn) = | 
| 71 |  | Select (to (\ x -> x^.l <> x^.r)) (comma ln rn) | 
| 72 |  | mempty = Select (to (const mempty)) "" | 
|  | 76 | Select (\ x -> l x <> r x) (comma ln rn) | 
|  | 77 | mempty = Select (const mempty) "" | 
| 73 | 78 |  | 
| 74 | 79 | -- | A convenience function to run a selector over data | 
| 75 |  | extract :: Select r -> r -> AVal | 
| 76 |  | extract (Select l _) r = (r^.l) | 
|  | 80 | extract :: Select r -> r -> AbstractTuple | 
|  | 81 | extract (Select l _) r = (l r) | 
| 77 | 82 |  | 
| 78 |  | -- | A plan describes how to go from a dataset of type [row] | 
| 79 |  | --   and arrive at a Result map | 
|  | 83 | -- | A plan describes how to go from a dataset of type @[row]@ | 
|  | 84 | --   and arrive at a @Result@ map | 
| 80 | 85 | data Plan row focus = Plan | 
| 81 |  | { planFocus   :: Getter row focus-- ^ The final dependent variable | 
|  | 86 | { planFocus   :: row -> focus     -- ^ The final dependent variable | 
| 82 | 87 | , planFCName  :: String           -- ^ The name of the dependent variable | 
| 83 | 88 | , planName    :: String           -- ^ The name of the plan | 
| 84 | 89 | , planFilters :: row -> Bool      -- ^ select which rows to keep | 
            
              
                |  | 
            
          | 89 | 94 | , planLinAxis :: Bool             -- ^ true if the axis is linear | 
| 90 | 95 | } | 
| 91 | 96 |  | 
| 92 |  | defaultPlan :: Plan a a | 
|  | 97 | -- | The default plan has sensible defaults where possible, and | 
|  | 98 | --   is filled in with undefined functions | 
|  | 99 | defaultPlan :: (Floating b) => Plan a b | 
| 93 | 100 | defaultPlan = Plan | 
| 94 | 101 | { planFocus   = error "undefined focus" | 
| 95 | 102 | , planFCName  = "undefined" | 
            
              
                |  | 
            
          | 98 | 105 | , planMaps    = id | 
| 99 | 106 | , planAxis    = mempty | 
| 100 | 107 | , planLines   = mempty | 
| 101 |  | , planMerge   = error "undefined merge" | 
|  | 108 | , planMerge   = average | 
| 102 | 109 | , planLinAxis = True | 
| 103 | 110 | } | 
| 104 | 111 |  | 
| 105 | 112 | -- | The result of a plan is always going to be a map from | 
| 106 | 113 | --   (abstract but comparable) keys to some kind of result | 
| 107 | 114 | --   value. | 
| 108 |  | type Result r = Map (A Val, AVal) r | 
|  | 115 | type Result r = Map (AbstractTuple, AbstractTuple) r | 
| 109 | 116 |  | 
| 110 | 117 | -- | Given a selector and a list of rows, groups into sub-lists that | 
| 111 | 118 | --   share a common value for the selector. | 
            
              
                |  | 
            
          | 114 | 121 | where go v r = extract l r == v | 
| 115 | 122 |  | 
| 116 | 123 | -- | Extract every unique value that appears in the data | 
| 117 |  | getUniqVals :: (Select r) -> [r] -> [A Val] | 
|  | 124 | getUniqVals :: (Select r) -> [r] -> [AbstractTuple] | 
| 118 | 125 | getUniqVals l rs = sort (nub (map (extract l) rs)) | 
| 119 | 126 |  | 
| 120 | 127 | -- | Merge the focused value using the merge function | 
| 121 |  | combine :: Getter r f -> Merge f -> [r] -> f | 
| 122 |  | combine focus merge = merge . map (^.focus) | 
|  | 128 | combine :: (r -> f) -> Merge f -> [r] -> f | 
|  | 129 | combine focus merge = merge . map focus | 
| 123 | 130 |  | 
| 124 | 131 | -- | Group into a map based on the axis values and lines | 
| 125 |  | group :: Select r -> Select r -> [r] -> [((A Val, AVal), [r])] | 
|  | 132 | group :: Select r -> Select r -> [r] -> [((AbstractTuple, AbstractTuple), [r])] | 
| 126 | 133 | group l a rs = | 
| 127 | 134 | [ ((extract a (head r), extract l (head r)), r) | 
| 128 | 135 | | r <- groupOn (l <> a) rs | 
| 129 | 136 | ] | 
| 130 | 137 |  | 
| 131 |  | -- | Execute a given plan on a set of data | 
|  | 138 | -- | Execute a given 'Plan' on a set of data and return the 'Result' set. | 
| 132 | 139 | runPlanToMap :: Plan r f -> [r] -> Result f | 
| 133 | 140 | runPlanToMap p@(Plan { .. }) rawData = | 
| 134 | 141 | fromList $ map (second (combine planFocus planMerge)) | 
            
              
                |  | 
            
          | 138 | 145 | $ rawData | 
| 139 | 146 |  | 
| 140 | 147 | -- | A convenience function for writing maps over rows | 
| 141 |  | opBy :: (a -> a -> a) -> Setter' r a -> Getter r a -> Getter r a -> r -> r | 
| 142 |  | opBy op t l r x = set t ((x^.l) `op` (x^.r)) x | 
|  | 148 | opBy :: (a -> a -> a) -> (a -> r -> r) -> (r -> a) -> (r -> a) -> r -> r | 
|  | 149 | opBy op set l r x = set ((l x) `op` (r x)) x | 
| 143 | 150 |  | 
| 144 | 151 | -- | A specialized convenience function for normalizing a given value | 
| 145 | 152 | --   by another by division | 
| 146 |  | divBy :: Lens' r Double -> Getter r Double -> r -> r | 
| 147 |  | divBy l = opBy (/) l l | 
|  | 153 | -- divBy :: Lens' r Double -> (r -> Double) -> r -> r | 
|  | 154 | -- divBy l = opBy (/) l (^.l) | 
| 148 | 155 |  | 
| 149 | 156 | -- | A specialized convenience function for normalizing a given value | 
| 150 | 157 | --   by another using an nth root | 
| 151 |  | rootBy :: Lens' r Double -> Getter r Double -> r -> r | 
| 152 |  | rootBy l = undefined | 
|  | 158 | -- rootBy :: Lens' r Double -> (r -> Double) -> r -> r | 
|  | 159 | -- rootBy l = undefined | 
| 153 | 160 |  | 
| 154 | 161 | -- | Get the unique points to graph along the X axis | 
| 155 |  | getAxisValues :: Plan r f -> [r] -> [A Val] | 
|  | 162 | getAxisValues :: Plan r f -> [r] -> [AbstractTuple] | 
| 156 | 163 | getAxisValues (Plan { .. }) rows = | 
| 157 | 164 | getUniqVals planAxis (filter planFilters rows) | 
| 158 | 165 |  | 
            
              
                |  | 
            
          | 161 | 168 | getAxisName = slName . planAxis | 
| 162 | 169 |  | 
| 163 | 170 | -- | Get the unique field values which determine the set of lines to produce | 
| 164 |  | getLineValues :: Plan r f -> [r] -> [A Val] | 
|  | 171 | getLineValues :: Plan r f -> [r] -> [AbstractTuple] | 
| 165 | 172 | getLineValues (Plan { .. }) rows = | 
| 166 | 173 | getUniqVals planLines (filter planFilters rows) | 
| 167 | 174 |  | 
            
              
                |  | 
            
          | 170 | 177 | getLineName = slName . planLines | 
| 171 | 178 |  | 
| 172 | 179 | -- | Determine whether a given (abstract) value appears in the data set | 
| 173 |  | validRowValue :: Select r -> [r] -> A Val-> Bool | 
|  | 180 | validRowValue :: Select r -> [r] -> AbstractTuple -> Bool | 
| 174 | 181 | validRowValue l rs a = any (== a) (getUniqVals l rs) |