| 1 | |
{-# LANGUAGE ExistentialQuantification #-}
|
| 2 | |
{-# LANGUAGE TemplateHaskell #-}
|
| 3 | |
{-# LANGUAGE RankNTypes #-}
|
| 4 | |
{-# LANGUAGE DeriveFunctor #-}
|
| 5 | |
{-# LANGUAGE RecordWildCards #-}
|
| 6 | |
{-# LANGUAGE ScopedTypeVariables #-}
|
| 1 |
{-# LANGUAGE RecordWildCards #-}
|
| 7 | 2 |
|
| 8 | 3 |
module Data.Analysis.Fulcrum.Analysis
|
| 9 | 4 |
( module Data.Analysis.Fulcrum.Abstract
|
| 5 |
-- * Plans
|
| 6 |
, Plan(..)
|
| 7 |
, runPlanToMap
|
| 8 |
, defaultPlan
|
| 10 | 9 |
|
| 11 | |
, runPlanToMap
|
| 12 | |
|
| 10 |
-- * Helper Types
|
| 13 | 11 |
, Merge
|
| 14 | |
, Plan(..)
|
| 15 | |
, defaultPlan
|
| 16 | 12 |
, Select
|
| 13 |
, AbstractTuple
|
| 14 |
, select
|
| 17 | 15 |
, slName
|
| 18 | 16 |
, Result
|
| 19 | 17 |
|
| 20 | |
, select
|
| 18 |
-- * Utility Functions
|
| 21 | 19 |
, getUniqVals
|
| 22 | 20 |
, getAxisValues
|
| 23 | 21 |
, getLineValues
|
| 24 | 22 |
, validRowValue
|
| 25 | 23 |
|
| 26 | |
, opBy
|
| 27 | |
, divBy
|
| 24 |
-- Reduction functions
|
| 25 |
-- , opBy
|
| 26 |
-- , divBy
|
| 28 | 27 |
) where
|
| 29 | 28 |
|
| 30 | 29 |
import Control.Applicative ((<$>))
|
| 31 | 30 |
import Control.Arrow (second)
|
| 32 | |
import Control.Lens
|
| 33 | 31 |
import Data.Dynamic
|
| 34 | 32 |
import Data.Maybe (catMaybes)
|
| 35 | 33 |
import Data.Monoid (Monoid(..), (<>))
|
| 36 | 34 |
import Data.List (nub, sort, find)
|
| 37 | 35 |
import Data.Map.Strict (Map, fromList)
|
| 36 |
import Math.Statistics (average)
|
| 38 | 37 |
|
| 39 | 38 |
import Data.Analysis.Fulcrum.Abstract
|
| 40 | 39 |
|
| 41 | |
type AVal = [Abstract]
|
| 40 |
-- | The value extracted by a 'Select' may be the result of
|
| 41 |
-- several combined 'Select'ors, and so is represented
|
| 42 |
-- as a list of 'Abstract' values.
|
| 43 |
type AbstractTuple = [Abstract]
|
| 42 | 44 |
|
| 43 | |
-- | A merge function is anything that takes several
|
| 44 | |
-- rows and merges them to a single value.
|
| 45 |
-- | A 'Merge' function is anything that takes several
|
| 46 |
-- rows and merges them to a single row.
|
| 45 | 47 |
type Merge r = [r] -> r
|
| 46 | 48 |
|
| 47 | |
-- | A selector returns a list of abstract values, and is used
|
| 48 | |
-- for extracting from heterogeneous data.
|
| 49 |
-- | A 'Select' encapsulates the ability to extracts a list of abstract
|
| 50 |
-- values as well as the human-readable name for the field it is
|
| 51 |
-- extracting. The 'Monoid' instance for 'Select' allows us to take
|
| 52 |
-- two possibly heterogeneous 'Getter's and combine them.
|
| 49 | 53 |
data Select r = Select
|
| 50 | |
{ _slLens :: Getter r AVal
|
| 54 |
{ _slLens :: r -> AbstractTuple
|
| 51 | 55 |
, _slName :: String
|
| 52 | 56 |
}
|
| 53 | 57 |
|
| 58 |
-- | The human-readable name of a 'Select'.
|
| 54 | 59 |
slName :: Select r -> String
|
| 55 | 60 |
slName = _slName
|
| 56 | 61 |
|
| 57 | |
-- | Turn a getter for a specific type into a getter of a generic
|
| 58 | |
-- value (so we can compare possibly across types)
|
| 59 | |
select :: (Typeable a, Ord a, Show a) => Getter r a -> String -> Select r
|
| 60 | |
select l s = Select (l.to go) s
|
| 62 |
-- | Turn a 'Getter' for a specific type as well as its human-readable
|
| 63 |
-- equivalent into a 'Getter' of a generic value.
|
| 64 |
select :: (Typeable a, Ord a, Show a) => (r -> a) -> String -> Select r
|
| 65 |
select l s = Select (go . l) s
|
| 61 | 66 |
where go x = [makeAbs x]
|
| 62 | 67 |
|
| 63 | 68 |
-- | A convenience function to combine strings with commas
|
|
| 68 | 73 |
|
| 69 | 74 |
instance Monoid (Select r) where
|
| 70 | 75 |
mappend (Select l ln) (Select r rn) =
|
| 71 | |
Select (to (\ x -> x^.l <> x^.r)) (comma ln rn)
|
| 72 | |
mempty = Select (to (const mempty)) ""
|
| 76 |
Select (\ x -> l x <> r x) (comma ln rn)
|
| 77 |
mempty = Select (const mempty) ""
|
| 73 | 78 |
|
| 74 | 79 |
-- | A convenience function to run a selector over data
|
| 75 | |
extract :: Select r -> r -> AVal
|
| 76 | |
extract (Select l _) r = (r^.l)
|
| 80 |
extract :: Select r -> r -> AbstractTuple
|
| 81 |
extract (Select l _) r = (l r)
|
| 77 | 82 |
|
| 78 | |
-- | A plan describes how to go from a dataset of type [row]
|
| 79 | |
-- and arrive at a Result map
|
| 83 |
-- | A plan describes how to go from a dataset of type @[row]@
|
| 84 |
-- and arrive at a @Result@ map
|
| 80 | 85 |
data Plan row focus = Plan
|
| 81 | |
{ planFocus :: Getter row focus -- ^ The final dependent variable
|
| 86 |
{ planFocus :: row -> focus -- ^ The final dependent variable
|
| 82 | 87 |
, planFCName :: String -- ^ The name of the dependent variable
|
| 83 | 88 |
, planName :: String -- ^ The name of the plan
|
| 84 | 89 |
, planFilters :: row -> Bool -- ^ select which rows to keep
|
|
| 89 | 94 |
, planLinAxis :: Bool -- ^ true if the axis is linear
|
| 90 | 95 |
}
|
| 91 | 96 |
|
| 92 | |
defaultPlan :: Plan a a
|
| 97 |
-- | The default plan has sensible defaults where possible, and
|
| 98 |
-- is filled in with undefined functions
|
| 99 |
defaultPlan :: (Floating b) => Plan a b
|
| 93 | 100 |
defaultPlan = Plan
|
| 94 | 101 |
{ planFocus = error "undefined focus"
|
| 95 | 102 |
, planFCName = "undefined"
|
|
| 98 | 105 |
, planMaps = id
|
| 99 | 106 |
, planAxis = mempty
|
| 100 | 107 |
, planLines = mempty
|
| 101 | |
, planMerge = error "undefined merge"
|
| 108 |
, planMerge = average
|
| 102 | 109 |
, planLinAxis = True
|
| 103 | 110 |
}
|
| 104 | 111 |
|
| 105 | 112 |
-- | The result of a plan is always going to be a map from
|
| 106 | 113 |
-- (abstract but comparable) keys to some kind of result
|
| 107 | 114 |
-- value.
|
| 108 | |
type Result r = Map (AVal, AVal) r
|
| 115 |
type Result r = Map (AbstractTuple, AbstractTuple) r
|
| 109 | 116 |
|
| 110 | 117 |
-- | Given a selector and a list of rows, groups into sub-lists that
|
| 111 | 118 |
-- share a common value for the selector.
|
|
| 114 | 121 |
where go v r = extract l r == v
|
| 115 | 122 |
|
| 116 | 123 |
-- | Extract every unique value that appears in the data
|
| 117 | |
getUniqVals :: (Select r) -> [r] -> [AVal]
|
| 124 |
getUniqVals :: (Select r) -> [r] -> [AbstractTuple]
|
| 118 | 125 |
getUniqVals l rs = sort (nub (map (extract l) rs))
|
| 119 | 126 |
|
| 120 | 127 |
-- | Merge the focused value using the merge function
|
| 121 | |
combine :: Getter r f -> Merge f -> [r] -> f
|
| 122 | |
combine focus merge = merge . map (^.focus)
|
| 128 |
combine :: (r -> f) -> Merge f -> [r] -> f
|
| 129 |
combine focus merge = merge . map focus
|
| 123 | 130 |
|
| 124 | 131 |
-- | Group into a map based on the axis values and lines
|
| 125 | |
group :: Select r -> Select r -> [r] -> [((AVal, AVal), [r])]
|
| 132 |
group :: Select r -> Select r -> [r] -> [((AbstractTuple, AbstractTuple), [r])]
|
| 126 | 133 |
group l a rs =
|
| 127 | 134 |
[ ((extract a (head r), extract l (head r)), r)
|
| 128 | 135 |
| r <- groupOn (l <> a) rs
|
| 129 | 136 |
]
|
| 130 | 137 |
|
| 131 | |
-- | Execute a given plan on a set of data
|
| 138 |
-- | Execute a given 'Plan' on a set of data and return the 'Result' set.
|
| 132 | 139 |
runPlanToMap :: Plan r f -> [r] -> Result f
|
| 133 | 140 |
runPlanToMap p@(Plan { .. }) rawData =
|
| 134 | 141 |
fromList $ map (second (combine planFocus planMerge))
|
|
| 138 | 145 |
$ rawData
|
| 139 | 146 |
|
| 140 | 147 |
-- | A convenience function for writing maps over rows
|
| 141 | |
opBy :: (a -> a -> a) -> Setter' r a -> Getter r a -> Getter r a -> r -> r
|
| 142 | |
opBy op t l r x = set t ((x^.l) `op` (x^.r)) x
|
| 148 |
opBy :: (a -> a -> a) -> (a -> r -> r) -> (r -> a) -> (r -> a) -> r -> r
|
| 149 |
opBy op set l r x = set ((l x) `op` (r x)) x
|
| 143 | 150 |
|
| 144 | 151 |
-- | A specialized convenience function for normalizing a given value
|
| 145 | 152 |
-- by another by division
|
| 146 | |
divBy :: Lens' r Double -> Getter r Double -> r -> r
|
| 147 | |
divBy l = opBy (/) l l
|
| 153 |
-- divBy :: Lens' r Double -> (r -> Double) -> r -> r
|
| 154 |
-- divBy l = opBy (/) l (^.l)
|
| 148 | 155 |
|
| 149 | 156 |
-- | A specialized convenience function for normalizing a given value
|
| 150 | 157 |
-- by another using an nth root
|
| 151 | |
rootBy :: Lens' r Double -> Getter r Double -> r -> r
|
| 152 | |
rootBy l = undefined
|
| 158 |
-- rootBy :: Lens' r Double -> (r -> Double) -> r -> r
|
| 159 |
-- rootBy l = undefined
|
| 153 | 160 |
|
| 154 | 161 |
-- | Get the unique points to graph along the X axis
|
| 155 | |
getAxisValues :: Plan r f -> [r] -> [AVal]
|
| 162 |
getAxisValues :: Plan r f -> [r] -> [AbstractTuple]
|
| 156 | 163 |
getAxisValues (Plan { .. }) rows =
|
| 157 | 164 |
getUniqVals planAxis (filter planFilters rows)
|
| 158 | 165 |
|
|
| 161 | 168 |
getAxisName = slName . planAxis
|
| 162 | 169 |
|
| 163 | 170 |
-- | Get the unique field values which determine the set of lines to produce
|
| 164 | |
getLineValues :: Plan r f -> [r] -> [AVal]
|
| 171 |
getLineValues :: Plan r f -> [r] -> [AbstractTuple]
|
| 165 | 172 |
getLineValues (Plan { .. }) rows =
|
| 166 | 173 |
getUniqVals planLines (filter planFilters rows)
|
| 167 | 174 |
|
|
| 170 | 177 |
getLineName = slName . planLines
|
| 171 | 178 |
|
| 172 | 179 |
-- | Determine whether a given (abstract) value appears in the data set
|
| 173 | |
validRowValue :: Select r -> [r] -> AVal -> Bool
|
| 180 |
validRowValue :: Select r -> [r] -> AbstractTuple -> Bool
|
| 174 | 181 |
validRowValue l rs a = any (== a) (getUniqVals l rs)
|