gdritter repos fulcrum / master Data / Analysis / Fulcrum / Analysis.hs
master

Tree @master (Download .tar.gz)

Analysis.hs @masterraw · history · blame

{-# LANGUAGE RecordWildCards #-}

module Data.Analysis.Fulcrum.Analysis
  ( module Data.Analysis.Fulcrum.Abstract
  -- * Plans
  , Plan(..)
  , runPlanToMap
  , defaultPlan

  -- * Helper Types
  , Merge
  , Select
  , AbstractTuple
  , select
  , slName
  , Result

  -- * Utility Functions
  , getUniqVals
  , getAxisValues
  , getLineValues
  , validRowValue

  -- Reduction functions
--  , opBy
--  , divBy
  ) where

import Control.Applicative ((<$>))
import Control.Arrow (second)
import Data.Dynamic
import Data.Maybe (catMaybes)
import Data.Monoid (Monoid(..), (<>))
import Data.List (nub, sort, find)
import Data.Map.Strict (Map, fromList)
import Math.Statistics (average)

import Data.Analysis.Fulcrum.Abstract

-- | The value extracted by a 'Select' may be the result of
--   several combined 'Select'ors, and so is represented
--   as a list of 'Abstract' values.
type AbstractTuple = [Abstract]

-- | A 'Merge' function is anything that takes several
--   rows and merges them to a single row.
type Merge r = [r] -> r

-- | A 'Select' encapsulates the ability to extracts a list of abstract
--   values as well as the human-readable name for the field it is
--   extracting. The 'Monoid' instance for 'Select' allows us to take
--   two possibly heterogeneous 'Getter's and combine them.
data Select r = Select
  { _slLens :: r -> AbstractTuple
  , _slName :: String
  }

-- | The human-readable name of a 'Select'.
slName :: Select r -> String
slName = _slName

-- | Turn a 'Getter' for a specific type as well as its human-readable
--   equivalent into a 'Getter' of a generic value.
select :: (Typeable a, Ord a, Show a) => (r -> a) -> String -> Select r
select l s = Select (go . l) s
  where go x = [makeAbs x]

-- | A convenience function to combine strings with commas
comma :: String -> String -> String
comma x "" = x
comma "" y = y
comma x y  = x <> "," <> y

instance Monoid (Select r) where
  mappend (Select l ln) (Select r rn) =
    Select (\ x -> l x <> r x) (comma ln rn)
  mempty = Select (const mempty) ""

-- | A convenience function to run a selector over data
extract :: Select r -> r -> AbstractTuple
extract (Select l _) r = (l r)

-- | A plan describes how to go from a dataset of type @[row]@
--   and arrive at a @Result@ map
data Plan row focus = Plan
  { planFocus   :: row -> focus     -- ^ The final dependent variable
  , planFCName  :: String           -- ^ The name of the dependent variable
  , planName    :: String           -- ^ The name of the plan
  , planFilters :: row -> Bool      -- ^ select which rows to keep
  , planMaps    :: row -> row       -- ^ transform them somehow
  , planAxis    :: Select row       -- ^ select an independent variable
  , planLines   :: Select row       -- ^ select sets of interesting data
  , planMerge   :: Merge focus      -- ^ join the resulting rows together
  , planLinAxis :: Bool             -- ^ true if the axis is linear
  }

-- | The default plan has sensible defaults where possible, and
--   is filled in with undefined functions
defaultPlan :: (Floating b) => Plan a b
defaultPlan = Plan
  { planFocus   = error "undefined focus"
  , planFCName  = "undefined"
  , planName    = "default plan"
  , planFilters = const True
  , planMaps    = id
  , planAxis    = mempty
  , planLines   = mempty
  , planMerge   = average
  , planLinAxis = True
  }

-- | The result of a plan is always going to be a map from
--   (abstract but comparable) keys to some kind of result
--   value.
type Result r = Map (AbstractTuple, AbstractTuple) r

-- | Given a selector and a list of rows, groups into sub-lists that
--   share a common value for the selector.
groupOn :: (Select r) -> [r] -> [[r]]
groupOn l rs = [ filter (go v) rs |  v <- getUniqVals l rs ]
  where go v r = extract l r == v

-- | Extract every unique value that appears in the data
getUniqVals :: (Select r) -> [r] -> [AbstractTuple]
getUniqVals l rs = sort (nub (map (extract l) rs))

-- | Merge the focused value using the merge function
combine :: (r -> f) -> Merge f -> [r] -> f
combine focus merge = merge . map focus

-- | Group into a map based on the axis values and lines
group :: Select r -> Select r -> [r] -> [((AbstractTuple, AbstractTuple), [r])]
group l a rs =
  [ ((extract a (head r), extract l (head r)), r)
  | r <- groupOn (l <> a) rs
  ]

-- | Execute a given 'Plan' on a set of data and return the 'Result' set.
runPlanToMap :: Plan r f -> [r] -> Result f
runPlanToMap p@(Plan { .. }) rawData =
  fromList $ map (second (combine planFocus planMerge))
           $ group planLines planAxis
           $ map planMaps
           $ filter planFilters
           $ rawData

-- | A convenience function for writing maps over rows
opBy :: (a -> a -> a) -> (a -> r -> r) -> (r -> a) -> (r -> a) -> r -> r
opBy op set l r x = set ((l x) `op` (r x)) x

-- | A specialized convenience function for normalizing a given value
--   by another by division
-- divBy :: Lens' r Double -> (r -> Double) -> r -> r
-- divBy l = opBy (/) l (^.l)

-- | A specialized convenience function for normalizing a given value
--   by another using an nth root
-- rootBy :: Lens' r Double -> (r -> Double) -> r -> r
-- rootBy l = undefined

-- | Get the unique points to graph along the X axis
getAxisValues :: Plan r f -> [r] -> [AbstractTuple]
getAxisValues (Plan { .. }) rows =
  getUniqVals planAxis (filter planFilters rows)

-- | Get the human-readable name of the X axis variable
getAxisName :: Plan r f -> String
getAxisName = slName . planAxis

-- | Get the unique field values which determine the set of lines to produce
getLineValues :: Plan r f -> [r] -> [AbstractTuple]
getLineValues (Plan { .. }) rows =
  getUniqVals planLines (filter planFilters rows)

-- | Get the human-readable name of the lines
getLineName :: Plan r f -> String
getLineName = slName . planLines

-- | Determine whether a given (abstract) value appears in the data set
validRowValue :: Select r -> [r] -> AbstractTuple -> Bool
validRowValue l rs a = any (== a) (getUniqVals l rs)