FICO
FICO Xpress Optimization Examples Repository
FICO Optimization Community FICO Xpress Optimization Home
Back to examples browserNext example

Energyforecast - regression analysis with R

Description
Forecast of energy demand using regression analysis on historic data.

energyforecast.zip[download all files]

Source Files

Data Files





energyforecast.mos

(!******************************************************
   Mosel R Example Problems
   ========================

   file energyforecast.mos
   ```````````````````````
   Forecast of energy demand using regression analysis
   on historic data 
  
   !!! This example requires an installation of R, see the
   !!! chapter 'R' of the 'Mosel Language Reference' for
   !!! compatible versions and setup instructions

  (c) 2015 Fair Isaac Corporation
      author: S.Lannez, Jul. 2015
*******************************************************!)
model forecast
  options noimplicit
  uses "mmsheet"   ! Spreadsheet I/O
  uses "mmsystem"
  uses "r"         ! Load R interface

  parameters
    DATADIR=""

    ! Historical data
    HISTFILE=DATADIR+'energyforecast.dat' 

    ! Install R packages on the-fly
    R_SETUP = false

    ! The peak threshold. If the ratio between
    ! the daily demand and the previous year average
    ! daily demand is greater than the following threshold
    ! then the period is considered a peak period.
    FORECAST_PEAK_THR = 2.5

    ! The peak period probability threshold. If
    ! the predicted probability of being in a
    ! peak period is greater than the following
    ! threshold, then the period is considered
    ! to be a peak period.
    FORECAST_PEAK_PROB = 0.5

  end-parameters

  ! **** Historical data **** !

  declarations
    Days: set of string            ! Days
    Years: set of integer          ! Years
    Periods: range                 ! Time periods

    HourTypes: set of integer      ! Hour types
    DayTypes: set of integer       ! Day types
    WeekTypes: set of integer      ! Week types

    AttrGDP: set of string         ! YearGDP attributes
    AttrIncome: set of string      ! Income attributes
    
    HourDemand: dynamic array(Days,Periods) of real     ! Historical demand
    YearGDP: dynamic array(Years,AttrGDP) of real       ! Historical YearGDP
    YearIncome: dynamic array(Years,AttrIncome) of real ! Historical income

    first,last: date
    tnow: datetime
  end-declarations

  ! **** Return the hour type ****
  function gethourtype(t: integer): integer
    declarations
      h=t/48 ! Input data has 48 time periods
    end-declarations
    returned := floor(h*HourTypes.size)
  end-function

  ! **** Return the week type ****
  function getweektype(d: date): integer      
    returned := getdaynum(date(d)) div 7
  end-function

  ! **** Return the day type ****
  function getdaytype(d: date): integer
    declarations
      daynum: integer
      Summer=91..274
      SummerHoliday=268..274
      WinterHoliday=84..90
    end-declarations
    returned := 1 ! unknown
    daynum := getdaynum(d)
    if daynum in Summer then ! Summer
      if daynum in SummerHoliday then
        returned := 2 ! Summer public holiday
      else
        case getweekday(d) of
          1,2,3,4,5: returned := 0 ! Summer week
          6,7: returned := 1 ! Summer weekend
        end-case
      end-if
    else ! Winter
      if daynum in WinterHoliday then
        returned := 5 ! Winter public holiday
      else
        case getweekday(d) of
          1,2,3,4,5: returned := 3 ! Winter week
          6,7: returned := 4 ! Winter weekend
        end-case
      end-if
    end-if 
  end-function

  ! **** Run a regression analysis of forecast the energy consumption ****
  procedure forecast(first,last: date,predict:boolean)
    ! Sets the parameters of the forecast
    ! function
    declarations
      TimePeriods,_TimePeriods: set of string
      
      ! Derived data
      PeriodAvgEnergy: dynamic array(_TimePeriods) of real ! Average daily energy of the year

      ! Observed data or forecasted from external tool
      PeriodGDP: dynamic array(TimePeriods) of real ! Current year GDP
      PeriodIncome: dynamic array(TimePeriods) of real ! Current year income
      
      PeriodWeekType: dynamic array(TimePeriods,WeekTypes) of boolean ! Day type
      PeriodDayType: dynamic array(TimePeriods,DayTypes) of boolean ! Day type
      PeriodHourType: dynamic array(TimePeriods,HourTypes) of boolean ! Hour type

      PeriodWeekTypeS: dynamic array(TimePeriods) of string ! String representation of week type
      PeriodDayTypeS: dynamic array(TimePeriods) of string ! String representation of day type
      PeriodHourTypeS: dynamic array(TimePeriods) of string ! String representation of hour type
      
      ! Observed demand
      PeriodDemand: dynamic array(TimePeriods) of real ! baseline demand
      PeriodPeak: dynamic array(TimePeriods) of real ! peak demand
      PeriodIPeak: dynamic array(TimePeriods) of real ! peak period

      ! Period forecast
      PeriodDemandP: dynamic array(TimePeriods) of real ! Baseline energy demand forecast
      PeriodPeakP: dynamic array(TimePeriods) of real ! Peak energy demand forecast
      PeriodIPeakP: dynamic array(TimePeriods) of real ! Probability of being in a peak

      ! Misc
      id,newdata: string
      i,y: integer
      v,w: real
      E,_E,__E,err,a,b: real     
      Calendar: dynamic array(Years,Days,Periods) of string
    end-declarations    

    writeln('-'*60)
    if (predict) then
      writeln('Forecasting...')
    else
      writeln('Running regression...')
      if (R_SETUP) then
        Reval('install.packages("mlogit",repos="http://cran.r-project.org")')
      end-if
    end-if

    ! Create lookup array
    writeln("Creating calendar...")
    forall(d in Days, p in Periods | exists(HourDemand(d,p))) do
      Calendar(getyear(date(d)),d,p) := d+"-"+if(p>=10,""+p,"0"+p)
    end-do

    ! Compute previous year average consumed energy.
    ! The forecast function is based on energy demand growth
    ! and needs relative demand increase from year-to-year
    forall(yy in Years) do
      i := 0
      E := sum(d in Days, p in Periods | exists(Calendar(yy-1,d,p)),i as counter) HourDemand(d,p)
      if (i>0) then
        E := E/i
      else
        E := 0
      end-if
      forall(d in Days, p in Periods | exists(Calendar(yy,d,p))) do
        PeriodAvgEnergy(Calendar(yy,d,p)) := E
      end-do
    end-do    

    ! Populate the predictors used in the regression or
    ! during the prediction.
    writeln("Populating predictors...")
    forall(d in Days | date(d)>=first and date(d)<=last) do
      y := getyear(date(d))
      forall (p in Periods | exists(Calendar(y,d,p))) do
        id := Calendar(y,d,p)

        ! Continous input
        PeriodGDP(id) := YearGDP(y,'YearGDPGrowthRate')
        PeriodIncome(id) := YearIncome(y,'Income')

        ! Continuous input
        PeriodWeekType(id,getweektype(date(d))) := true
        PeriodDayType(id,getdaytype(date(d))) := true
        PeriodHourType(id,gethourtype(p)) := true
        
        ! Discrete input
        PeriodWeekTypeS(id) := ""+getweektype(date(p))
        PeriodDayTypeS(id) := ""+getdaytype(date(p))
        PeriodHourTypeS(id) := ""+gethourtype(p)

        ! Value to be forecasted
        if (exists(HourDemand(d,p))) then
          w := HourDemand(d,p) / PeriodAvgEnergy(id)
          ! We split baseline and peak forecasting
          ! in two different models in order
          ! to improve the quality of the prediction
          if (w>=FORECAST_PEAK_THR) then
            PeriodPeak(id) := w-FORECAST_PEAK_THR
            PeriodIPeak(id) := 1
          else
            PeriodPeak(id) := 0
            PeriodIPeak(id) := 0
          end-if
          PeriodDemand(id) := w
        end-if
      end-do
    end-do

    writeln("Loading data to R...")

    ! Copy data to R
    Rset('timeperiods',TimePeriods)
    Reval('MyData <- data.frame(row.names=timeperiods)')
    Rset('MyData$gdp',PeriodGDP)
    Rset('MyData$income',PeriodIncome)
    Rset('MyData$weektype',PeriodWeekType)
    Rset('MyData$daytype',PeriodDayType) 
    Rset('MyData$hourtype',PeriodHourType)

    Rset('MyData$weektypestr',PeriodWeekTypeS)
    Rset('MyData$daytypestr',PeriodDayTypeS)
    Rset('MyData$hourtypestr',PeriodHourTypeS)
    
    ! Fill NAs cell with 0
    Reval('MyData$hourtype[ is.na(MyData$hourtype) ] <- 0')
    Reval('MyData$daytype[ is.na(MyData$daytype) ] <- 0')
    Reval('MyData$weektype[ is.na(MyData$weektype) ] <- 0')

    ! When not predicting we need to set the demand, peak
    ! and indicator peak period in order to run the regression
    if (not predict) then
      Rset('MyData$demand',PeriodDemand)
      Rset('MyData$peak',PeriodPeak)
      Rset('MyData$ipeak',PeriodIPeak)
    end-if
    
    ! Predict
    if (not predict) then
      ! Define our prediction model
    
      ! This model is more advanced and takes into account the income and
      ! gdp of the year, the weektype, daytype and hourtype. It will generates
      ! a set of coefficients that can be used to forecast the demand by taking
      ! into account the forecasted mean temperature, gdp and annual income of
      ! the future periods.
      writeln("Fitting '"+TimePeriods.size+"' time periods...")

      ! Fitting the peak
      writeln("... peak indicators ...")
      Reval('peakModelI <- glm(ipeak ~ weektypestr + daytypestr + hourtypestr, data=MyData, family="binomial")')
      ! Uncomment to get significance of each parameter.
      ! This computation can take minutes.
      ! Rprint('anova(peakModelI,test="Chisq")') ! Print predictors relevance

      ! Fitting the baseline consumption
      ! We assume baseline is a linear response of the gdp/income/peak probability
      writeln("... baseline consumption ...")
      Reval('demandModel <- glm(demand ~ gdp + income + weektype + daytype*hourtype, data=MyData)')
      ! Uncomment to get significance of each parameter.
      ! This computation can take minutes.
      ! Rprint('anova(demandModel,test="Chisq")') ! Print predictors relevance

      ! Fitting the peak consumption
      ! We assume peak consumption is an exponential response of the gdp/daytype/hourtype predictors
      writeln("... peak consumption ...")
      Reval('peakModel <- glm(peak ~ gdp + income + weektype + daytype*hourtype, data=MyData)')
      ! Uncomment to get significance of each parameter.
      ! This computation can take minutes.
      ! Rprint('anova(peakModel,test="Chisq")') ! Print predictors relevance

      ! Save the forecasting model
      ! Uncomment the following line to extract the models to
      ! a file that can be read by R
      ! Reval('save(demandModel, peakModel, peakModelI, file="'+DATADIR+'forecaster.Rdata")') ! Debug

      ! Free some memory
      Reval('rm(MyData)')
    else
      writeln("Predicting '"+TimePeriods.size+"' time periods...")

      ! Predicting
      Reval('MyData$ipeak <- predict(peakModelI, newdata = MyData, type = "response")') ! Peak hours
      Reval('MyData$demand <- predict(demandModel, newdata = MyData)') ! Baseline
      Reval('MyData$peak <- predict(peakModel, newdata = MyData)') ! Peak value

      writeln("==== Summary ===========")
      writeln("= Peak period forecast =")
      Rprint('summary(MyData$ipeak)')

      writeln("= Baseline demand forecast =")
      Rprint('summary(MyData$demand)')

      writeln("= Peak demand forecast =")
      Rprint('summary(MyData$peak)')

      ! Copy the data to Mosel
      Rgetarr('mmarray(MyData,"demand")',PeriodDemandP)
      Rgetarr('mmarray(MyData,"ipeak")',PeriodIPeakP)
      Rgetarr('mmarray(MyData,"peak")',PeriodPeakP)

      ! Clean up R environment
      Reval('rm(MyData)')

      writeln("==== Analysis ==============")
      ! Print some summary
      E := 0 ; _E := 0; __E := 0 ; err := 0
      forall(t in TimePeriods) do
        __E += PeriodAvgEnergy(t)
        _E += PeriodDemand(t) * PeriodAvgEnergy(t)
        E += PeriodDemandP(t) * PeriodAvgEnergy(t)
        err += abs(PeriodDemandP(t) - PeriodDemand(t))*PeriodAvgEnergy(t)
      end-do
      writeln(" === Baseline forecaster === ")
      writeln("Previous year total energy  : ",textfmt(__E/10e6,6,3)," TWh")
      writeln("Original total energy       : ",textfmt(_E/10e6,6,3)," TWh")
      writeln("Forecasted total energy     : ",textfmt(E/10e6,6,3)," TWh")
      if (maxlist(E,_E)>0) then
        writeln("Absolute error              : ",textfmt(round(abs(E-_E)/10e3),6)," GWh")
        writeln("Relative error              : ",textfmt(round(100*abs(E-_E)/maxlist(E,_E)),6)+" %")
      end-if
      writeln("Cumulated period error      : ",textfmt(err/10e3,6)," GWh")

      E := 0 ; _E := 0; __E := 0 ; err := 0
      forall(t in TimePeriods) do
        __E += PeriodAvgEnergy(t)
        _E += PeriodPeak(t) * PeriodAvgEnergy(t)
        E += PeriodPeakP(t) * PeriodAvgEnergy(t)
        err += abs(PeriodPeakP(t) - PeriodPeak(t))*PeriodAvgEnergy(t)
      end-do
      writeln(" === Peak forecaster === ")
      writeln("Previous year total energy  : ",textfmt(__E/10e6,6,3)," TWh")
      writeln("Original total energy       : ",textfmt(_E/10e6,6,3)," TWh")
      writeln("Forecasted total energy     : ",textfmt(E/10e6,6,3)," TWh")
      if (maxlist(E,_E)>0) then
        writeln("Absolute error              : ",textfmt(round(abs(E-_E)/10e3),6)," GWh")
        writeln("Relative error              : ",textfmt(round(100*abs(E-_E)/maxlist(E,_E)),6)+" %")
      end-if      
      writeln("Cumulated period error      : ",textfmt(err/10e3,6)," GWh")

    end-if

    writeln("Done.")

  end-procedure

  ! ************************ Application ************************ !

  ! Define the sets
  AttrGDP := {'YearGDP','YearGDPGrowthRate'} ! Available YearGDP data
  AttrIncome := {'Income','Population','PopulationGrowthRate','IncomeGrowthRate'} ! Available income data
  Periods := 0..47 ! Set of time periods in a day
  HourTypes := 0..47 ! Set of hour types
  DayTypes := {-1,0,1,2,3,4,5} ! Set of day types

  ! Helper tool to convert from data.frame to
  ! simple arrays
  Reval('mmarray <- function(ar,col) { a <- cbind(ar[,col]) ; colnames(a) <- col ; rownames(a) <- rownames(ar) ; a[,col] }')

  ! In this example we have historical data
  ! for the years 2010,2011,2012,2013. We have
  ! gdp and income forecast for 2014 and we
  ! want to derive the energy demand.
  !
  ! Note: today is the 1st of January, 2014
  !
  ! Load historical and forecast data.
  ! All data before 1st of January, 2014 are considered
  ! past data
  initializations from HISTFILE
    HourDemand as  "E_DEMAND" ! HourDemand
    YearGDP as "IND_GDP" ! YearGDP
    YearIncome as "IND_INCOME" ! Income
  end-initializations       
 
  ! **** Run regression (2011-2013) **** !

  first := date('2011-01-01')
  last := date('2013-12-31')
  forecast(first,last,false)

  ! **** Predict demand (2014) **** !

  first := date('2014-02-01')
  last := date('2014-04-30')
  forecast(first,last,true)

end-model


Back to examples browserNext example