FICO
FICO Xpress Optimization Examples Repository
FICO Optimization Community FICO Xpress Optimization Home
Back to examples browserNext example

Energyforecast - regression analysis with R

Description
Forecast of energy demand using regression analysis on historic data.

energyforecast.zip[download all files]

Source Files

Data Files





energyforecast.mos

(!******************************************************
   Mosel R Example Problems
   ========================

   file energyforecast.mos
   ```````````````````````
   Forecast of energy demand using regression analysis
   on historic data 
   
  (c) 2015 Fair Isaac Corporation
      author: S.Lannez, Jul. 2015
*******************************************************!)
model forecast
  options noimplicit
  uses "mmsheet"   ! Spreadsheet I/O
  uses "mmsystem"
  uses "r"         ! Load R interface

  parameters
    DATADIR=""

    ! Historical data
    HISTFILE=DATADIR+'energyforecast.dat' 

    ! Install R packages on the-fly
    R_SETUP = false

    ! The peak threshold. If the ratio between
    ! the daily demand and the previous year average
    ! daily demand is greater than the following threshold
    ! then the period is considered a peak period.
    FORECAST_PEAK_THR = 2.5

    ! The peak period probability threshold. If
    ! the predicted probability of being in a
    ! peak period is greater than the following
    ! threshold, then the period is considered
    ! to be a peak period.
    FORECAST_PEAK_PROB = 0.5

  end-parameters

  ! **** Historical data **** !

  declarations
    Days: set of string            ! Days
    Years: set of integer          ! Years
    Periods: range                 ! Time periods

    HourTypes: set of integer      ! Hour types
    DayTypes: set of integer       ! Day types
    WeekTypes: set of integer      ! Week types

    AttrGDP: set of string         ! YearGDP attributes
    AttrIncome: set of string      ! Income attributes
    
    HourDemand: dynamic array(Days,Periods) of real     ! Historical demand
    YearGDP: dynamic array(Years,AttrGDP) of real       ! Historical YearGDP
    YearIncome: dynamic array(Years,AttrIncome) of real ! Historical income

    first,last: date
    tnow: datetime
  end-declarations

  ! **** Return the hour type ****
  function gethourtype(t: integer): integer
    declarations
      h=t/48 ! Input data has 48 time periods
    end-declarations
    returned := floor(h*HourTypes.size)
  end-function

  ! **** Return the week type ****
  function getweektype(d: date): integer      
    returned := getdaynum(date(d)) div 7
  end-function

  ! **** Return the day type ****
  function getdaytype(d: date): integer
    declarations
      daynum: integer
      Summer=91..274
      SummerHoliday=268..274
      WinterHoliday=84..90
    end-declarations
    returned := 1 ! unknown
    daynum := getdaynum(d)
    if daynum in Summer then ! Summer
      if daynum in SummerHoliday then
        returned := 2 ! Summer public holiday
      else
        case getweekday(d) of
          1,2,3,4,5: returned := 0 ! Summer week
          6,7: returned := 1 ! Summer weekend
        end-case
      end-if
    else ! Winter
      if daynum in WinterHoliday then
        returned := 5 ! Winter public holiday
      else
        case getweekday(d) of
          1,2,3,4,5: returned := 3 ! Winter week
          6,7: returned := 4 ! Winter weekend
        end-case
      end-if
    end-if 
  end-function

  ! **** Run a regression analysis of forecast the energy consumption ****
  procedure forecast(first,last: date,predict:boolean)
    ! Sets the parameters of the forecast
    ! function
    declarations
      TimePeriods,_TimePeriods: set of string
      
      ! Derived data
      PeriodAvgEnergy: dynamic array(_TimePeriods) of real ! Average daily energy of the year

      ! Observed data or forecasted from external tool
      PeriodGDP: dynamic array(TimePeriods) of real ! Current year GDP
      PeriodIncome: dynamic array(TimePeriods) of real ! Current year income
      
      PeriodWeekType: dynamic array(TimePeriods,WeekTypes) of boolean ! Day type
      PeriodDayType: dynamic array(TimePeriods,DayTypes) of boolean ! Day type
      PeriodHourType: dynamic array(TimePeriods,HourTypes) of boolean ! Hour type

      PeriodWeekTypeS: dynamic array(TimePeriods) of string ! String representation of week type
      PeriodDayTypeS: dynamic array(TimePeriods) of string ! String representation of day type
      PeriodHourTypeS: dynamic array(TimePeriods) of string ! String representation of hour type
      
      ! Observed demand
      PeriodDemand: dynamic array(TimePeriods) of real ! baseline demand
      PeriodPeak: dynamic array(TimePeriods) of real ! peak demand
      PeriodIPeak: dynamic array(TimePeriods) of real ! peak period

      ! Period forecast
      PeriodDemandP: dynamic array(TimePeriods) of real ! Baseline energy demand forecast
      PeriodPeakP: dynamic array(TimePeriods) of real ! Peak energy demand forecast
      PeriodIPeakP: dynamic array(TimePeriods) of real ! Probability of being in a peak

      ! Misc
      id,newdata: string
      i,y: integer
      v,w: real
      E,_E,__E,err,a,b: real     
      Calendar: dynamic array(Years,Days,Periods) of string
    end-declarations    

    writeln('-'*60)
    if (predict) then
      writeln('Forecasting...')
    else
      writeln('Running regression...')
      if (R_SETUP) then
        Reval('install.packages("mlogit",repos="http://cran.r-project.org")')
      end-if
    end-if

    ! Create lookup array
    writeln("Creating calendar...")
    forall(d in Days, p in Periods | exists(HourDemand(d,p))) do
      Calendar(getyear(date(d)),d,p) := d+"-"+if(p>=10,""+p,"0"+p)
    end-do

    ! Compute previous year average consumed energy.
    ! The forecast function is based on energy demand growth
    ! and needs relative demand increase from year-to-year
    forall(yy in Years) do
      i := 0
      E := sum(d in Days, p in Periods | exists(Calendar(yy-1,d,p)),i as counter) HourDemand(d,p)
      if (i>0) then
        E := E/i
      else
        E := 0
      end-if
      forall(d in Days, p in Periods | exists(Calendar(yy,d,p))) do
        PeriodAvgEnergy(Calendar(yy,d,p)) := E
      end-do
    end-do    

    ! Populate the predictors used in the regression or
    ! during the prediction.
    writeln("Populating predictors...")
    forall(d in Days | date(d)>=first and date(d)<=last) do
      y := getyear(date(d))
      forall (p in Periods | exists(Calendar(y,d,p))) do
        id := Calendar(y,d,p)

        ! Continous input
        PeriodGDP(id) := YearGDP(y,'YearGDPGrowthRate')
        PeriodIncome(id) := YearIncome(y,'Income')

        ! Continuous input
        PeriodWeekType(id,getweektype(date(d))) := true
        PeriodDayType(id,getdaytype(date(d))) := true
        PeriodHourType(id,gethourtype(p)) := true
        
        ! Discrete input
        PeriodWeekTypeS(id) := ""+getweektype(date(p))
        PeriodDayTypeS(id) := ""+getdaytype(date(p))
        PeriodHourTypeS(id) := ""+gethourtype(p)

        ! Value to be forecasted
        if (exists(HourDemand(d,p))) then
          w := HourDemand(d,p) / PeriodAvgEnergy(id)
          ! We split baseline and peak forecasting
          ! in two different models in order
          ! to improve the quality of the prediction
          if (w>=FORECAST_PEAK_THR) then
            PeriodPeak(id) := w-FORECAST_PEAK_THR
            PeriodIPeak(id) := 1
          else
            PeriodPeak(id) := 0
            PeriodIPeak(id) := 0
          end-if
          PeriodDemand(id) := w
        end-if
      end-do
    end-do

    writeln("Loading data to R...")

    ! Copy data to R
    Rset('timeperiods',TimePeriods)
    Reval('MyData <- data.frame(row.names=timeperiods)')
    Rset('MyData$gdp',PeriodGDP)
    Rset('MyData$income',PeriodIncome)
    Rset('MyData$weektype',PeriodWeekType)
    Rset('MyData$daytype',PeriodDayType) 
    Rset('MyData$hourtype',PeriodHourType)

    Rset('MyData$weektypestr',PeriodWeekTypeS)
    Rset('MyData$daytypestr',PeriodDayTypeS)
    Rset('MyData$hourtypestr',PeriodHourTypeS)
    
    ! Fill NAs cell with 0
    Reval('MyData$hourtype[ is.na(MyData$hourtype) ] <- 0')
    Reval('MyData$daytype[ is.na(MyData$daytype) ] <- 0')
    Reval('MyData$weektype[ is.na(MyData$weektype) ] <- 0')

    ! When not predicting we need to set the demand, peak
    ! and indicator peak period in order to run the regression
    if (not predict) then
      Rset('MyData$demand',PeriodDemand)
      Rset('MyData$peak',PeriodPeak)
      Rset('MyData$ipeak',PeriodIPeak)
    end-if
    
    ! Predict
    if (not predict) then
      ! Define our prediction model
    
      ! This model is more advanced and takes into account the income and
      ! gdp of the year, the weektype, daytype and hourtype. It will generates
      ! a set of coefficients that can be used to forecast the demand by taking
      ! into account the forecasted mean temperature, gdp and annual income of
      ! the future periods.
      writeln("Fitting '"+TimePeriods.size+"' time periods...")

      ! Fitting the peak
      writeln("... peak indicators ...")
      Reval('peakModelI <- glm(ipeak ~ weektypestr + daytypestr + hourtypestr, data=MyData, family="binomial")')
      ! Uncomment to get significance of each parameter.
      ! This computation can take minutes.
      ! Rprint('anova(peakModelI,test="Chisq")') ! Print predictors relevance

      ! Fitting the baseline consumption
      ! We assume baseline is a linear response of the gdp/income/peak probability
      writeln("... baseline consumption ...")
      Reval('demandModel <- glm(demand ~ gdp + income + weektype + daytype*hourtype, data=MyData)')
      ! Uncomment to get significance of each parameter.
      ! This computation can take minutes.
      ! Rprint('anova(demandModel,test="Chisq")') ! Print predictors relevance

      ! Fitting the peak consumption
      ! We assume peak consumption is an exponential response of the gdp/daytype/hourtype predictors
      writeln("... peak consumption ...")
      Reval('peakModel <- glm(peak ~ gdp + income + weektype + daytype*hourtype, data=MyData)')
      ! Uncomment to get significance of each parameter.
      ! This computation can take minutes.
      ! Rprint('anova(peakModel,test="Chisq")') ! Print predictors relevance

      ! Save the forecasting model
      ! Uncomment the following line to extract the models to
      ! a file that can be read by R
      ! Reval('save(demandModel, peakModel, peakModelI, file="'+DATADIR+'forecaster.Rdata")') ! Debug

      ! Free some memory
      Reval('rm(MyData)')
    else
      writeln("Predicting '"+TimePeriods.size+"' time periods...")

      ! Predicting
      Reval('MyData$ipeak <- predict(peakModelI, newdata = MyData, type = "response")') ! Peak hours
      Reval('MyData$demand <- predict(demandModel, newdata = MyData)') ! Baseline
      Reval('MyData$peak <- predict(peakModel, newdata = MyData)') ! Peak value

      writeln("==== Summary ===========")
      writeln("= Peak period forecast =")
      Rprint('summary(MyData$ipeak)')

      writeln("= Baseline demand forecast =")
      Rprint('summary(MyData$demand)')

      writeln("= Peak demand forecast =")
      Rprint('summary(MyData$peak)')

      ! Copy the data to Mosel
      Rgetarr('mmarray(MyData,"demand")',PeriodDemandP)
      Rgetarr('mmarray(MyData,"ipeak")',PeriodIPeakP)
      Rgetarr('mmarray(MyData,"peak")',PeriodPeakP)

      ! Clean up R environment
      Reval('rm(MyData)')

      writeln("==== Analysis ==============")
      ! Print some summary
      E := 0 ; _E := 0; __E := 0 ; err := 0
      forall(t in TimePeriods) do
        __E += PeriodAvgEnergy(t)
        _E += PeriodDemand(t) * PeriodAvgEnergy(t)
        E += PeriodDemandP(t) * PeriodAvgEnergy(t)
        err += abs(PeriodDemandP(t) - PeriodDemand(t))*PeriodAvgEnergy(t)
      end-do
      writeln(" === Baseline forecaster === ")
      writeln("Previous year total energy  : ",textfmt(__E/10e6,6,3)," TWh")
      writeln("Original total energy       : ",textfmt(_E/10e6,6,3)," TWh")
      writeln("Forecasted total energy     : ",textfmt(E/10e6,6,3)," TWh")
      if (maxlist(E,_E)>0) then
        writeln("Absolute error              : ",textfmt(round(abs(E-_E)/10e3),6)," GWh")
        writeln("Relative error              : ",textfmt(round(100*abs(E-_E)/maxlist(E,_E)),6)+" %")
      end-if
      writeln("Cumulated period error      : ",textfmt(err/10e3,6)," GWh")

      E := 0 ; _E := 0; __E := 0 ; err := 0
      forall(t in TimePeriods) do
        __E += PeriodAvgEnergy(t)
        _E += PeriodPeak(t) * PeriodAvgEnergy(t)
        E += PeriodPeakP(t) * PeriodAvgEnergy(t)
        err += abs(PeriodPeakP(t) - PeriodPeak(t))*PeriodAvgEnergy(t)
      end-do
      writeln(" === Peak forecaster === ")
      writeln("Previous year total energy  : ",textfmt(__E/10e6,6,3)," TWh")
      writeln("Original total energy       : ",textfmt(_E/10e6,6,3)," TWh")
      writeln("Forecasted total energy     : ",textfmt(E/10e6,6,3)," TWh")
      if (maxlist(E,_E)>0) then
        writeln("Absolute error              : ",textfmt(round(abs(E-_E)/10e3),6)," GWh")
        writeln("Relative error              : ",textfmt(round(100*abs(E-_E)/maxlist(E,_E)),6)+" %")
      end-if      
      writeln("Cumulated period error      : ",textfmt(err/10e3,6)," GWh")

    end-if

    writeln("Done.")

  end-procedure

  ! ************************ Application ************************ !

  ! Define the sets
  AttrGDP := {'YearGDP','YearGDPGrowthRate'} ! Available YearGDP data
  AttrIncome := {'Income','Population','PopulationGrowthRate','IncomeGrowthRate'} ! Available income data
  Periods := 0..47 ! Set of time periods in a day
  HourTypes := 0..47 ! Set of hour types
  DayTypes := {-1,0,1,2,3,4,5} ! Set of day types

  ! Helper tool to convert from data.frame to
  ! simple arrays
  Reval('mmarray <- function(ar,col) { a <- cbind(ar[,col]) ; colnames(a) <- col ; rownames(a) <- rownames(ar) ; a[,col] }')

  ! In this example we have historical data
  ! for the years 2010,2011,2012,2013. We have
  ! gdp and income forecast for 2014 and we
  ! want to derive the energy demand.
  !
  ! Note: today is the 1st of January, 2014
  !
  ! Load historical and forecast data.
  ! All data before 1st of January, 2014 are considered
  ! past data
  initializations from HISTFILE
    HourDemand as  "E_DEMAND" ! HourDemand
    YearGDP as "IND_GDP" ! YearGDP
    YearIncome as "IND_INCOME" ! Income
  end-initializations       
 
  ! **** Run regression (2011-2013) **** !

  first := date('2011-01-01')
  last := date('2013-12-31')
  forecast(first,last,false)

  ! **** Predict demand (2014) **** !

  first := date('2014-02-01')
  last := date('2014-04-30')
  forecast(first,last,true)

end-model


Back to examples browserNext example