Chapter 4 Statistical Background For TS Analysis & Forecasting

rm(list = ls())
setwd("C:/Users/Tejendra/Desktop/FoldersOnDesktop/UdemyCourse/Section4")
require(tidyverse)
require(tidymodels)
require(data.table)
require(tidyposterior)
require(tsibble)  #tsibble for time series based on tidy principles
require(fable)  #for forecasting based on tidy principles
require(ggfortify)  #for plotting timeseries
require(forecast)  #for forecast function
require(tseries)
require(chron)
require(lubridate)
require(directlabels)
require(zoo)
require(lmtest)

setwd("C:/Users/Tejendra/Desktop/FoldersOnDesktop/UdemyCourse/Section4")
#loading the data
lynx

## Time Series:
## Start = 1821 
## End = 1934 
## Frequency = 1 
##   [1]  269  321  585  871 1475 2821 3928 5943 4950 2577  523   98  184  279
##  [15]  409 2285 2685 3409 1824  409  151   45   68  213  546 1033 2129 2536
##  [29]  957  361  377  225  360  731 1638 2725 2871 2119  684  299  236  245
##  [43]  552 1623 3311 6721 4254  687  255  473  358  784 1594 1676 2251 1426
##  [57]  756  299  201  229  469  736 2042 2811 4431 2511  389   73   39   49
##  [71]   59  188  377 1292 4031 3495  587  105  153  387  758 1307 3465 6991
##  [85] 6313 3794 1836  345  382  808 1388 2713 3800 3091 2985 3790  674   81
##  [99]   80  108  229  399 1132 2432 3574 2935 1537  529  485  662 1000 1590
## [113] 2657 3396

#looking at the data
time(lynx) #to look at the time stamps of the time series data

## Time Series:
## Start = 1821 
## End = 1934 
## Frequency = 1 
##   [1] 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834
##  [15] 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848
##  [29] 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862
##  [43] 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876
##  [57] 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890
##  [71] 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904
##  [85] 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918
##  [99] 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932
## [113] 1933 1934

length(lynx) #number of observations in the time series data

## [1] 114

tail(lynx) # last 6 observations

## Time Series:
## Start = 1929 
## End = 1934 
## Frequency = 1 
## [1]  485  662 1000 1590 2657 3396

mean(lynx); median(lynx) #simple descriptive statistics of the data

## [1] 1538.018

## [1] 771

plot(lynx) #see how time series is behaving

theme_set(theme_minimal())
autoplot(lynx) +
  xlab("") + ylab("") +
  ggtitle("Time Series Plot of the lynx data") +
  theme(plot.title = element_text(hjust = 0.5))  #for centering the text

sort(lynx)

##   [1]   39   45   49   59   68   73   80   81   98  105  108  151  153  184
##  [15]  188  201  213  225  229  229  236  245  255  269  279  299  299  321
##  [29]  345  358  360  361  377  377  382  387  389  399  409  409  469  473
##  [43]  485  523  529  546  552  585  587  662  674  684  687  731  736  756
##  [57]  758  784  808  871  957 1000 1033 1132 1292 1307 1388 1426 1475 1537
##  [71] 1590 1594 1623 1638 1676 1824 1836 2042 2119 2129 2251 2285 2432 2511
##  [85] 2536 2577 2657 2685 2713 2725 2811 2821 2871 2935 2985 3091 3311 3396
##  [99] 3409 3465 3495 3574 3790 3794 3800 3928 4031 4254 4431 4950 5943 6313
## [113] 6721 6991

sort(lynx)[c(57,58)]

## [1] 758 784

quantile(lynx) #to give the quantiles of the time series

##      0%     25%     50%     75%    100% 
##   39.00  348.25  771.00 2566.75 6991.00

quantile(lynx, 
         prob = seq(0, 1, length = 11), #number of categories
         type = 5) #deciles

##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90% 
##   39.0  146.7  259.2  380.5  546.6  771.0 1470.1 2165.6 2818.0 3790.4 
##   100% 
## 6991.0

### simple forecast methods

set.seed(95)
myts <- ts(rnorm(200), start = (1818))
plot(myts)

autoplot(myts) +
  xlab("") + ylab("") +
  ggtitle("Time Series Plot of the custome time series data") +
  theme(plot.title = element_text(hjust = 0.5))  #for centering the text

meanm <- meanf(myts, h=20) #forecast by taking the mean of the values
naivem <- naive(myts, h=20) #forecast by taking the last observation forward
driftm <- rwf(myts, h=20, drift = T) #forecast by drift model

plot(meanm, plot.conf = F, main = "")

## Warning in plot.window(xlim, ylim, log, ...): "plot.conf" is not a
## graphical parameter

## Warning in title(main = main, xlab = xlab, ylab = ylab, ...): "plot.conf"
## is not a graphical parameter

## Warning in axis(1, ...): "plot.conf" is not a graphical parameter

## Warning in axis(2, ...): "plot.conf" is not a graphical parameter

## Warning in box(...): "plot.conf" is not a graphical parameter

lines(naivem$mean, col=123, lwd = 2)
lines(driftm$mean, col=22, lwd = 2)
legend("topleft",lty=1,col=c(4,123,22),
       legend=c("Mean method","Naive method","Drift Method"))

autoplot(myts) + 
  autolayer(meanm$mean, series = "Mean Method") +
  autolayer(naivem$mean, series = "Naive Method") +
  autolayer(driftm$mean, series = "Drift Method") +
  ggtitle("Forecast by different methods") + 
  xlab("Time (year)") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

###### accuracy and model comparison
#subset the time series. Divide into training and the testing set. Forecast on the training time series. Then use accuracy() function to check how well the forecast performs. 
set.seed(95)
myts <- ts(rnorm(200), start = (1818))
mytstrain <- window(myts, start = 1818, end = 1988) #subset the time series using window function
plot(mytstrain)

autoplot(mytstrain) +
  ggtitle("Time Series Example: Subsetting using windows()") + 
  xlab("Time (year)") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

meanm <- meanf(mytstrain, h=30)
naivem <- naive(mytstrain, h=30)
driftm <- rwf(mytstrain, h=30, drift = T)

mytstest <- window(myts, start = 1988)

accuracy(meanm, mytstest) #accuracy function allows one to compare the performance of two models

##                         ME     RMSE       MAE       MPE     MAPE      MASE
## Training set  1.407307e-17 1.003956 0.8164571  77.65393 133.4892 0.7702074
## Test set     -2.459828e-01 1.138760 0.9627571 100.70356 102.7884 0.9082199
##                   ACF1 Theil's U
## Training set 0.1293488        NA
## Test set     0.2415939  0.981051

accuracy(naivem, mytstest)

##                         ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -0.0002083116 1.323311 1.060048 -152.73569 730.9655 1.000000
## Test set      0.8731935861 1.413766 1.162537   86.29346 307.9891 1.096683
##                    ACF1 Theil's U
## Training set -0.4953144        NA
## Test set      0.2415939  2.031079

accuracy(driftm, mytstest)

##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -1.957854e-17 1.323311 1.060041 -152.64988 730.8626 0.9999931
## Test set      8.763183e-01 1.415768 1.163981   85.96496 308.7329 1.0980447
##                    ACF1 Theil's U
## Training set -0.4953144        NA
## Test set      0.2418493   2.03317

###### Residuals

set.seed(95)
myts <- ts(rnorm(200), start = (1818))
plot(myts)

autoplot(myts) +
  ggtitle("Time Series Example") + 
  xlab("Time (year)") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

meanm <- meanf(myts, h=20)
naivem <- naive(myts, h=20)
driftm <- rwf(myts, h=20, drift = T)

var(meanm$residuals)

## [1] 1.053807

mean(meanm$residuals)

## [1] -5.95498e-18

mean(naivem$residuals)

## [1] NA

naivwithoutNA <- naivem$residuals
naivwithoutNA <- naivwithoutNA[2:200] #naive and drift models need one observation to start with. Thus the first observation is NA. 
var(naivwithoutNA)

## [1] 1.798592

mean(naivwithoutNA)

## [1] 0.006605028

driftwithoutNA <- driftm$residuals
driftwithoutNA <- driftwithoutNA[2:200]
var(driftwithoutNA)

## [1] 1.798592

mean(driftwithoutNA)

## [1] -4.502054e-17

hist(driftm$residuals)

ggplot() + 
  geom_histogram(aes(x = driftm$residuals)) +
  ggtitle("Histogram of residuals") + 
  xlab("Residuals") + ylab("Frequency") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 1 rows containing non-finite values (stat_bin).

acf(driftwithoutNA) #acf is used to identify the moving average part of the ARIMA model and pacf is used to identify the AR part of the ARIMA model
autoplot(acf(driftwithoutNA)) +
  ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") + 
  xlab("Lag") + ylab("ACF") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

### Stationarity
set.seed(2019)
x <- rnorm(1000) # no unit-root, stationary

adf.test(x) # augmented Dickey Fuller Test Augmented Dickey-Fuller test removes autocorrelation and tests for stationarity

## Warning in adf.test(x): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  x
## Dickey-Fuller = -10.647, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary

plot(nottem) # Let s see the nottem dataset

autoplot(nottem) +
  ggtitle("Plot of `nottem' data") + 
  xlab("") + ylab("") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

plot(decompose(nottem))

autoplot(decompose(nottem)) +
  ggtitle("Plot of Decomposed `nottem' data") + 
  xlab("") + ylab("") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

adf.test(nottem)

## Warning in adf.test(nottem): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  nottem
## Dickey-Fuller = -12.998, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary

y <- diffinv(x) # non-stationary

plot(y)

ggplot() +
  geom_line(aes(y = y, x= index(y))) +
  ggtitle("Plot of Decomposed `nottem' data") + 
  xlab("") + ylab("") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

adf.test(y)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  y
## Dickey-Fuller = -2.6432, Lag order = 9, p-value = 0.3061
## alternative hypothesis: stationary

### Autocorrelation

# Durbin Watson test for autocorrelation
length(lynx); head(lynx); head(lynx[-1]); head(lynx[-114]) # check the required traits for the test. chopping the first and last observation so that we get the first lag difference.

## [1] 114

## Time Series:
## Start = 1821 
## End = 1826 
## Frequency = 1 
## [1]  269  321  585  871 1475 2821

## [1]  321  585  871 1475 2821 3928

## [1]  269  321  585  871 1475 2821

dwtest(lynx[-114] ~ lynx[-1])

## 
##  Durbin-Watson test
## 
## data:  lynx[-114] ~ lynx[-1]
## DW = 1.1296, p-value = 1.148e-06
## alternative hypothesis: true autocorrelation is greater than 0

set.seed(2019)
x = rnorm(700) # Lets take a look at random numbers
dwtest(x[-700] ~ x[-1])

## 
##  Durbin-Watson test
## 
## data:  x[-700] ~ x[-1]
## DW = 1.996, p-value = 0.4789
## alternative hypothesis: true autocorrelation is greater than 0

length(nottem) # and the nottem dataset

## [1] 240

dwtest(nottem[-240] ~ nottem[-1])

## 
##  Durbin-Watson test
## 
## data:  nottem[-240] ~ nottem[-1]
## DW = 1.0093, p-value = 5.097e-15
## alternative hypothesis: true autocorrelation is greater than 0

### ACF and PACF
acf(lynx, lag.max = 20); pacf(lynx, lag.max =20, plot = FALSE)

## 
## Partial autocorrelations of series 'lynx', by lag
## 
##      1      2      3      4      5      6      7      8      9     10 
##  0.711 -0.588 -0.039 -0.250 -0.094 -0.052  0.119  0.301  0.055 -0.081 
##     11     12     13     14     15     16     17     18     19     20 
## -0.089 -0.040 -0.099 -0.014 -0.113 -0.108 -0.006  0.116 -0.016 -0.018

pacf(lynx, lag.max =20, plot = TRUE)

autoplot(acf(lynx, lag.max = 20))

autoplot(pacf(lynx, lag.max =20)) #in acf() first correlation is with itself, so we can ignore it

# lag.max for numbers of lags to be calculated
# plot = F to suppress plotting
set.seed(2019)
acf(rnorm(500), lag.max = 20)

autoplot(acf(rnorm(500), lag.max = 20))

## Exercise messy data
set.seed(54)
myts <- ts(c(rnorm(50, 34, 10), 
             rnorm(67, 7, 1), 
             runif(23, 3, 14)))

#1. Plot the data and examine it. 
autoplot(myts) +
  ggtitle("Plot of the exercise data") + 
  xlab("Index") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

#2. three forecasting models
meanm <- meanf(myts, h = 10)
naivem <- naive(myts, h= 10)
driftm <- rwf(myts, h =10, drift = TRUE)

#3. plot forecasts from three forecasting models
forecast::autoplot(myts,
         PI = TRUE,
         flwd = 2) + 
  autolayer(meanm$mean, series = "Mean Method", PI = TRUE) +
  autolayer(naivem$mean, series = "Naive Method", PI = TRUE) +
  autolayer(driftm$mean, series = "Drift Method", PI = TRUE) +
  ggtitle("Forecast by different methods") + 
  xlab("Time (Index)") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

## Warning: Ignoring unknown parameters: PI, flwd

## Warning: Ignoring unknown parameters: PI

## Warning: Ignoring unknown parameters: PI

## Warning: Ignoring unknown parameters: PI

#4. Which model looks most promising 

#5. Get the error measures and compare them
var(meanm$residuals)

## [1] 211.0364

mean(meanm$residuals)

## [1] -1.591522e-15

mean(naivem$residuals)

## [1] NA

naivwithoutNA <- naivem$residuals
naivwithoutNA <- naivwithoutNA[2:140] #naive and drift models need one observation to start with. Thus the first observation is NA. 
var(naivwithoutNA)

## [1] 84.53723

mean(naivwithoutNA)

## [1] -0.3435748

driftwithoutNA <- driftm$residuals
driftwithoutNA <- driftwithoutNA[2:140]
var(driftwithoutNA)

## [1] 84.53723

mean(driftwithoutNA)

## [1] 2.648343e-15

hist(driftm$residuals)

ggplot() + 
  geom_histogram(aes(x = driftm$residuals)) +
  ggtitle("Histogram of residuals") + 
  xlab("Residuals") + ylab("Frequency") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 1 rows containing non-finite values (stat_bin).

acf(driftwithoutNA)
autoplot(acf(driftwithoutNA)) +
  ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") + 
  xlab("Lag") + ylab("ACF") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

#6 check all relevant statistical traits
mytstrain <- window(myts, start = 1, end = 112 )
mytstest <- window(myts, start = 113)

meanma <- meanf(mytstrain, h=28)
naivema <- naive(mytstrain, h=28)
driftma <- rwf(mytstrain, h=28, drift = T)
accuracy(meanma, mytstest)

##                         ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -6.408719e-16 15.31467 13.94736  -84.86989 120.4406 2.231924
## Test set     -1.125187e+01 11.61073 11.25187 -180.27778 180.2778 1.800578
##                    ACF1 Theil's U
## Training set  0.7632991        NA
## Test set     -0.1703445  2.002248

accuracy(naivema, mytstest)

##                      ME      RMSE      MAE        MPE     MAPE      MASE
## Training set -0.4131666 10.024449 6.249032 -11.909758 33.36297 1.0000000
## Test set      0.9663084  3.022963 2.482045  -1.869095 33.51426 0.3971888
##                    ACF1 Theil's U
## Training set -0.4901263        NA
## Test set     -0.1703445 0.6497522

accuracy(driftma, mytstest)

##                         ME      RMSE      MAE       MPE     MAPE     MASE
## Training set -1.624594e-17 10.015931 6.265918 -7.901602 33.29565 1.002702
## Test set      6.957224e+00  8.172915 6.974530 86.321252 86.72796 1.116098
##                    ACF1 Theil's U
## Training set -0.4901263        NA
## Test set      0.4327471   1.62159

shapiro.test(naivem$residuals) # test for normal distribution, normal distr can be rejected

## 
##  Shapiro-Wilk normality test
## 
## data:  naivem$residuals
## W = 0.89587, p-value = 2.061e-08

acf(naivem$residuals[2:140]) # autocorrelation test, autocorrelation present
autoplot(acf(naivem$residuals[2:140])) +
  ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") + 
  xlab("Lag") + ylab("ACF") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

#Repeating with the log of the data
 myts <- log(myts)
 #1. Plot the data and examine it. 
 autoplot(myts) +
   ggtitle("Plot of the exercise data") + 
   xlab("Index") + ylab("Value") +
   theme_bw() +
   theme(plot.title = element_text(hjust = 0.5),
         legend.position = "bottom")

 #2. three forecasting models
 meanm <- meanf(myts, h = 10)
 naivem <- naive(myts, h= 10)
 driftm <- rwf(myts, h =10, drift = TRUE)
 
 #3. plot forecasts from three forecasting models
 forecast::autoplot(myts,
                    PI = TRUE,
                    flwd = 2) + 
   autolayer(meanm$mean, series = "Mean Method", PI = TRUE) +
   autolayer(naivem$mean, series = "Naive Method", PI = TRUE) +
   autolayer(driftm$mean, series = "Drift Method", PI = TRUE) +
   ggtitle("Forecast by different methods") + 
   xlab("Time (Index)") + ylab("Value") +
   theme_bw() +
   theme(plot.title = element_text(hjust = 0.5),
         legend.position = "bottom")

## Warning: Ignoring unknown parameters: PI, flwd

## Warning: Ignoring unknown parameters: PI

## Warning: Ignoring unknown parameters: PI

## Warning: Ignoring unknown parameters: PI

 #4. Which model looks most promising 
 
 #5. Get the error measures and compare them
 var(meanm$residuals)

## [1] 0.6290444

 mean(meanm$residuals)

## [1] 1.510731e-16

 mean(naivem$residuals)

## [1] NA

 naivwithoutNA <- naivem$residuals
 naivwithoutNA <- naivwithoutNA[2:140] #naive and drift models need one observation to start with. Thus the first observation is NA. 
 var(naivwithoutNA)

## [1] 0.2067372

 mean(naivwithoutNA)

## [1] -0.01684688

 driftwithoutNA <- driftm$residuals
 driftwithoutNA <- driftwithoutNA[2:140]
 var(driftwithoutNA)

## [1] 0.2067372

 mean(driftwithoutNA)

## [1] -4.472841e-17

 hist(driftm$residuals)

 ggplot() + 
   geom_histogram(aes(x = driftm$residuals)) +
   ggtitle("Histogram of residuals") + 
   xlab("Residuals") + ylab("Frequency") +
   theme_bw() +
   theme(plot.title = element_text(hjust = 0.5),
         legend.position = "bottom")

## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 1 rows containing non-finite values (stat_bin).

 acf(driftwithoutNA)
 autoplot(acf(driftwithoutNA)) +
   ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") + 
   xlab("Lag") + ylab("ACF") +
   theme_bw() +
   theme(plot.title = element_text(hjust = 0.5),
         legend.position = "bottom")

 #6 check all relevant statistical traits
 mytstrain <- window(myts, start = 1, end = 112 )
 mytstest <- window(myts, start = 113)
 
 meanma <- meanf(mytstrain, h=28)
 naivema <- naive(mytstrain, h=28)
 driftma <- rwf(mytstrain, h=28, drift = T)
 accuracy(meanma, mytstest)

##                         ME      RMSE       MAE        MPE     MAPE
## Training set  1.846038e-16 0.8163346 0.7714004  -9.839234 31.23484
## Test set     -6.198835e-01 0.7307518 0.6204553 -36.737960 36.75971
##                  MASE       ACF1 Theil's U
## Training set 2.684607  0.8615304        NA
## Test set     2.159291 -0.2306541 0.9716032

 accuracy(naivema, mytstest)

##                       ME      RMSE       MAE       MPE     MAPE     MASE
## Training set -0.01824047 0.4071185 0.2873421 -1.870232 11.33626 1.000000
## Test set      0.05893104 0.3914276 0.3316489 -1.328849 17.76316 1.154196
##                    ACF1 Theil's U
## Training set -0.4450652        NA
## Test set     -0.2306541  0.577168

 accuracy(driftma, mytstest)

##                         ME      RMSE       MAE       MPE     MAPE     MASE
## Training set -9.802537e-17 0.4067096 0.2876207 -1.103181 11.31901 1.000970
## Test set      3.234179e-01 0.5206086 0.4411263 12.524858 21.27240 1.535196
##                    ACF1 Theil's U
## Training set -0.4450652        NA
## Test set     -0.1049056 0.7824451

 shapiro.test(naivem$residuals) # test for normal distribution, normal distr can be rejected

## 
##  Shapiro-Wilk normality test
## 
## data:  naivem$residuals
## W = 0.961, p-value = 0.0005413

 acf(naivem$residuals[2:140]) # autocorrelation test, autocorrelation present
 autoplot(acf(naivem$residuals[2:140])) +
   ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") + 
   xlab("Lag") + ylab("ACF") +
   theme_bw() +
   theme(plot.title = element_text(hjust = 0.5),
         legend.position = "bottom")

#=================================================================================== 
 set.seed(54)
 myts <- ts(c(rnorm(50, 34, 10), 
              rnorm(67, 7, 1), 
              runif(23, 3, 14)))
plot(myts)

library(forecast)
meanm <- meanf(myts, h=10)
naivem <- naive(myts, h=10)
driftm <- rwf(myts, h=10, drift = T)

plot(meanm, main = "", bty = "l")
lines(naivem$mean, col=123, lwd = 2)
lines(driftm$mean, col=22, lwd = 2)
legend("bottomleft",lty=1,col=c(4,123,22), bty = "n", cex = 0.75,
       legend=c("Mean method","Naive method","Drift Method"))

length(myts)

## [1] 140

mytstrain <- window(myts, start = 1, end = 112 )
mytstest <- window(myts, start = 113)

meanma <- meanf(mytstrain, h=28)
naivema <- naive(mytstrain, h=28)
driftma <- rwf(mytstrain, h=28, drift = T)

accuracy(meanma, mytstest)

##                         ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -6.408719e-16 15.31467 13.94736  -84.86989 120.4406 2.231924
## Test set     -1.125187e+01 11.61073 11.25187 -180.27778 180.2778 1.800578
##                    ACF1 Theil's U
## Training set  0.7632991        NA
## Test set     -0.1703445  2.002248

accuracy(naivema, mytstest)

##                      ME      RMSE      MAE        MPE     MAPE      MASE
## Training set -0.4131666 10.024449 6.249032 -11.909758 33.36297 1.0000000
## Test set      0.9663084  3.022963 2.482045  -1.869095 33.51426 0.3971888
##                    ACF1 Theil's U
## Training set -0.4901263        NA
## Test set     -0.1703445 0.6497522

accuracy(driftma, mytstest)

##                         ME      RMSE      MAE       MPE     MAPE     MASE
## Training set -1.624594e-17 10.015931 6.265918 -7.901602 33.29565 1.002702
## Test set      6.957224e+00  8.172915 6.974530 86.321252 86.72796 1.116098
##                    ACF1 Theil's U
## Training set -0.4901263        NA
## Test set      0.4327471   1.62159

plot(naivem$residuals)

mean(naivem$residuals[2:140])

## [1] -0.3435748

hist(naivem$residuals) # normal distribution

shapiro.test(naivem$residuals) # test for normal distribution, normal distr can be rejected

## 
##  Shapiro-Wilk normality test
## 
## data:  naivem$residuals
## W = 0.89587, p-value = 2.061e-08

acf(naivem$residuals[2:140]) # autocorrelation test, autocorrelation present