# Chapter 4 Statistical Background For TS Analysis & Forecasting

# NOTE(review): rm(list = ls()) and setwd() in scripts are generally
# discouraged (they clobber the user's session and working directory);
# kept because this is a stand-alone course script tied to a fixed folder.
rm(list = ls())
setwd("C:/Users/Tejendra/Desktop/FoldersOnDesktop/UdemyCourse/Section4")

# FIX: use library() rather than require() for hard dependencies --
# require() merely returns FALSE on a missing package instead of stopping.
library(tidyverse)
library(tidymodels)
library(data.table)
library(tidyposterior)
library(tsibble)       # time series based on tidy principles
library(fable)         # forecasting based on tidy principles
library(ggfortify)     # autoplot() methods for time series
library(forecast)      # meanf(), naive(), rwf(), accuracy(), forecast()
library(tseries)       # adf.test()
library(chron)
library(lubridate)
library(directlabels)
library(zoo)
library(lmtest)        # dwtest()
# FIX: a second, identical setwd() call was removed (it was redundant).
# First look at the built-in 'lynx' dataset (annual Canadian lynx
# trappings, 1821-1934).
print(lynx)
## Time Series:
## Start = 1821
## End = 1934
## Frequency = 1
##   [1]  269  321  585  871 1475 2821 3928 5943 4950 2577  523   98  184  279
##  [15]  409 2285 2685 3409 1824  409  151   45   68  213  546 1033 2129 2536
##  [29]  957  361  377  225  360  731 1638 2725 2871 2119  684  299  236  245
##  [43]  552 1623 3311 6721 4254  687  255  473  358  784 1594 1676 2251 1426
##  [57]  756  299  201  229  469  736 2042 2811 4431 2511  389   73   39   49
##  [71]   59  188  377 1292 4031 3495  587  105  153  387  758 1307 3465 6991
##  [85] 6313 3794 1836  345  382  808 1388 2713 3800 3091 2985 3790  674   81
##  [99]   80  108  229  399 1132 2432 3574 2935 1537  529  485  662 1000 1590
## [113] 2657 3396

# The time stamps attached to the series.
print(time(lynx))
## Time Series:
## Start = 1821
## End = 1934
## Frequency = 1
##   [1] 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834
##  [15] 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848
##  [29] 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862
##  [43] 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876
##  [57] 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890
##  [71] 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904
##  [85] 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918
##  [99] 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932
## [113] 1933 1934

# Number of observations.
print(length(lynx))
## [1] 114

# The last six observations.
print(tail(lynx))
## Time Series:
## Start = 1929
## End = 1934
## Frequency = 1
## [1]  485  662 1000 1590 2657 3396

# Simple descriptive statistics of the data.
print(mean(lynx))
## [1] 1538.018
print(median(lynx))
## [1] 771

# Base-graphics view of how the series behaves over time.
plot(lynx)

# ggplot version of the same plot, with the title centred.
theme_set(theme_minimal())
autoplot(lynx) +
  labs(title = "Time Series Plot of the lynx data", x = "", y = "") +
  theme(plot.title = element_text(hjust = 0.5))  # hjust = 0.5 centres the title

# The ordered sample; with n = 114 (even), the median is the average of
# the 57th and 58th ordered values.
lynx_sorted <- sort(lynx)
print(lynx_sorted)
##   [1]   39   45   49   59   68   73   80   81   98  105  108  151  153  184
##  [15]  188  201  213  225  229  229  236  245  255  269  279  299  299  321
##  [29]  345  358  360  361  377  377  382  387  389  399  409  409  469  473
##  [43]  485  523  529  546  552  585  587  662  674  684  687  731  736  756
##  [57]  758  784  808  871  957 1000 1033 1132 1292 1307 1388 1426 1475 1537
##  [71] 1590 1594 1623 1638 1676 1824 1836 2042 2119 2129 2251 2285 2432 2511
##  [85] 2536 2577 2657 2685 2713 2725 2811 2821 2871 2935 2985 3091 3311 3396
##  [99] 3409 3465 3495 3574 3790 3794 3800 3928 4031 4254 4431 4950 5943 6313
## [113] 6721 6991
print(lynx_sorted[57:58])
## [1] 758 784

# Quartiles of the series.
print(quantile(lynx))
##      0%     25%     50%     75%    100%
##   39.00  348.25  771.00 2566.75 6991.00

# Deciles (type = 5 interpolation), i.e. 11 evenly spaced probabilities.
print(quantile(lynx, prob = seq(0, 1, length = 11), type = 5))
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%
##   39.0  146.7  259.2  380.5  546.6  771.0 1470.1 2165.6 2818.0 3790.4
##   100%
## 6991.0
### Simple forecast methods

set.seed(95)
myts <- ts(rnorm(200), start = 1818)
plot(myts)

# FIX: corrected "custome" -> "custom" in the plot title.
autoplot(myts) +
  xlab("") + ylab("") +
  ggtitle("Time Series Plot of the custom time series data") +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title

meanm  <- meanf(myts, h = 20)              # forecast = mean of the series
naivem <- naive(myts, h = 20)              # forecast = last observation carried forward
driftm <- rwf(myts, h = 20, drift = TRUE)  # random walk with drift

# FIX: 'plot.conf' is not an argument of plot.forecast() (it caused the
# "not a graphical parameter" warnings); prediction intervals are toggled
# with 'PI' in the forecast package.
plot(meanm, PI = FALSE, main = "")
# FIX: the two lines() calls were fused onto one line (a syntax error in R);
# they must be separate statements.
lines(naivem$mean, col = 123, lwd = 2)
lines(driftm$mean, col = 22, lwd = 2)
legend("topleft", lty = 1, col = c(4, 123, 22),
       legend = c("Mean method", "Naive method", "Drift Method"))

# Overlay the three forecasts on the series with ggplot.
autoplot(myts) +
  autolayer(meanm$mean, series = "Mean Method") +
  autolayer(naivem$mean, series = "Naive Method") +
  autolayer(driftm$mean, series = "Drift Method") +
  ggtitle("Forecast by different methods") +
  xlab("Time (year)") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

###### Accuracy and model comparison
# FIX: everything from here to the residual ACF plot had been fused onto a
# single line, so most of it was swallowed by a trailing '#' comment and
# never executed. Reconstructed as separate statements.
# Strategy: split the series into a training and a test window, forecast on
# the training part, then compare models with accuracy().
set.seed(95)
myts <- ts(rnorm(200), start = 1818)
mytstrain <- window(myts, start = 1818, end = 1988)  # subset with window()
plot(mytstrain)
autoplot(mytstrain) +
  ggtitle("Time Series Example: Subsetting using window()") +
  xlab("Time (year)") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

meanm  <- meanf(mytstrain, h = 30)
naivem <- naive(mytstrain, h = 30)
driftm <- rwf(mytstrain, h = 30, drift = TRUE)
mytstest <- window(myts, start = 1988)

# accuracy() compares forecast performance on the held-out test window.
accuracy(meanm, mytstest)
##                        ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 1.407307e-17 1.003956 0.8164571  77.65393 133.4892 0.7702074
## Test set    -2.459828e-01 1.138760 0.9627571 100.70356 102.7884 0.9082199
##                   ACF1 Theil's U
## Training set 0.1293488        NA
## Test set     0.2415939  0.981051
accuracy(naivem, mytstest)
##                         ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -0.0002083116 1.323311 1.060048 -152.73569 730.9655 1.000000
## Test set      0.8731935861 1.413766 1.162537   86.29346 307.9891 1.096683
##                    ACF1 Theil's U
## Training set -0.4953144        NA
## Test set      0.2415939  2.031079
accuracy(driftm, mytstest)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -1.957854e-17 1.323311 1.060041 -152.64988 730.8626 0.9999931
## Test set      8.763183e-01 1.415768 1.163981   85.96496 308.7329 1.0980447
##                    ACF1 Theil's U
## Training set -0.4953144        NA
## Test set      0.2418493   2.03317

###### Residuals
set.seed(95)
myts <- ts(rnorm(200), start = 1818)
plot(myts)
autoplot(myts) +
  ggtitle("Time Series Example") +
  xlab("Time (year)") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

meanm  <- meanf(myts, h = 20)
naivem <- naive(myts, h = 20)
driftm <- rwf(myts, h = 20, drift = TRUE)

var(meanm$residuals)
## [1] 1.053807
mean(meanm$residuals)
## [1] -5.95498e-18
mean(naivem$residuals)
## [1] NA
# Naive and drift models need one observation to start with, so the first
# residual is NA -- drop it before computing summaries.
naivwithoutNA <- naivem$residuals[2:200]
var(naivwithoutNA)
## [1] 1.798592
mean(naivwithoutNA)
## [1] 0.006605028
driftwithoutNA <- driftm$residuals[2:200]
var(driftwithoutNA)
## [1] 1.798592
mean(driftwithoutNA)
## [1] -4.502054e-17

hist(driftm$residuals)
ggplot() +
  geom_histogram(aes(x = driftm$residuals)) +
  ggtitle("Histogram of residuals") +
  xlab("Residuals") + ylab("Frequency") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## stat_bin() using bins = 30. Pick better value with binwidth.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

# acf() helps identify the MA part of an ARIMA model; pacf() the AR part.
acf(driftwithoutNA)
autoplot(acf(driftwithoutNA)) +
  ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") +
  xlab("Lag") + ylab("ACF") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

### Stationarity

set.seed(2019)
x <- rnorm(1000)  # pure white noise: no unit root, stationary

# Augmented Dickey-Fuller test: removes autocorrelation and tests the null
# of a unit root against the alternative of stationarity.
adf.test(x)
## Warning in adf.test(x): p-value smaller than printed p-value
##
##  Augmented Dickey-Fuller Test
##
## data:  x
## Dickey-Fuller = -10.647, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary

plot(nottem)  # let's see the nottem dataset

autoplot(nottem) +
  ggtitle("Plot of nottem' data") +
  xlab("") + ylab("") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

plot(decompose(nottem))

autoplot(decompose(nottem)) +
  ggtitle("Plot of Decomposed nottem' data") +
  xlab("") + ylab("") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

adf.test(nottem)
## Warning in adf.test(nottem): p-value smaller than printed p-value
##
##  Augmented Dickey-Fuller Test
##
## data:  nottem
## Dickey-Fuller = -12.998, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary

y <- diffinv(x)  # cumulative sum of white noise = random walk, non-stationary

plot(y)

# FIX: this plot shows the integrated series y, but the title had been
# copy-pasted from the nottem decomposition plot above; corrected.
ggplot() +
  geom_line(aes(y = y, x = index(y))) +
  ggtitle("Plot of the integrated (non-stationary) series y") +
  xlab("") + ylab("") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

# Here the unit-root null cannot be rejected (p = 0.31): non-stationary.
adf.test(y)
##
##  Augmented Dickey-Fuller Test
##
## data:  y
## Dickey-Fuller = -2.6432, Lag order = 9, p-value = 0.3061
## alternative hypothesis: stationary
### Autocorrelation

# Durbin-Watson test for first-order autocorrelation.
# Regress the series (minus its last value) on its first lag (series minus
# its first value): chopping one observation at each end aligns the lag-1
# pairs.
length(lynx); head(lynx); head(lynx[-1]); head(lynx[-114])
## [1] 114
## Time Series:
## Start = 1821
## End = 1826
## Frequency = 1
## [1]  269  321  585  871 1475 2821
## [1]  321  585  871 1475 2821 3928
## [1]  269  321  585  871 1475 2821
dwtest(lynx[-114] ~ lynx[-1])
##
##  Durbin-Watson test
##
## data:  lynx[-114] ~ lynx[-1]
## DW = 1.1296, p-value = 1.148e-06
## alternative hypothesis: true autocorrelation is greater than 0

# For comparison, i.i.d. random numbers show no autocorrelation.
set.seed(2019)
# FIX: use '<-' for assignment rather than '='.
x <- rnorm(700)
dwtest(x[-700] ~ x[-1])
##
##  Durbin-Watson test
##
## data:  x[-700] ~ x[-1]
## DW = 1.996, p-value = 0.4789
## alternative hypothesis: true autocorrelation is greater than 0

# ... and the nottem dataset is strongly autocorrelated.
length(nottem)
## [1] 240
dwtest(nottem[-240] ~ nottem[-1])
##
##  Durbin-Watson test
##
## data:  nottem[-240] ~ nottem[-1]
## DW = 1.0093, p-value = 5.097e-15
## alternative hypothesis: true autocorrelation is greater than 0
### ACF and PACF
# lag.max sets the number of lags computed; plot = FALSE suppresses the
# plot and prints the values instead. In acf() the lag-0 correlation is
# the series with itself (always 1), so it can be ignored.
acf(lynx, lag.max = 20)
print(pacf(lynx, lag.max = 20, plot = FALSE))
##
## Partial autocorrelations of series 'lynx', by lag
##
##      1      2      3      4      5      6      7      8      9     10
##  0.711 -0.588 -0.039 -0.250 -0.094 -0.052  0.119  0.301  0.055 -0.081
##     11     12     13     14     15     16     17     18     19     20
## -0.089 -0.040 -0.099 -0.014 -0.113 -0.108 -0.006  0.116 -0.016 -0.018

pacf(lynx, lag.max = 20, plot = TRUE)

autoplot(acf(lynx, lag.max = 20))

autoplot(pacf(lynx, lag.max = 20))

# White noise for comparison: no significant autocorrelation at any lag.
set.seed(2019)
acf(rnorm(500), lag.max = 20)

autoplot(acf(rnorm(500), lag.max = 20))

## Exercise: messy data
# Three regimes glued together: N(34, 10) x 50, N(7, 1) x 67, U(3, 14) x 23
# (140 observations in total).
set.seed(54)
myts <- ts(c(rnorm(50, 34, 10),
             rnorm(67, 7, 1),
             runif(23, 3, 14)))

# 1. Plot the data and examine it.
autoplot(myts) +
  ggtitle("Plot of the exercise data") +
  xlab("Index") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

# 2. Three forecasting models.
meanm  <- meanf(myts, h = 10)
naivem <- naive(myts, h = 10)
driftm <- rwf(myts, h = 10, drift = TRUE)

# 3. Plot the forecasts from the three models.
# FIX: 'PI' and 'flwd' are not parameters of these autoplot()/autolayer()
# methods (they produced the "Ignoring unknown parameters" warnings
# recorded in the original transcript), so they are dropped. Also
# reconstructed the statements that had been fused onto one line.
autoplot(myts) +
  autolayer(meanm$mean, series = "Mean Method") +
  autolayer(naivem$mean, series = "Naive Method") +
  autolayer(driftm$mean, series = "Drift Method") +
  ggtitle("Forecast by different methods") +
  xlab("Time (Index)") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

# 4. Which model looks most promising?
# 5. Get the error measures and compare them.
var(meanm$residuals)
## [1] 211.0364
mean(meanm$residuals)
## [1] -1.591522e-15
mean(naivem$residuals)
## [1] NA
# Naive and drift models need one observation to start with, so the first
# residual is NA -- drop it.
naivwithoutNA <- naivem$residuals[2:140]
var(naivwithoutNA)
## [1] 84.53723
mean(naivwithoutNA)
## [1] -0.3435748
driftwithoutNA <- driftm$residuals[2:140]
var(driftwithoutNA)
## [1] 84.53723
mean(driftwithoutNA)
## [1] 2.648343e-15

hist(driftm$residuals)
ggplot() +
  geom_histogram(aes(x = driftm$residuals)) +
  ggtitle("Histogram of residuals") +
  xlab("Residuals") + ylab("Frequency") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## stat_bin() using bins = 30. Pick better value with binwidth.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

acf(driftwithoutNA)
autoplot(acf(driftwithoutNA)) +
  ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") +
  xlab("Lag") + ylab("ACF") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

# 6. Check all relevant statistical traits: train/test split at index 112.
mytstrain <- window(myts, start = 1, end = 112)
mytstest  <- window(myts, start = 113)

meanma  <- meanf(mytstrain, h = 28)
naivema <- naive(mytstrain, h = 28)
driftma <- rwf(mytstrain, h = 28, drift = TRUE)

accuracy(meanma, mytstest)
##                         ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -6.408719e-16 15.31467 13.94736  -84.86989 120.4406 2.231924
## Test set     -1.125187e+01 11.61073 11.25187 -180.27778 180.2778 1.800578
##                    ACF1 Theil's U
## Training set  0.7632991        NA
## Test set     -0.1703445  2.002248
accuracy(naivema, mytstest)
##                      ME      RMSE      MAE        MPE     MAPE      MASE
## Training set -0.4131666 10.024449 6.249032 -11.909758 33.36297 1.0000000
## Test set      0.9663084  3.022963 2.482045  -1.869095 33.51426 0.3971888
##                    ACF1 Theil's U
## Training set -0.4901263        NA
## Test set     -0.1703445 0.6497522
accuracy(driftma, mytstest)
##                         ME      RMSE      MAE       MPE     MAPE     MASE
## Training set -1.624594e-17 10.015931 6.265918 -7.901602 33.29565 1.002702
## Test set      6.957224e+00  8.172915 6.974530 86.321252 86.72796 1.116098
##                    ACF1 Theil's U
## Training set -0.4901263        NA
## Test set      0.4327471   1.62159

# Residual diagnostics: normality can be rejected ...
shapiro.test(naivem$residuals)
##
##  Shapiro-Wilk normality test
##
## data:  naivem$residuals
## W = 0.89587, p-value = 2.061e-08
# ... and autocorrelation is present.
acf(naivem$residuals[2:140])
autoplot(acf(naivem$residuals[2:140])) +
  ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") +
  xlab("Lag") + ylab("ACF") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

# Repeating the exercise with the log of the data.
# FIX: reconstructed the statements that had been fused onto single lines
# (and were partly swallowed by trailing comments); dropped the invalid
# 'PI'/'flwd' arguments that triggered "Ignoring unknown parameters".
myts <- log(myts)

# 1. Plot the data and examine it.
autoplot(myts) +
  ggtitle("Plot of the exercise data") +
  xlab("Index") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

# 2. Three forecasting models.
meanm  <- meanf(myts, h = 10)
naivem <- naive(myts, h = 10)
driftm <- rwf(myts, h = 10, drift = TRUE)

# 3. Plot the forecasts from the three models.
autoplot(myts) +
  autolayer(meanm$mean, series = "Mean Method") +
  autolayer(naivem$mean, series = "Naive Method") +
  autolayer(driftm$mean, series = "Drift Method") +
  ggtitle("Forecast by different methods") +
  xlab("Time (Index)") + ylab("Value") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

# 4. Which model looks most promising?
# 5. Get the error measures and compare them.
var(meanm$residuals)
## [1] 0.6290444
mean(meanm$residuals)
## [1] 1.510731e-16
mean(naivem$residuals)
## [1] NA
# First residual of the naive/drift models is NA (one observation is
# needed to initialise), so drop it.
naivwithoutNA <- naivem$residuals[2:140]
var(naivwithoutNA)
## [1] 0.2067372
mean(naivwithoutNA)
## [1] -0.01684688
driftwithoutNA <- driftm$residuals[2:140]
var(driftwithoutNA)
## [1] 0.2067372
mean(driftwithoutNA)
## [1] -4.472841e-17

hist(driftm$residuals)
ggplot() +
  geom_histogram(aes(x = driftm$residuals)) +
  ggtitle("Histogram of residuals") +
  xlab("Residuals") + ylab("Frequency") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## stat_bin() using bins = 30. Pick better value with binwidth.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

acf(driftwithoutNA)
autoplot(acf(driftwithoutNA)) +
  ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") +
  xlab("Lag") + ylab("ACF") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

# 6. Check all relevant statistical traits: train/test split at index 112.
mytstrain <- window(myts, start = 1, end = 112)
mytstest  <- window(myts, start = 113)

meanma  <- meanf(mytstrain, h = 28)
naivema <- naive(mytstrain, h = 28)
driftma <- rwf(mytstrain, h = 28, drift = TRUE)

accuracy(meanma, mytstest)
##                         ME      RMSE       MAE        MPE     MAPE
## Training set  1.846038e-16 0.8163346 0.7714004  -9.839234 31.23484
## Test set     -6.198835e-01 0.7307518 0.6204553 -36.737960 36.75971
##                  MASE       ACF1 Theil's U
## Training set 2.684607  0.8615304        NA
## Test set     2.159291 -0.2306541 0.9716032
accuracy(naivema, mytstest)
##                       ME      RMSE       MAE       MPE     MAPE     MASE
## Training set -0.01824047 0.4071185 0.2873421 -1.870232 11.33626 1.000000
## Test set      0.05893104 0.3914276 0.3316489 -1.328849 17.76316 1.154196
##                    ACF1 Theil's U
## Training set -0.4450652        NA
## Test set     -0.2306541  0.577168
accuracy(driftma, mytstest)
##                         ME      RMSE       MAE       MPE     MAPE     MASE
## Training set -9.802537e-17 0.4067096 0.2876207 -1.103181 11.31901 1.000970
## Test set      3.234179e-01 0.5206086 0.4411263 12.524858 21.27240 1.535196
##                    ACF1 Theil's U
## Training set -0.4450652        NA
## Test set     -0.1049056 0.7824451

# Residual diagnostics: normality can be rejected ...
shapiro.test(naivem$residuals)
##
##  Shapiro-Wilk normality test
##
## data:  naivem$residuals
## W = 0.961, p-value = 0.0005413
# ... and autocorrelation is present.
acf(naivem$residuals[2:140])
autoplot(acf(naivem$residuals[2:140])) +
  ggtitle("Auto- and Cross- Covariance and -Correlation Function Estimation") +
  xlab("Lag") + ylab("ACF") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "bottom")

#===================================================================================
# Stand-alone recap of the exercise (regenerates the original, un-logged data).
set.seed(54)
myts <- ts(c(rnorm(50, 34, 10),
             rnorm(67, 7, 1),
             runif(23, 3, 14)))
plot(myts)

library(forecast)
meanm  <- meanf(myts, h = 10)
naivem <- naive(myts, h = 10)
driftm <- rwf(myts, h = 10, drift = TRUE)

plot(meanm, main = "", bty = "l")
# FIX: the two lines() calls were fused onto one line (a syntax error in R).
lines(naivem$mean, col = 123, lwd = 2)
lines(driftm$mean, col = 22, lwd = 2)
legend("bottomleft", lty = 1, col = c(4, 123, 22), bty = "n", cex = 0.75,
       legend = c("Mean method", "Naive method", "Drift Method"))

length(myts)
## [1] 140
mytstrain <- window(myts, start = 1, end = 112)
mytstest  <- window(myts, start = 113)

meanma  <- meanf(mytstrain, h = 28)
naivema <- naive(mytstrain, h = 28)
driftma <- rwf(mytstrain, h = 28, drift = TRUE)

accuracy(meanma, mytstest)
##                         ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -6.408719e-16 15.31467 13.94736  -84.86989 120.4406 2.231924
## Test set     -1.125187e+01 11.61073 11.25187 -180.27778 180.2778 1.800578
##                    ACF1 Theil's U
## Training set  0.7632991        NA
## Test set     -0.1703445  2.002248
accuracy(naivema, mytstest)
##                      ME      RMSE      MAE        MPE     MAPE      MASE
## Training set -0.4131666 10.024449 6.249032 -11.909758 33.36297 1.0000000
## Test set      0.9663084  3.022963 2.482045  -1.869095 33.51426 0.3971888
##                    ACF1 Theil's U
## Training set -0.4901263        NA
## Test set     -0.1703445 0.6497522
accuracy(driftma, mytstest)
##                         ME      RMSE      MAE       MPE     MAPE     MASE
## Training set -1.624594e-17 10.015931 6.265918 -7.901602 33.29565 1.002702
## Test set      6.957224e+00  8.172915 6.974530 86.321252 86.72796 1.116098
##                    ACF1 Theil's U
## Training set -0.4901263        NA
## Test set      0.4327471   1.62159

# FIX: the remaining statements had been fused onto shared lines; split
# into separate statements (and the stray trailing backtick removed).
plot(naivem$residuals)
mean(naivem$residuals[2:140])  # drop the leading NA residual
## [1] -0.3435748
hist(naivem$residuals)  # visual check of the residual distribution
shapiro.test(naivem$residuals)  # normality can be rejected
##
##  Shapiro-Wilk normality test
##
## data:  naivem$residuals
## W = 0.89587, p-value = 2.061e-08
acf(naivem$residuals[2:140])  # autocorrelation test: autocorrelation present