3 Random Walk

Zunächst wollen wir uns mit dem einfachsten Modell für Log-Returns von Aktienkursen beschäftigen - dem Random Walk.

3.1 Daten beziehen

Im ersten Schritt laden wir uns Zeitreihen von Aktienkursen herunter. Hierbei verwenden wir das Paket tidyverse, mit dem wir unter anderem Daten über yahoo-Finance beziehen können. Hierfür müssen wir die Ticker der Unternehmen von Interesse von yahoo finance vorab herausfinden. Damit ist es sehr leicht Daten zu beziehen.

library(tidyquant)
library(data.table)
library(plotly)

#yahoo finance tickers
tickers <- c('ADS', 'NKE', 'BMW.DE', 'VOW3.DE', 'AAPL', 'GOOG')

#Wir beziehen manuell einen Datensatz mit den Daten der letzten 10 Jahre
our_data <- tq_get(tickers, from = "2010-01-01", to = "2020-02-28")
#transform to data table
df <- data.table(our_data)


#Beispielsweise können wir mittels plotly einfach einen Open, High, Low, Close, Price Chart erstellen
fig <- plot_ly(tail(df[symbol == 'AAPL'], 50), x = ~date, type="ohlc",
          open = ~open, close = ~close,
          high = ~high, low = ~low) 
fig <- fig %>% layout(title = "Basic OHLC Chart")

fig

#Beispielsweise können wir mittels plotly einfach einen Candlestick Price Chart erstellen
fig <- plot_ly(tail(df[symbol == 'AAPL'], 50), x = ~date, type="candlestick",
          open = ~open, close = ~close,
          high = ~high, low = ~low) 
fig <- fig %>% layout(title = "Basic Candlestick Chart")

fig

Wir wollen uns jedoch nun auf die Kurse zum Ende jedes Handelstages beschränken und für die zugehörigen Log-Returns ein Modell schätzen. Beispielsweise für die Apple-Aktie über den Verlauf der letzten 10 Jahre.

p1 <- plot_ly(df[symbol == 'AAPL']) %>%
        add_lines(x = ~date, y = ~close, line = list(color = 'skyblue'), name = 'Kurs (Close)') %>%
        layout(title = 'Apple Aktie')

p2 <- plot_ly() %>%
  add_lines(x = df[symbol == 'AAPL']$date[-c(1)], y = diff(log(df[symbol == 'AAPL']$close)), line = list(color = 'grey'), name = 'Log-Return')

subplot(p1, p2, margin = 0.05, titleX = TRUE, titleY = TRUE)

3.2 Modell schätzen

Im folgenden schätzen wir für die Log-Returns der Apple-Aktie. Hierbei verwenden wir wieder die ‘fun_fun’-Funktion. Beispielsweise können wir eine Normalverteilung folgendermaßen schätzen,

#Funktion, welche die angegebene Verteilung schätzt, ll, aic, hmi, pp- und qq-plot ausgibt
library(rugarch)

fun_fun <- function(x, distr = 'norm'){

  distr_set <- c('norm', 'snorm', 'std', 'sstd', 'ged', 'sged')
  
  if(!(any(distr == distr_set))){
    stop(paste('Bitte verwenden Sie eine dieser Verteilungen: norm, snorm, std, sstd, ged, sged'))
  }
  
  
  fit <- fitdist(distribution = distr, x)
  
  
  if(distr == 'norm'){
    
    ll <- sum(log(ddist(distribution = distr, x, mu = fit$pars[1], sigma = fit$pars[2])))
    aic <- -2*ll + 2 * length(fit$pars)
    hmi <- 2 * integrate(function(p1, m, s, data){abs(p1 - pdist(distribution = distr, quantile(data, p1), mu = m, sigma = s))},
                         lower = 0, upper = 1, m = fit$pars[1], s = fit$pars[2], data = x, subdivisions = 1000)$value
    
    p_theor <- pdist(distribution = distr, sort(x), mu = fit$pars[1], sigma = fit$pars[2])
    p_emp <- rank(sort(x))/length(x)
    
    pp_plot <- plot_ly() %>%
      add_markers(x = p_theor, y = p_emp, name = 'pp-plot', marker = list(color = 'grey', symbol = 'x')) %>%
      add_lines(x = c(0, 1), y = c(0, 1), line = list(color = 'black'), showlegend = FALSE) %>%
      layout(
        xaxis = list(title = 'Theoretische Verteilung'),
        yaxis = list(title = 'Empirische Verteilung')
      )
    
    q_theor <- qdist(distribution = distr, rank(sort(x))/(length(x) + 1), mu = fit$pars[1], sigma = fit$pars[2])
    q_emp <- quantile(x, rank(sort(x))/(length(x) + 1))
  
    qq_plot <- plot_ly() %>%
      add_markers(x = q_theor, y = q_emp, name = 'qq-plot', marker = list(color = 'skyblue', symbol = 'x')) %>%
      add_lines(x = c(min(x), max(x)), y = c(min(x), max(x)), line = list(color = 'black'), showlegend = FALSE) %>%
      layout(
        xaxis = list(title = 'Theoretische Verteilung'),
        yaxis = list(title = 'Empirische Verteilung')
      )
    
    p <- subplot(pp_plot, qq_plot, margin = 0.05, titleX = TRUE, titleY = TRUE, which_layout = 1)
    
    list_out <- list(ll = ll, aic = aic, hmi = hmi, p = p, params = fit$pars)
    
  }
  
  if(any(distr == c('std', 'ged'))){
    
    ll <- sum(log(ddist(distribution = distr, x, mu = fit$pars[1], sigma = fit$pars[2], shape = fit$pars[3])))
    aic <- -2*ll + 2 * length(fit$pars)
    hmi <- 2 * integrate(function(p1, m, s, nu, data){abs(p1 - pdist(distribution = distr, quantile(data, p1), mu = m, sigma = s, shape = nu))},
                         lower = 0, upper = 1, m = fit$pars[1], s = fit$pars[2], nu = fit$pars[3], data = x, subdivisions = 1000)$value
    
    p_theor <- pdist(distribution = distr, sort(x), mu = fit$pars[1], sigma = fit$pars[2], shape = fit$pars[3])
    p_emp <- rank(sort(x))/length(x)
    
    pp_plot <- plot_ly() %>%
      add_markers(x = p_theor, y = p_emp, name = 'pp-plot', marker = list(color = 'grey', symbol = 'x')) %>%
      add_lines(x = c(0, 1), y = c(0, 1), line = list(color = 'black'), showlegend = FALSE) %>%
      layout(
        xaxis = list(title = 'Theoretische Verteilung'),
        yaxis = list(title = 'Empirische Verteilung')
      )
    
    q_theor <- qdist(distribution = distr, rank(sort(x))/(length(x) + 1), mu = fit$pars[1], sigma = fit$pars[2], shape = fit$pars[3])
    q_emp <- quantile(x, rank(sort(x))/(length(x) + 1))
    
    qq_plot <- plot_ly() %>%
      add_markers(x = q_theor, y = q_emp, name = 'qq-plot', marker = list(color = 'skyblue', symbol = 'x')) %>%
      add_lines(x = c(min(x), max(x)), y = c(min(x), max(x)), line = list(color = 'black'), showlegend = FALSE) %>%
      layout(
        xaxis = list(title = 'Theoretische Verteilung'),
        yaxis = list(title = 'Empirische Verteilung')
      )
    
    p <- subplot(pp_plot, qq_plot, margin = 0.05, titleX = TRUE, titleY = TRUE, which_layout = 1)
    
    list_out <- list(ll = ll, aic = aic, hmi = hmi, p = p, params = fit$pars)
    
  }
  
  if(any(distr == c('snorm', 'sstd', 'sged'))){
    
    ll <- sum(log(ddist(distribution = distr, x, mu = fit$pars[1], sigma = fit$pars[2], skew = fit$pars[3], shape = fit$pars[4])))
    aic <- -2*ll + 2 * length(fit$pars)
    hmi <- 2 * integrate(function(p1, m, s, skew, nu, data){abs(p1 - pdist(distribution = distr, quantile(data, p1), mu = m, sigma = s, skew = skew, shape = nu))},
                         lower = 0, upper = 1, m = fit$pars[1], s = fit$pars[2], skew = fit$pars[3], nu = fit$pars[4], data = x, subdivisions = 1000)$value
    
    p_theor <- pdist(distribution = distr, sort(x), mu = fit$pars[1], sigma = fit$pars[2], skew = fit$pars[3], shape = fit$pars[4])
    p_emp <- rank(sort(x))/length(x)
    
    pp_plot <- plot_ly() %>%
      add_markers(x = p_theor, y = p_emp, name = 'pp-plot', marker = list(color = 'grey', symbol = 'x')) %>%
      add_lines(x = c(0, 1), y = c(0, 1), line = list(color = 'black'), showlegend = FALSE) %>%
      layout(
        xaxis = list(title = 'Theoretische Verteilung'),
        yaxis = list(title = 'Empirische Verteilung')
      )
    
    q_theor <- qdist(distribution = distr, rank(sort(x))/(length(x) + 1), mu = fit$pars[1], sigma = fit$pars[2], skew = fit$pars[3], shape = fit$pars[4])
    q_emp <- quantile(x, rank(sort(x))/(length(x) + 1))
    
    qq_plot <- plot_ly() %>%
      add_markers(x = q_theor, y = q_emp, name = 'qq-plot', marker = list(color = 'skyblue', symbol = 'x')) %>%
      add_lines(x = c(min(x), max(x)), y = c(min(x), max(x)), line = list(color = 'black'), showlegend = FALSE) %>%
      layout(
        xaxis = list(title = 'Theoretische Verteilung'),
        yaxis = list(title = 'Empirische Verteilung')
      )
    
    p <- subplot(pp_plot, qq_plot, margin = 0.05, titleX = TRUE, titleY = TRUE, which_layout = 1)
    
    list_out <- list(ll = ll, aic = aic, hmi = hmi, p = p, params = fit$pars)
    
  }
  
return(list_out)
  
}

library(rugarch)

log_returns <- diff(log(df[symbol == 'AAPL']$close))

fit <- fun_fun(log_returns, distr = 'norm')

fit

## $ll
## [1] 6882.14
## 
## $aic
## [1] -13760.28
## 
## $hmi
## [1] 0.06486762
## 
## $p
## 
## $params
##           mu        sigma 
## 0.0008568798 0.0163519700

fit$p

Wir wollen jedoch eine Bandbreite verschiedenener Funktionen schätzen und anschließend die Güte der Anpassung visuell und mittels Kennzahlen überprüfen.

norm_fit <- fun_fun(log_returns, distr = 'norm')
snorm_fit <- fun_fun(log_returns, distr = 'snorm')
std_fit <- fun_fun(log_returns, distr = 'std')
sstd_fit <- fun_fun(log_returns, distr = 'sstd')
ged_fit <- fun_fun(log_returns, distr = 'ged')
sged_fit <- fun_fun(log_returns, distr = 'sged')


result_summary <- data.frame('AIC' = c(norm_fit$aic, snorm_fit$aic, std_fit$aic, sstd_fit$aic, ged_fit$aic, sged_fit$aic),
                 'HMI' = c(norm_fit$hmi, snorm_fit$hmi, std_fit$hmi, sstd_fit$hmi, ged_fit$hmi, sged_fit$hmi),
                 row.names = c('Normal', 'Normal (schief)', 'Student', 'Student (schief)', 'GED', 'GED (schief)'))

library(knitr)
kable(result_summary, caption = 'Übersicht des Datenfits')

Table 3.1: Übersicht des Datenfits
	AIC	HMI
Normal	-13760.28	0.0648676
Normal (schief)	-13766.37	0.0646243
Student	-14116.23	0.0103100
Student (schief)	-14115.38	0.0106401
GED	-14091.40	0.0156534
GED (schief)	-14089.57	0.0162033

Gemäß dem AIC und dem HMI scheint die (symmetrische) Student t Verteilung am besten geeignet, um die Log-Returns der Apple Aktie zu modellieren. Wir können uns die Güte dieser Schätzung erneut auch visuell ansehen.

std_fit$p

Mit diesem Modell könnten wir z.B. bestimmen, wie hoch die Wahrscheinlichkeit ist, dass die Log-Rendite bis morgen größer als 2% ist.

1 - pdist(distribution = 'std', 0.02, mu = std_fit$params[1], sigma = std_fit$params[2], shape = std_fit$params[3])

## [1] 0.09073137

Oder welcher Wert innerhalb eines Tages maximal mit einer Wahrscheinlichkeit von 5% unterschritten wird.

qdist(distribution = 'std', 0.05, mu = std_fit$params[1], sigma = std_fit$params[2], shape = std_fit$params[3])

## [1] -0.02386804

Und wir können uns zukünfige Aktienkursverläufe simulieren, gehen wir hierbei vom letzten verfügbaren Kurs aus.

#letzter verfügbarer Kurs
S_0 <- df[symbol == 'AAPL' & date == "2020-02-27"]$close

#tatsächlicher Kurs seit dem Stichtag
df_apple <- tq_get('AAPL', from = "2020-02-28")

#Anzahl der Tage in die Zukunft
num_days <- dim(df_apple)[1]

#Anzahl der simulierten Pfade
num_path <- 1000

log_r_sim <- rdist(distribution = 'std', n = num_days * num_path, mu = std_fit$params[1], sigma = std_fit$params[2], shape = std_fit$params[3])
log_r_sim <- matrix(log_r_sim, nrow = num_days, ncol = num_path)

S_t <- rep(S_0, num_path)
S_out <- S_t

for(i in 1:num_days){
  
  S_t <- S_t * exp(log_r_sim[i,])
  S_out <- rbind(S_out, S_t)
  
}



p <- plot_ly() 
for(i in 1:num_path){
  p <- p %>% add_lines(x = 1:(1 + num_days), y = S_out[,i], line = list(color = "grey"), showlegend = FALSE)
}

p <- p %>% add_lines(x = 1:(1 + num_days), y = c(S_0, df_apple$close), line = list(color = 'purple'), showlegend = FALSE) %>%
            layout(title = 'Simulation des Aktienkursverlaufs von Apple', xaxis = list(title = 'Anzahl der Prognosetage'), yaxis = list(title = 'Kursverlauf'))

p

Immerhin liegt der tatsächliche Verlauf innerhalb der simulierten Verläufe!

fig <- plot_ly(alpha = 0.6)
fig <- fig %>% add_histogram(x = S_out[4,], histnorm = 'probability density', name = 'Tag 3', marker = list(color = 'olivedrab'))
fig <- fig %>% add_histogram(x = S_out[9,], histnorm = 'probability density', name = 'Tag 8', marker = list(color = 'purple'))
fig <- fig %>% layout(barmode = "overlay", title = 'Verteilung des Aktienkurses')

fig

Mit höherem Zeithorizont wird die Prognose volatiler, was sich in einer breiteren Vereteilung der prognostizierten Werte äußert!