Chapter 5 Analisando dataframe de Ação
5.1 Análise descritiva
Coletando os dados
library(lubridate)
library(readxl)
library(tidyverse)
# Read only the first data row of sheet 2; the resulting column names are
# what is needed here (the odd-positioned ones are extracted as the ticker
# names in the next step).
series <- read_excel("dados/03 B1 ESTATISTICAS DESC E REGRESSAO.xlsx", sheet = 2, n_max = 1)
## New names:
## • `44824` -> `44824...2`
## • `44824` -> `44824...4`
## • `44824` -> `44824...6`
## • `44824` -> `44824...8`
## • `44824` -> `44824...10`
## • `44824` -> `44824...12`
## • `44824` -> `44824...14`
## • `44824` -> `44824...16`
## • `44824` -> `44824...18`
## • `44824` -> `44824...20`
# Keep the names found in the odd-positioned columns (1, 3, ..., 19); they are
# used below to label the columns of the full data set.
nomes <- names(series)[seq(1, 20, 2)]
# Re-read the same sheet, skipping the first line so that the second line
# (the repeated Date/Close pairs) becomes the header.
series <- read_excel("dados/03 B1 ESTATISTICAS DESC E REGRESSAO.xlsx", sheet = 2, skip = 1)
## New names:
## • `Date` -> `Date...1`
## • `Close` -> `Close...2`
## • `Date` -> `Date...3`
## • `Close` -> `Close...4`
## • `Date` -> `Date...5`
## • `Close` -> `Close...6`
## • `Date` -> `Date...7`
## • `Close` -> `Close...8`
## • `Date` -> `Date...9`
## • `Close` -> `Close...10`
## • `Date` -> `Date...11`
## • `Close` -> `Close...12`
## • `Date` -> `Date...13`
## • `Close` -> `Close...14`
## • `Date` -> `Date...15`
## • `Close` -> `Close...16`
## • `Date` -> `Date...17`
## • `Close` -> `Close...18`
## • `Date` -> `Date...19`
## • `Close` -> `Close...20`
# Rename all 20 columns in one step by interleaving the two name vectors:
# rbind() stacks them as rows, and flattening the matrix column by column
# yields DATA_<name>, <name>, DATA_<name>, <name>, ...
names(series) <- as.vector(rbind(paste0("DATA_", nomes), nomes))
series
## # A tibble: 3,147 × 20
## DATA_ELET3 ELET3 DATA_ITUB4 ITUB4 DATA_ITSA4 ITSA4 DATA_PETR4 PETR4 DATA_NUBR33 NUBR33 DATA_MGLU3 MGLU3 DATA_BBDC4 BBDC4
## <dttm> <dbl> <dttm> <dbl> <dttm> <dbl> <dttm> <dbl> <dttm> <dbl> <dttm> <dbl> <dttm> <dbl>
## 1 2010-01-04 16:56:00 37.4 2010-01-04 16:56:00 20.1 2010-01-04 16:56:00 8.05 2010-01-04 16:56:00 37.3 2021-12-09 16:56:00 10.0 2011-05-02 16:56:00 0.51 2010-01-04 16:56:00 12.2
## 2 2010-01-05 16:56:00 37.1 2010-01-05 16:56:00 20.2 2010-01-05 16:56:00 8.02 2010-01-05 16:56:00 37 2021-12-10 16:56:00 11.5 2011-05-03 16:56:00 0.51 2010-01-05 16:56:00 12.1
## 3 2010-01-06 16:56:00 36.6 2010-01-06 16:56:00 20.0 2010-01-06 16:56:00 7.92 2010-01-06 16:56:00 37.5 2021-12-13 18:00:00 10.6 2011-05-04 16:56:00 0.52 2010-01-06 16:56:00 12
## 4 2010-01-07 16:56:00 37.4 2010-01-07 16:56:00 19.8 2010-01-07 16:56:00 7.88 2010-01-07 16:56:00 37.2 2021-12-14 18:00:00 9.25 2011-05-05 16:56:00 0.51 2010-01-07 16:56:00 12.0
## 5 2010-01-08 16:56:00 38.1 2010-01-08 16:56:00 19.5 2010-01-08 16:56:00 7.82 2010-01-08 16:56:00 37.0 2021-12-15 18:00:00 9.54 2011-05-06 16:56:00 0.51 2010-01-08 16:56:00 12.0
## 6 2010-01-11 16:56:00 37.5 2010-01-11 16:56:00 19.4 2010-01-11 16:56:00 7.79 2010-01-11 16:56:00 36.8 2021-12-16 18:00:00 9.49 2011-05-10 16:56:00 0.5 2010-01-11 16:56:00 12.0
## 7 2010-01-12 16:56:00 37.1 2010-01-12 16:56:00 19.2 2010-01-12 16:56:00 7.79 2010-01-12 16:56:00 36.4 2021-12-17 18:00:00 8.95 2011-05-11 16:56:00 0.51 2010-01-12 16:56:00 12
## 8 2010-01-13 16:56:00 37.5 2010-01-13 16:56:00 19.2 2010-01-13 16:56:00 7.77 2010-01-13 16:56:00 36.3 2021-12-20 18:00:00 8.5 2011-05-12 16:56:00 0.51 2010-01-13 16:56:00 12.1
## 9 2010-01-14 16:56:00 36.9 2010-01-14 16:56:00 19.0 2010-01-14 16:56:00 7.66 2010-01-14 16:56:00 35.7 2021-12-21 18:00:00 8.7 2011-05-13 16:56:00 0.51 2010-01-14 16:56:00 11.8
## 10 2010-01-15 16:56:00 35.6 2010-01-15 16:56:00 18.7 2010-01-15 16:56:00 7.47 2010-01-15 16:56:00 35.8 2021-12-22 18:00:00 8.54 2011-05-17 16:56:00 0.5 2010-01-15 16:56:00 11.6
## # … with 3,137 more rows, and 6 more variables: DATA_VALE3 <dttm>, VALE3 <dbl>, DATA_BBAS3 <dttm>, BBAS3 <dbl>, DATA_LREN3 <dttm>, LREN3 <dbl>
# Convert every odd-positioned column (the date-time columns) to Date,
# dropping the time-of-day component.
for (i in seq(1, 20, 2)) {
  series[[i]] <- as.Date(series[[i]])
}
# Draw ELET3 first to set up the axes, then overlay the other three series,
# each in its own colour, and add a matching legend.
with(series, {
  plot(
    x = DATA_ELET3,
    y = ELET3,
    type = "l",
    xlab = "Tempo (dias)",
    ylab = "Preço fechamento (R$)",
    main = "Performance de algumas ações ao longo do tempo",
    ylim = c(0, 60)
  )
  lines(x = DATA_ITUB4, y = ITUB4, col = 2)
  lines(x = DATA_PETR4, y = PETR4, col = 3)
  lines(x = DATA_NUBR33, y = NUBR33, col = 4)
  legend(
    "topleft",
    legend = c("ELET3", "ITUB4", "PETR4", "NUBR33"),
    fill = c(1, 2, 3, 4)
  )
})
## Selecionando um tipo de Ação para Análise Descritiva
# Analysis window (both bounds inclusive).
dataInicial <- as.Date("2020-01-01")
dataFinal <- as.Date("2021-01-01")
# Keep only the rows whose ITUB4 date falls inside the window.
subConjunto <- filter(series, DATA_ITUB4 >= dataInicial & DATA_ITUB4 <= dataFinal)
Apresentando os dados selecionados em um gráfico
# Dates and closing prices of ITUB4 inside the selected window.
Data <- subConjunto$DATA_ITUB4
Ser <- subConjunto$ITUB4
# Mean closing price, ignoring missing values.
media <- mean(Ser, na.rm = TRUE)
media
## [1] 26.91806
# Standard deviation of the closing price, ignoring missing values.
desvio <- sd(Ser, na.rm = TRUE)
desvio
## [1] 4.125498
# Line chart of the selected closing prices over time.
plot(
  Data,
  Ser,
  type = "l",
  xlab = "Tempo (dias)",
  ylab = "Preço fechamento (R$)"
)
Apresentando os dados selecionados em um gráfico
# Same series on a wider y-range, with horizontal reference lines at the
# mean, at mean +/- 1 standard deviation and at mean +/- 2 standard deviations.
plot(
  Ser ~ Data,
  type = "l",
  xlab = "Tempo (dias)",
  ylab = "Preço fechamento (R$)",
  ylim = c(-10, 80)
)
abline(h = media)
# abline() accepts a vector of heights, so each call draws both lines of a band.
abline(h = media + c(1, -1) * desvio, col = 2, lwd = 2)
abline(h = media + c(2, -2) * desvio, col = 3, lwd = 2)
Apresentando os dados selecionados em um gráfico
#caixa = boxplot(Ser ~ Data, add = T)
#caixa
#limSup = caixa$stats[5,1]
#Q3 = caixa$stats[4,1]
#Q2 = caixa$stats[3,1]
#Q1 = caixa$stats[2,1]
#limInf = caixa$stats[1,1]
#plot(Ser ~ Data, type = "l", xlab = "Tempo (dias)", ylab = "Preço fechamento (R$)", ylim = c(-0, 40))
#abline(h = Q2)
#abline(h = Q3, col = 2, lwd = 2)
#abline(h = Q1, col = 2, lwd = 2)
#abline(h = limSup, col = 3, lwd = 2)
#abline(h = limInf, col = 3, lwd = 2)