This is an exploratory study of data breaches reported to HHS under HIPAA.
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.3.1.9000 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load the breach report for this study. skip = 1 drops the first line of the
# file (presumably its original header row) so the short column names below are
# used instead; num_individuals is parsed as an integer column.
data <- read_csv(
  file = "breach_report_all.csv",
  col_names = c(
    "name", "state", "type_of_entity", "num_individuals", "date",
    "type_of_breach", "point_of_breach", "business", "description"
  ),
  col_types = cols(num_individuals = col_integer()),
  skip = 1
)
# Parse the month-day-year date strings into Date values, then split each date
# into year / month / day columns. mutate() evaluates its arguments in order,
# so the three parts are derived from the already-parsed date.
data <- mutate(
  data,
  date = mdy(date),
  year = year(date),
  month = month(date),
  day = day(date)
)
# Drop rows with no count of affected individuals, then collapse the breach
# type text to one category: the first space-separated word, with
# trailing-comma variants and "Other"/missing values folded together.
data <- data %>%
  drop_na(num_individuals) %>%
  mutate(
    # paste0() converts a missing first word into the literal string "NA",
    # which the recode below maps to "Unknown".
    type_of_breach1 = paste0(
      str_split(type_of_breach, " ", simplify = TRUE)[, 1]
    ),
    type_of_breach1 = recode(
      type_of_breach1,
      "Loss," = "Loss",
      "Other," = "Unknown",
      "Theft," = "Theft",
      "NA" = "Unknown",
      "Other" = "Unknown"
    )
  )
# Collapse the breach location text to one category: take the first
# space-separated word, strip trailing commas, and expand the truncated
# two-word names ("Electronic", "Network") back to their full labels.
data <- data %>%
  mutate(
    # NOTE(review): paste0() turns a missing value into the literal string
    # "NA", and unlike the breach-type recode nothing remaps it here -- confirm
    # that an "NA" category is acceptable downstream.
    point_of_breach1 = paste0(
      str_split(point_of_breach, " ", simplify = TRUE)[, 1]
    ),
    point_of_breach1 = recode(
      point_of_breach1,
      "Email," = "Email",
      "Laptop," = "Laptop",
      "Other," = "Other",
      "Electronic" = "Electronic Medical Record",
      "Network" = "Network Server"
    )
  )
# Boxplot of breach size (individuals affected, natural-log scale) for each
# type of entity. Rows without an entity type are left out.
data %>%
  drop_na(type_of_entity) %>%
  ggplot(aes(x = as.factor(type_of_entity), y = log(num_individuals))) +
  geom_boxplot() +
  theme_classic() +
  # hjust = 0 anchors each rotated label at its tick instead of centring it.
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(
    # The y aesthetic is log(num_individuals), i.e. breach size -- it is not a
    # count of incidents, so the axis title says so.
    y = "log(number of individuals affected)",
    # NULL removes the x axis title.
    x = NULL
  )
# Boxplot of log(individuals affected) per entity type with every breach also
# drawn as a randomly jittered point. Rows lacking an entity type are excluded.
data %>%
  filter(!is.na(type_of_entity)) %>%
  ggplot(mapping = aes(x = type_of_entity, y = log(num_individuals))) +
  geom_boxplot() +
  geom_jitter() +
  theme_classic()
# Horizontal bar chart of the number of reported breaches per breach type, one
# panel per year after 2015. Rows without an entity type and breaches whose
# type is "Unknown" are excluded.
data %>%
  drop_na(type_of_entity) %>%
  # year and type_of_breach1 are the counting keys, so filtering before the
  # count yields the same rows as filtering after it. `year > 2015` already
  # excludes 2015, so no separate `year != 2015` test is needed.
  filter(year > 2015, type_of_breach1 != "Unknown") %>%
  # count() returns an ungrouped tibble, so no grouping message is printed.
  count(type_of_breach1, year, name = "num") %>%
  ggplot() +
  # Bars are ordered by each type's mean count across the plotted years.
  geom_col(aes(x = num, y = reorder(type_of_breach1, num))) +
  facet_wrap(~year) +
  theme_classic() +
  # Explicit titles replace the raw-expression defaults.
  labs(x = "number of breach incidents", y = NULL)
# Line plot, one panel per breach type, of how many breaches were reported in
# each (year, month, type) group, restricted to 2019-2021.
# NOTE(review): num is a per-month count, but date is rebuilt with the day of
# each report, so the same monthly value is drawn at every report date in that
# month -- confirm this is intended rather than one point per month.
data %>%
  mutate(year = year(date), month = month(date)) %>%
  group_by(year, month, type_of_breach1) %>%
  mutate(
    num = n(),
    avg = mean(num_individuals),
    date = make_date(year, month, day)
  ) %>%
  ungroup() %>%
  filter(year > 2018, year < 2022) %>%
  # The parentheses around num are kept so the default y axis title, which is
  # taken from the expression text, stays as it was.
  ggplot(mapping = aes(x = date, y = (num))) +
  geom_line(size = 1, colour = 2) +
  facet_wrap(vars(type_of_breach1)) +
  # geom_jitter(aes(y = log(num_individuals))) +
  theme_classic()
# Print a monthly summary per breach location: the number of breaches (num) and
# the mean number of individuals affected (avg). make_date() is called with
# year and month only, so date is the first day of each month.
data %>%
  mutate(year = year(date), month = month(date)) %>%
  group_by(year, month, point_of_breach1) %>%
  summarise(
    num = n(),
    avg = mean(num_individuals)
  ) %>%
  mutate(date = make_date(year, month))
## `summarise()` has grouped output by 'year', 'month'. You can override using the
## `.groups` argument.
## # A tibble: 968 × 6
## # Groups: year, month [160]
## year month point_of_breach1 num avg date
## <dbl> <dbl> <chr> <int> <dbl> <date>
## 1 2009 10 Network Server 1 1000 2009-10-01
## 2 2009 10 Other 1 501 2009-10-01
## 3 2009 10 Paper/Films 1 1000 2009-10-01
## 4 2009 11 Desktop 5 3675. 2009-11-01
## 5 2009 11 Laptop 4 4261. 2009-11-01
## 6 2009 12 Desktop 2 1323 2009-12-01
## 7 2009 12 Email 1 610 2009-12-01
## 8 2009 12 Other 2 5298 2009-12-01
## 9 2009 12 Paper/Films 1 83000 2009-12-01
## 10 2010 1 Desktop 1 9309 2010-01-01
## # … with 958 more rows
# Line plot, one panel per breach location, of the log of the total number of
# individuals affected in each (year, month, location) group, for 2016-2021.
# Every row keeps its group's total and is dated to the first of its month, so
# rows in the same group plot at the same point.
data %>%
  mutate(year = year(date), month = month(date)) %>%
  group_by(year, month, point_of_breach1) %>%
  mutate(
    num = n(),
    total = sum(num_individuals),
    date = make_date(year, month)
  ) %>%
  ungroup() %>%
  filter(year > 2015, year < 2022) %>%
  ggplot(mapping = aes(x = date, y = log(total))) +
  geom_line(size = 1, colour = 2) +
  facet_wrap(vars(point_of_breach1)) +
  # geom_jitter(aes(y = log(num_individuals))) +
  theme_classic()
# Publish the rendered book with bookdown; the logged output that follows shows
# the site being uploaded and deployed to bookdown.org.
# NOTE(review): this performs a live deployment each time the script runs --
# confirm that publishing from the analysis script is intended.
bookdown::publish_book()
## Preparing to deploy site...DONE
## Uploading bundle for site: 11570...DONE
## Deploying bundle: 80681 for site: 11570 ...
## [Connect] Building static content...
## [Connect] Launching static content...
## Site successfully deployed to https://bookdown.org/mehdibarati22/my-website/