1 + 1
[1] 2
Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.
When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:
1 + 1
[1] 2
You can add options to executable code like this
library(data.table)
=fread("~/Downloads/testing.csv")
kelvinlibrary(ggplot2)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:data.table':
between, first, last
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Assuming your data frame is called 'kelvin'
# Step 1: Create the adjusted model
<- lm(log(kelvin$gensini_score+2e-6) ~ kelvin$scaled_adj_PRS +
adjusted_model $PC1 + kelvin$PC2 + kelvin$PC3 + kelvin$PC4 +
kelvin$gendercoded + kelvin$Age_at_Cath)
kelvin
# Step 2: Create a new data frame with the adjusted values
<- kelvin %>%
kelvin_adjusted mutate(
log_gensini_adjusted = log(gensini_score+2e-6) -
* coef(adjusted_model)["kelvin$PC1"] +
(PC1 * coef(adjusted_model)["kelvin$PC2"] +
PC2 * coef(adjusted_model)["kelvin$PC3"] +
PC3 * coef(adjusted_model)["kelvin$PC4"] +
PC4 * coef(adjusted_model)["kelvin$gendercoded"] +
gendercoded * coef(adjusted_model)["kelvin$Age_at_Cath"])
Age_at_Cath
)
# Step 3: Create the plot
=ggplot(kelvin_adjusted, aes(x = scaled_adj_PRS, y = log_gensini_adjusted)) +
kgeom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red") +
theme_minimal() +
labs(
title = "Adjusted Log Gensini Score vs Scaled Adjusted PRS",
x = "Scaled Adjusted PRS",
y = "Log Gensini Score (Adjusted)",
caption = "Adjusted for PCs, Gender, and Age at Catheterization"
+
) theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(face = "bold")
)
ggsave(k,
filename = "~/Desktop/adjusted_plot.pdf", width = 8, height = 6, units = "in", dpi = 300)
`geom_smooth()` using formula = 'y ~ x'
library(ggplot2)
library(dplyr)
# Assuming your data frame is still called 'kelvin_adjusted'
# Create bins for scaled_adj_PRS
<- kelvin_adjusted %>%
kelvin_binned mutate(prs_bin = cut(scaled_adj_PRS, breaks = 20)) %>%
group_by(prs_bin) %>%
summarise(
mean_prs = mean(scaled_adj_PRS),
mean_gensini = mean(log_gensini_adjusted),
se_gensini = sd(log_gensini_adjusted) / sqrt(n()),
n = n()
)
# Create the improved plot
=ggplot(kelvin_binned, aes(x = mean_prs, y = mean_gensini)) +
pgeom_point(aes(size = n), alpha = 0.7) +
geom_errorbar(aes(ymin = mean_gensini - se_gensini,
ymax = mean_gensini + se_gensini),
width = 0.1, alpha = 0.5) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
theme_minimal() +
labs(
title = "Binned Adjusted Log Gensini Score vs Scaled Adjusted PRS",
x = "Scaled Adjusted PRS",
y = "Mean Log Gensini Score (Adjusted)",
size = "Sample Size",
caption = "Adjusted for PCs, Gender, and Age at Catheterization\nError bars represent standard error"
+
) theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(face = "bold")
+
) scale_size_continuous(range = c(1, 10))
ggsave(p,
filename = "~/Desktop/binned_plot.pdf", width = 8, height = 6, units = "in", dpi = 300)
`geom_smooth()` using formula = 'y ~ x'
# independent variable: prsgroup
# covariates: gendercoded, Age_at_Cath, PC1, PC2, PC3, PC4
# outcomes: return_angio, revascularization, ISR, hf_coded, died or Died
# time-to-event: I slightly forget the exact variable names, but they should start with "t_" or "years_" followed by the outcome names
# censoring: I slightly forget the exact variable names, but they should start with "c_" followed by the outcome names
#I think we can only show the metrics for CAD PRS?
# baseline model of age, sex, and 1st four PCs
# baseline model + traditional risk factors (hypertension, hyperchol, t2dm, current smoking)
# baseline model + CAD PRS
# baseline model + traditional risk factors + CAD PRS
##1. Fit a Cox proportional hazards model for each outcome using the prsgroup as the independent variable and the covariates as the covariates. You can use the coxph function from the survival package in R.
library(survival)
=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance'] mod1
concordance
0.5690509
=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance'] mod2
concordance
0.6270752
=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance'] mod3
concordance
0.575213
=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance'] mod4
concordance
0.6291428
###
####
=coxph(Surv(years_to_ISR,ISR ) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance'] mod1
concordance
0.6431484
=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance'] mod2
concordance
0.6900397
=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance'] mod3
concordance
0.6528994
=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance'] mod4
concordance
0.6947062
###
=coxph(Surv(years_to_revasc,revascularization ) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance'] mod1
concordance
0.5767621
=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance'] mod2
concordance
0.6335211
=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance'] mod3
concordance
0.5919207
=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance'] mod4
concordance
0.6384582
library(survival)
library(pec)
Loading required package: prodlim
# Refit the models with x=TRUE to store the design matrix
<- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin, x = TRUE)
mod1 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin, x = TRUE)
mod2 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin, x = TRUE)
mod3 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin, x = TRUE)
mod4
# Compute the Brier score for mod1
<- pec(object = list("Model 1" = mod1, "Model 2" = mod2, "Model 3" = mod3, "Model 4" = mod4),
brier_model1 formula = Surv(years_return_angio, return_angio) ~ 1,
data = kelvin,
times = c(1, 5, 10), # Time points for evaluation
exact = FALSE)
No covariates specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary
summary(brier_model1)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 3518 0.000 0.000 0.000 0.000 0.000
2 1 3011 0.111 0.110 0.109 0.110 0.109
3 5 2123 0.194 0.191 0.184 0.191 0.184
4 10 1090 0.235 0.235 0.216 0.234 0.215
library(dplyr)
colSums(is.na(kelvin[, c("years_to_hf", "hf_coded", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4")]))
years_to_hf hf_coded prsgroup gendercoded Age_at_Cath PC1
1915 1915 0 0 0 0
PC2 PC3 PC4
0 0 0
<- kelvin %>%
kelvin_clean filter(!is.na(years_to_hf) & !is.na(hf_coded) &
!is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) &
!is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4))
=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean,x = TRUE)
mod1$concordance['concordance'] mod1
concordance
0.580283
=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean,x = TRUE)
mod2$concordance['concordance'] mod2
concordance
0.6307815
=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean,x = TRUE)
mod3$concordance['concordance'] mod3
concordance
0.5827276
=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS,data = kelvin_clean,x = TRUE)
mod4$concordance['concordance'] mod4
concordance
0.6304602
# Compute the Brier score for mod1
<- pec(object = list("Model 1" = mod1, "Model 2" = mod2, "Model 3" = mod3, "Model 4" = mod4),
brier_model1 formula = Surv(years_to_hf, hf_coded) ~ 1,
data = kelvin_clean,
times = c(1, 5, 10), # Time points for evaluation
exact = FALSE)
No covariates specified: Kaplan-Meier for censoring times used for weighting.
summary(brier_model1)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 1603 0.000 0.000 0.000 0.000 0.000
2 1 1559 0.016 0.016 0.016 0.016 0.016
3 5 1231 0.068 0.067 0.066 0.067 0.066
4 10 628 0.124 0.120 0.117 0.120 0.117
# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("years_to_ISR", "ISR", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))
years_to_ISR ISR prsgroup gendercoded Age_at_Cath
27 27 0 0 0
PC1 PC2 PC3 PC4 hypertension
0 0 0 0 0
hyperchol t2dm currentsmoking scaled_adj_PRS
0 0 0 0
# Remove rows with missing values
<- kelvin %>%
kelvin_clean_ISR filter(!is.na(years_to_ISR) & !is.na(ISR) &
!is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) &
!is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
!is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for ISR
<- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_ISR, x = TRUE)
mod1_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_ISR, x = TRUE)
mod2_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_ISR, x = TRUE)
mod3_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_ISR, x = TRUE)
mod4_ISR
# Print concordance for each model
$concordance['concordance'] mod1_ISR
concordance
0.6431484
$concordance['concordance'] mod2_ISR
concordance
0.6900397
$concordance['concordance'] mod3_ISR
concordance
0.6528994
$concordance['concordance'] mod4_ISR
concordance
0.6947062
# Compute Brier scores for ISR
<- pec(object = list("Model 1" = mod1_ISR, "Model 2" = mod2_ISR, "Model 3" = mod3_ISR, "Model 4" = mod4_ISR),
brier_ISR formula = Surv(years_to_ISR, ISR) ~ 1,
data = kelvin_clean_ISR,
times = c(1, 5, 10), # Evaluate at 1, 5, and 10 years
exact = FALSE)
No covariates specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary for ISR
summary(brier_ISR)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 3491 0.000 0.000 0.000 0.000 0.000
2 1 3334 0.021 0.021 0.021 0.021 0.021
3 5 2686 0.045 0.044 0.044 0.044 0.044
4 10 1606 0.070 0.070 0.068 0.070 0.068
# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("years_to_revasc", "revascularization", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))
years_to_revasc revascularization prsgroup gendercoded
12 12 0 0
Age_at_Cath PC1 PC2 PC3
0 0 0 0
PC4 hypertension hyperchol t2dm
0 0 0 0
currentsmoking scaled_adj_PRS
0 0
# Remove rows with missing values
<- kelvin %>%
kelvin_clean_revasc filter(!is.na(years_to_revasc) & !is.na(revascularization) &
!is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) &
!is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
!is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for revascularization
# Fit Cox models for revascularization
<- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_revasc, x = TRUE)
mod1_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_revasc, x = TRUE)
mod2_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_revasc, x = TRUE)
mod3_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_revasc, x = TRUE)
mod4_revasc
# Print concordance for each model
$concordance['concordance'] mod1_revasc
concordance
0.5767621
$concordance['concordance'] mod2_revasc
concordance
0.6335211
$concordance['concordance'] mod3_revasc
concordance
0.5919207
$concordance['concordance'] mod4_revasc
concordance
0.6384582
# Compute Brier scores for revascularization
<- pec(object = list("Model 1" = mod1_revasc, "Model 2" = mod2_revasc, "Model 3" = mod3_revasc, "Model 4" = mod4_revasc),
brier_revasc formula = Surv(years_to_revasc, revascularization) ~ 1,
data = kelvin_clean_revasc,
times = c(1, 5, 10), # Evaluate at 1, 5, and 10 years
exact = FALSE)
No covariates specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary for revascularization
summary(brier_revasc)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 3506 0.000 0.000 0.000 0.000 0.000
2 1 3283 0.043 0.043 0.043 0.043 0.043
3 5 2552 0.099 0.099 0.097 0.098 0.097
4 10 1436 0.151 0.150 0.145 0.149 0.145
# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("time_to_death1", "died1", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))
time_to_death1 died1 prsgroup gendercoded Age_at_Cath
13 13 0 0 0
PC1 PC2 PC3 PC4 hypertension
0 0 0 0 0
hyperchol t2dm currentsmoking scaled_adj_PRS
0 0 0 0
# Remove rows with missing values
<- kelvin %>%
kelvin_clean_death filter(!is.na(time_to_death1) & !is.na(died1) &
!is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) &
!is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
!is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for revascularization
# Fit Cox models for revascularization
<- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_death, x = TRUE)
mod1_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_death, x = TRUE)
mod2_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_death, x = TRUE)
mod3_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_death, x = TRUE)
mod4_death
# Print concordance for each model
$concordance['concordance'] mod1_death
concordance
0.7085782
$concordance['concordance'] mod2_death
concordance
0.7254571
$concordance['concordance'] mod3_death
concordance
0.7087645
$concordance['concordance'] mod4_death
concordance
0.7258427
# Compute Brier scores for revascularization
<- pec(object = list("Model 1" = mod1_revasc, "Model 2" = mod2_revasc, "Model 3" = mod3_revasc, "Model 4" = mod4_revasc),
brier_death formula = Surv(time_to_death1, died1) ~ 1,
data = kelvin_clean_death,
times = c(1, 5, 10), # Evaluate at 1, 5, and 10 years
exact = FALSE)
No covariates specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary for revascularization
summary(brier_death)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 3505 0.000 0.000 0.000 0.000 0.000
2 1 3376 0.018 0.019 0.019 0.019 0.019
3 5 2671 0.081 0.083 0.083 0.083 0.084
4 10 1602 0.156 0.161 0.160 0.162 0.161