1 + 1[1] 2
Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.
When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:
1 + 1[1] 2
You can add options to executable code like this
library(data.table)
kelvin=fread("~/Downloads/testing.csv")
library(ggplot2)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:data.table':
between, first, last
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Assuming your data frame is called 'kelvin'
# Step 1: Create the adjusted model
adjusted_model <- lm(log(kelvin$gensini_score+2e-6) ~ kelvin$scaled_adj_PRS +
kelvin$PC1 + kelvin$PC2 + kelvin$PC3 + kelvin$PC4 +
kelvin$gendercoded + kelvin$Age_at_Cath)
# Step 2: Create a new data frame with the adjusted values
kelvin_adjusted <- kelvin %>%
mutate(
log_gensini_adjusted = log(gensini_score+2e-6) -
(PC1 * coef(adjusted_model)["kelvin$PC1"] +
PC2 * coef(adjusted_model)["kelvin$PC2"] +
PC3 * coef(adjusted_model)["kelvin$PC3"] +
PC4 * coef(adjusted_model)["kelvin$PC4"] +
gendercoded * coef(adjusted_model)["kelvin$gendercoded"] +
Age_at_Cath * coef(adjusted_model)["kelvin$Age_at_Cath"])
)
# Step 3: Create the plot
k=ggplot(kelvin_adjusted, aes(x = scaled_adj_PRS, y = log_gensini_adjusted)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red") +
theme_minimal() +
labs(
title = "Adjusted Log Gensini Score vs Scaled Adjusted PRS",
x = "Scaled Adjusted PRS",
y = "Log Gensini Score (Adjusted)",
caption = "Adjusted for PCs, Gender, and Age at Catheterization"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(face = "bold")
)
ggsave(k,
filename = "~/Desktop/adjusted_plot.pdf", width = 8, height = 6, units = "in", dpi = 300)`geom_smooth()` using formula = 'y ~ x'
library(ggplot2)
library(dplyr)
# Assuming your data frame is still called 'kelvin_adjusted'
# Create bins for scaled_adj_PRS
kelvin_binned <- kelvin_adjusted %>%
mutate(prs_bin = cut(scaled_adj_PRS, breaks = 20)) %>%
group_by(prs_bin) %>%
summarise(
mean_prs = mean(scaled_adj_PRS),
mean_gensini = mean(log_gensini_adjusted),
se_gensini = sd(log_gensini_adjusted) / sqrt(n()),
n = n()
)
# Create the improved plot
p=ggplot(kelvin_binned, aes(x = mean_prs, y = mean_gensini)) +
geom_point(aes(size = n), alpha = 0.7) +
geom_errorbar(aes(ymin = mean_gensini - se_gensini,
ymax = mean_gensini + se_gensini),
width = 0.1, alpha = 0.5) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
theme_minimal() +
labs(
title = "Binned Adjusted Log Gensini Score vs Scaled Adjusted PRS",
x = "Scaled Adjusted PRS",
y = "Mean Log Gensini Score (Adjusted)",
size = "Sample Size",
caption = "Adjusted for PCs, Gender, and Age at Catheterization\nError bars represent standard error"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(face = "bold")
) +
scale_size_continuous(range = c(1, 10))
ggsave(p,
filename = "~/Desktop/binned_plot.pdf", width = 8, height = 6, units = "in", dpi = 300)`geom_smooth()` using formula = 'y ~ x'
# independent variable: prsgroup
# covariates: gendercoded, Age_at_Cath, PC1, PC2, PC3, PC4
# outcomes: return_angio, revascularization, ISR, hf_coded, died or Died
# time-to-event: I slightly forget the exact variable names, but they should start with "t_" or "years_" followed by the outcome names
# censoring: I slightly forget the exact variable names, but they should start with "c_" followed by the outcome names
#I think we can only show the metrics for CAD PRS?
# baseline model of age, sex, and 1st four PCs
# baseline model + traditional risk factors (hypertension, hyperchol, t2dm, current smoking)
# baseline model + CAD PRS
# baseline model + traditional risk factors + CAD PRS
##1. Fit a Cox proportional hazards model for each outcome using the prsgroup as the independent variable and the covariates as the covariates. You can use the coxph function from the survival package in R.
library(survival)
mod1=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance']concordance
0.5690509
mod2=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance']concordance
0.6270752
mod3=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance']concordance
0.575213
mod4=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance']concordance
0.6291428
###
####
mod1=coxph(Surv(years_to_ISR,ISR ) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance']concordance
0.6431484
mod2=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance']concordance
0.6900397
mod3=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance']concordance
0.6528994
mod4=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance']concordance
0.6947062
###
mod1=coxph(Surv(years_to_revasc,revascularization ) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance']concordance
0.5767621
mod2=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance']concordance
0.6335211
mod3=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance']concordance
0.5919207
mod4=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance']concordance
0.6384582
library(survival)
library(pec)Loading required package: prodlim
# Refit the models with x=TRUE to store the design matrix
mod1 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin, x = TRUE)
mod2 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin, x = TRUE)
mod3 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin, x = TRUE)
mod4 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin, x = TRUE)
# Compute the Brier score for mod1
brier_model1 <- pec(object = list("Model 1" = mod1, "Model 2" = mod2, "Model 3" = mod3, "Model 4" = mod4),
formula = Surv(years_return_angio, return_angio) ~ 1,
data = kelvin,
times = c(1, 5, 10), # Time points for evaluation
exact = FALSE)No covariates specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary
summary(brier_model1)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 3518 0.000 0.000 0.000 0.000 0.000
2 1 3011 0.111 0.110 0.109 0.110 0.109
3 5 2123 0.194 0.191 0.184 0.191 0.184
4 10 1090 0.235 0.235 0.216 0.234 0.215
library(dplyr)
colSums(is.na(kelvin[, c("years_to_hf", "hf_coded", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4")]))years_to_hf hf_coded prsgroup gendercoded Age_at_Cath PC1
1915 1915 0 0 0 0
PC2 PC3 PC4
0 0 0
kelvin_clean <- kelvin %>%
filter(!is.na(years_to_hf) & !is.na(hf_coded) &
!is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) &
!is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4))
mod1=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean,x = TRUE)
mod1$concordance['concordance']concordance
0.580283
mod2=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean,x = TRUE)
mod2$concordance['concordance']concordance
0.6307815
mod3=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean,x = TRUE)
mod3$concordance['concordance']concordance
0.5827276
mod4=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS,data = kelvin_clean,x = TRUE)
mod4$concordance['concordance']concordance
0.6304602
# Compute the Brier score for mod1
brier_model1 <- pec(object = list("Model 1" = mod1, "Model 2" = mod2, "Model 3" = mod3, "Model 4" = mod4),
formula = Surv(years_to_hf, hf_coded) ~ 1,
data = kelvin_clean,
times = c(1, 5, 10), # Time points for evaluation
exact = FALSE)No covariates specified: Kaplan-Meier for censoring times used for weighting.
summary(brier_model1)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 1603 0.000 0.000 0.000 0.000 0.000
2 1 1559 0.016 0.016 0.016 0.016 0.016
3 5 1231 0.068 0.067 0.066 0.067 0.066
4 10 628 0.124 0.120 0.117 0.120 0.117
# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("years_to_ISR", "ISR", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")])) years_to_ISR ISR prsgroup gendercoded Age_at_Cath
27 27 0 0 0
PC1 PC2 PC3 PC4 hypertension
0 0 0 0 0
hyperchol t2dm currentsmoking scaled_adj_PRS
0 0 0 0
# Remove rows with missing values
kelvin_clean_ISR <- kelvin %>%
filter(!is.na(years_to_ISR) & !is.na(ISR) &
!is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) &
!is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
!is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for ISR
mod1_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_ISR, x = TRUE)
mod2_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_ISR, x = TRUE)
mod3_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_ISR, x = TRUE)
mod4_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_ISR, x = TRUE)
# Print concordance for each model
mod1_ISR$concordance['concordance']concordance
0.6431484
mod2_ISR$concordance['concordance']concordance
0.6900397
mod3_ISR$concordance['concordance']concordance
0.6528994
mod4_ISR$concordance['concordance']concordance
0.6947062
# Compute Brier scores for ISR
brier_ISR <- pec(object = list("Model 1" = mod1_ISR, "Model 2" = mod2_ISR, "Model 3" = mod3_ISR, "Model 4" = mod4_ISR),
formula = Surv(years_to_ISR, ISR) ~ 1,
data = kelvin_clean_ISR,
times = c(1, 5, 10), # Evaluate at 1, 5, and 10 years
exact = FALSE)No covariates specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary for ISR
summary(brier_ISR)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 3491 0.000 0.000 0.000 0.000 0.000
2 1 3334 0.021 0.021 0.021 0.021 0.021
3 5 2686 0.045 0.044 0.044 0.044 0.044
4 10 1606 0.070 0.070 0.068 0.070 0.068
# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("years_to_revasc", "revascularization", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")])) years_to_revasc revascularization prsgroup gendercoded
12 12 0 0
Age_at_Cath PC1 PC2 PC3
0 0 0 0
PC4 hypertension hyperchol t2dm
0 0 0 0
currentsmoking scaled_adj_PRS
0 0
# Remove rows with missing values
kelvin_clean_revasc <- kelvin %>%
filter(!is.na(years_to_revasc) & !is.na(revascularization) &
!is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) &
!is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
!is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for revascularization
# Fit Cox models for revascularization
mod1_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_revasc, x = TRUE)
mod2_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_revasc, x = TRUE)
mod3_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_revasc, x = TRUE)
mod4_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_revasc, x = TRUE)
# Print concordance for each model
mod1_revasc$concordance['concordance']concordance
0.5767621
mod2_revasc$concordance['concordance']concordance
0.6335211
mod3_revasc$concordance['concordance']concordance
0.5919207
mod4_revasc$concordance['concordance']concordance
0.6384582
# Compute Brier scores for revascularization
brier_revasc <- pec(object = list("Model 1" = mod1_revasc, "Model 2" = mod2_revasc, "Model 3" = mod3_revasc, "Model 4" = mod4_revasc),
formula = Surv(years_to_revasc, revascularization) ~ 1,
data = kelvin_clean_revasc,
times = c(1, 5, 10), # Evaluate at 1, 5, and 10 years
exact = FALSE)No covariates specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary for revascularization
summary(brier_revasc)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 3506 0.000 0.000 0.000 0.000 0.000
2 1 3283 0.043 0.043 0.043 0.043 0.043
3 5 2552 0.099 0.099 0.097 0.098 0.097
4 10 1436 0.151 0.150 0.145 0.149 0.145
# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("time_to_death1", "died1", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))time_to_death1 died1 prsgroup gendercoded Age_at_Cath
13 13 0 0 0
PC1 PC2 PC3 PC4 hypertension
0 0 0 0 0
hyperchol t2dm currentsmoking scaled_adj_PRS
0 0 0 0
# Remove rows with missing values
kelvin_clean_death <- kelvin %>%
filter(!is.na(time_to_death1) & !is.na(died1) &
!is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) &
!is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
!is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for revascularization
# Fit Cox models for revascularization
mod1_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_death, x = TRUE)
mod2_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_death, x = TRUE)
mod3_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_death, x = TRUE)
mod4_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_death, x = TRUE)
# Print concordance for each model
mod1_death$concordance['concordance']concordance
0.7085782
mod2_death$concordance['concordance']concordance
0.7254571
mod3_death$concordance['concordance']concordance
0.7087645
mod4_death$concordance['concordance']concordance
0.7258427
# Compute Brier scores for revascularization
brier_death <- pec(object = list("Model 1" = mod1_revasc, "Model 2" = mod2_revasc, "Model 3" = mod3_revasc, "Model 4" = mod4_revasc),
formula = Surv(time_to_death1, died1) ~ 1,
data = kelvin_clean_death,
times = c(1, 5, 10), # Evaluate at 1, 5, and 10 years
exact = FALSE)No covariates specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary for revascularization
summary(brier_death)
Prediction error curves
No data splitting: either apparent or independent test sample performance
AppErr
time n.risk Reference Model 1 Model 2 Model 3 Model 4
1 0 3505 0.000 0.000 0.000 0.000 0.000
2 1 3376 0.018 0.019 0.019 0.019 0.019
3 5 2671 0.081 0.083 0.083 0.083 0.084
4 10 1602 0.156 0.161 0.160 0.162 0.161