Kelvin

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

1 + 1
[1] 2

You can add options to executable code like this

library(data.table)
kelvin=fread("~/Downloads/testing.csv")
library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:data.table':

    between, first, last
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Assuming your data frame is called 'kelvin'

# Step 1: Create the adjusted model
adjusted_model <- lm(log(kelvin$gensini_score+2e-6) ~ kelvin$scaled_adj_PRS +
                       kelvin$PC1 + kelvin$PC2 + kelvin$PC3 + kelvin$PC4 +
                       kelvin$gendercoded + kelvin$Age_at_Cath)

# Step 2: Create a new data frame with the adjusted values
kelvin_adjusted <- kelvin %>%
  mutate(
    log_gensini_adjusted = log(gensini_score+2e-6) -
      (PC1 * coef(adjusted_model)["kelvin$PC1"] +
         PC2 * coef(adjusted_model)["kelvin$PC2"] +
         PC3 * coef(adjusted_model)["kelvin$PC3"] +
         PC4 * coef(adjusted_model)["kelvin$PC4"] +
         gendercoded * coef(adjusted_model)["kelvin$gendercoded"] +
         Age_at_Cath * coef(adjusted_model)["kelvin$Age_at_Cath"])
  )

# Step 3: Create the plot
k=ggplot(kelvin_adjusted, aes(x = scaled_adj_PRS, y = log_gensini_adjusted)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "red") +
  theme_minimal() +
  labs(
    title = "Adjusted Log Gensini Score vs Scaled Adjusted PRS",
    x = "Scaled Adjusted PRS",
    y = "Log Gensini Score (Adjusted)",
    caption = "Adjusted for PCs, Gender, and Age at Catheterization"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(face = "bold")
  )

ggsave(k,
       filename = "~/Desktop/adjusted_plot.pdf", width = 8, height = 6, units = "in", dpi = 300)
`geom_smooth()` using formula = 'y ~ x'
library(ggplot2)
library(dplyr)

# Assuming your data frame is still called 'kelvin_adjusted'


# Create bins for scaled_adj_PRS
kelvin_binned <- kelvin_adjusted %>%
  mutate(prs_bin = cut(scaled_adj_PRS, breaks = 20)) %>%
  group_by(prs_bin) %>%
  summarise(
    mean_prs = mean(scaled_adj_PRS),
    mean_gensini = mean(log_gensini_adjusted),
    se_gensini = sd(log_gensini_adjusted) / sqrt(n()),
    n = n()
  )

# Create the improved plot
p=ggplot(kelvin_binned, aes(x = mean_prs, y = mean_gensini)) +
  geom_point(aes(size = n), alpha = 0.7) +
  geom_errorbar(aes(ymin = mean_gensini - se_gensini,
                    ymax = mean_gensini + se_gensini),
                width = 0.1, alpha = 0.5) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Binned Adjusted Log Gensini Score vs Scaled Adjusted PRS",
    x = "Scaled Adjusted PRS",
    y = "Mean Log Gensini Score (Adjusted)",
    size = "Sample Size",
    caption = "Adjusted for PCs, Gender, and Age at Catheterization\nError bars represent standard error"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(face = "bold")
  ) +
  scale_size_continuous(range = c(1, 10))



ggsave(p,
       filename = "~/Desktop/binned_plot.pdf", width = 8, height = 6, units = "in", dpi = 300)
`geom_smooth()` using formula = 'y ~ x'
# independent variable: prsgroup
# covariates: gendercoded, Age_at_Cath, PC1, PC2, PC3, PC4
# outcomes: return_angio, revascularization, ISR, hf_coded, died or Died
# time-to-event: I slightly forget the exact variable names, but they should start with "t_" or "years_" followed by the outcome names
# censoring: I slightly forget the exact variable names, but they should start with "c_" followed by the outcome names

#I think we can only show the metrics for CAD PRS?
# baseline model of age, sex, and 1st four PCs
# baseline model + traditional risk factors (hypertension, hyperchol, t2dm, current smoking)
# baseline model + CAD PRS
# baseline model + traditional risk factors + CAD PRS

##1. Fit a Cox proportional hazards model for each outcome using the prsgroup as the independent variable and the covariates as the covariates. You can use the coxph function from the survival package in R.
library(survival)



mod1=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance']
concordance 
  0.5690509 
mod2=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance']
concordance 
  0.6270752 
mod3=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance']
concordance 
   0.575213 
mod4=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance']
concordance 
  0.6291428 
###



####

mod1=coxph(Surv(years_to_ISR,ISR ) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance']
concordance 
  0.6431484 
mod2=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance']
concordance 
  0.6900397 
mod3=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance']
concordance 
  0.6528994 
mod4=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance']
concordance 
  0.6947062 
###
mod1=coxph(Surv(years_to_revasc,revascularization ) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance']
concordance 
  0.5767621 
mod2=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance']
concordance 
  0.6335211 
mod3=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance']
concordance 
  0.5919207 
mod4=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance']
concordance 
  0.6384582 
library(survival)
library(pec)
Loading required package: prodlim
# Refit the models with x=TRUE to store the design matrix
mod1 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin, x = TRUE)
mod2 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin, x = TRUE)
mod3 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin, x = TRUE)
mod4 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin, x = TRUE)

# Compute the Brier score for mod1
brier_model1 <- pec(object = list("Model 1" = mod1, "Model 2" = mod2, "Model 3" = mod3, "Model 4" = mod4),
                    formula = Surv(years_return_angio, return_angio) ~ 1,
                    data = kelvin, 
                    times = c(1, 5, 10),  # Time points for evaluation
                    exact = FALSE)
No covariates  specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary
summary(brier_model1)

Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   3518     0.000   0.000   0.000   0.000   0.000
2    1   3011     0.111   0.110   0.109   0.110   0.109
3    5   2123     0.194   0.191   0.184   0.191   0.184
4   10   1090     0.235   0.235   0.216   0.234   0.215
library(dplyr)
colSums(is.na(kelvin[, c("years_to_hf", "hf_coded", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4")]))
years_to_hf    hf_coded    prsgroup gendercoded Age_at_Cath         PC1 
       1915        1915           0           0           0           0 
        PC2         PC3         PC4 
          0           0           0 
kelvin_clean <- kelvin %>%
  filter(!is.na(years_to_hf) & !is.na(hf_coded) & 
         !is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) & 
         !is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4))

mod1=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean,x = TRUE)
mod1$concordance['concordance']
concordance 
   0.580283 
mod2=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean,x = TRUE)
mod2$concordance['concordance']
concordance 
  0.6307815 
mod3=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean,x = TRUE)
mod3$concordance['concordance']
concordance 
  0.5827276 
mod4=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS,data = kelvin_clean,x = TRUE)
mod4$concordance['concordance']
concordance 
  0.6304602 
# Compute the Brier score for mod1
brier_model1 <- pec(object = list("Model 1" = mod1, "Model 2" = mod2, "Model 3" = mod3, "Model 4" = mod4),
                    formula = Surv(years_to_hf, hf_coded) ~ 1,
                    data = kelvin_clean, 
                    times = c(1, 5, 10),  # Time points for evaluation
                    exact = FALSE)
No covariates  specified: Kaplan-Meier for censoring times used for weighting.
summary(brier_model1)

Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   1603     0.000   0.000   0.000   0.000   0.000
2    1   1559     0.016   0.016   0.016   0.016   0.016
3    5   1231     0.068   0.067   0.066   0.067   0.066
4   10    628     0.124   0.120   0.117   0.120   0.117
# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("years_to_ISR", "ISR", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))
  years_to_ISR            ISR       prsgroup    gendercoded    Age_at_Cath 
            27             27              0              0              0 
           PC1            PC2            PC3            PC4   hypertension 
             0              0              0              0              0 
     hyperchol           t2dm currentsmoking scaled_adj_PRS 
             0              0              0              0 
# Remove rows with missing values
kelvin_clean_ISR <- kelvin %>%
  filter(!is.na(years_to_ISR) & !is.na(ISR) & 
         !is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) & 
         !is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
         !is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))


# Fit Cox models for ISR
mod1_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_ISR, x = TRUE)
mod2_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_ISR, x = TRUE)
mod3_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_ISR, x = TRUE)
mod4_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_ISR, x = TRUE)

# Print concordance for each model
mod1_ISR$concordance['concordance']
concordance 
  0.6431484 
mod2_ISR$concordance['concordance']
concordance 
  0.6900397 
mod3_ISR$concordance['concordance']
concordance 
  0.6528994 
mod4_ISR$concordance['concordance']
concordance 
  0.6947062 
# Compute Brier scores for ISR
brier_ISR <- pec(object = list("Model 1" = mod1_ISR, "Model 2" = mod2_ISR, "Model 3" = mod3_ISR, "Model 4" = mod4_ISR),
                 formula = Surv(years_to_ISR, ISR) ~ 1,
                 data = kelvin_clean_ISR, 
                 times = c(1, 5, 10),  # Evaluate at 1, 5, and 10 years
                 exact = FALSE)
No covariates  specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary for ISR
summary(brier_ISR)

Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   3491     0.000   0.000   0.000   0.000   0.000
2    1   3334     0.021   0.021   0.021   0.021   0.021
3    5   2686     0.045   0.044   0.044   0.044   0.044
4   10   1606     0.070   0.070   0.068   0.070   0.068
# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("years_to_revasc", "revascularization", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))
  years_to_revasc revascularization          prsgroup       gendercoded 
               12                12                 0                 0 
      Age_at_Cath               PC1               PC2               PC3 
                0                 0                 0                 0 
              PC4      hypertension         hyperchol              t2dm 
                0                 0                 0                 0 
   currentsmoking    scaled_adj_PRS 
                0                 0 
# Remove rows with missing values
kelvin_clean_revasc <- kelvin %>%
  filter(!is.na(years_to_revasc) & !is.na(revascularization) & 
         !is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) & 
         !is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
         !is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for revascularization
# Fit Cox models for revascularization
mod1_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_revasc, x = TRUE)
mod2_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_revasc, x = TRUE)
mod3_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_revasc, x = TRUE)
mod4_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_revasc, x = TRUE)

# Print concordance for each model
mod1_revasc$concordance['concordance']
concordance 
  0.5767621 
mod2_revasc$concordance['concordance']
concordance 
  0.6335211 
mod3_revasc$concordance['concordance']
concordance 
  0.5919207 
mod4_revasc$concordance['concordance']
concordance 
  0.6384582 
# Compute Brier scores for revascularization
brier_revasc <- pec(object = list("Model 1" = mod1_revasc, "Model 2" = mod2_revasc, "Model 3" = mod3_revasc, "Model 4" = mod4_revasc),
                    formula = Surv(years_to_revasc, revascularization) ~ 1,
                    data = kelvin_clean_revasc, 
                    times = c(1, 5, 10),  # Evaluate at 1, 5, and 10 years
                    exact = FALSE)
No covariates  specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary for revascularization
summary(brier_revasc)

Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   3506     0.000   0.000   0.000   0.000   0.000
2    1   3283     0.043   0.043   0.043   0.043   0.043
3    5   2552     0.099   0.099   0.097   0.098   0.097
4   10   1436     0.151   0.150   0.145   0.149   0.145
# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("time_to_death1", "died1", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))
time_to_death1          died1       prsgroup    gendercoded    Age_at_Cath 
            13             13              0              0              0 
           PC1            PC2            PC3            PC4   hypertension 
             0              0              0              0              0 
     hyperchol           t2dm currentsmoking scaled_adj_PRS 
             0              0              0              0 
# Remove rows with missing values
kelvin_clean_death <- kelvin %>%
  filter(!is.na(time_to_death1) & !is.na(died1) & 
         !is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) & 
         !is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
         !is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for revascularization
# Fit Cox models for revascularization

mod1_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_death, x = TRUE)
mod2_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_death, x = TRUE)
mod3_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_death, x = TRUE)
mod4_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_death, x = TRUE)

# Print concordance for each model
mod1_death$concordance['concordance']
concordance 
  0.7085782 
mod2_death$concordance['concordance']
concordance 
  0.7254571 
mod3_death$concordance['concordance']
concordance 
  0.7087645 
mod4_death$concordance['concordance']
concordance 
  0.7258427 
# Compute Brier scores for revascularization
brier_death <- pec(object = list("Model 1" = mod1_revasc, "Model 2" = mod2_revasc, "Model 3" = mod3_revasc, "Model 4" = mod4_revasc),
                    formula = Surv(time_to_death1, died1) ~ 1,
                    data = kelvin_clean_death, 
                    times = c(1, 5, 10),  # Evaluate at 1, 5, and 10 years
                    exact = FALSE)
No covariates  specified: Kaplan-Meier for censoring times used for weighting.
# Print Brier score summary for revascularization
summary(brier_death)

Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   3505     0.000   0.000   0.000   0.000   0.000
2    1   3376     0.018   0.019   0.019   0.019   0.019
3    5   2671     0.081   0.083   0.083   0.083   0.084
4   10   1602     0.156   0.161   0.160   0.162   0.161