Kelvin

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

1 + 1

[1] 2

You can add options to executable code like this

library(data.table)
kelvin=fread("~/Downloads/testing.csv")
library(ggplot2)
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:data.table':

    between, first, last

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

# Assuming your data frame is called 'kelvin'

# Step 1: Create the adjusted model
adjusted_model <- lm(log(kelvin$gensini_score+2e-6) ~ kelvin$scaled_adj_PRS +
                       kelvin$PC1 + kelvin$PC2 + kelvin$PC3 + kelvin$PC4 +
                       kelvin$gendercoded + kelvin$Age_at_Cath)

# Step 2: Create a new data frame with the adjusted values
kelvin_adjusted <- kelvin %>%
  mutate(
    log_gensini_adjusted = log(gensini_score+2e-6) -
      (PC1 * coef(adjusted_model)["kelvin$PC1"] +
         PC2 * coef(adjusted_model)["kelvin$PC2"] +
         PC3 * coef(adjusted_model)["kelvin$PC3"] +
         PC4 * coef(adjusted_model)["kelvin$PC4"] +
         gendercoded * coef(adjusted_model)["kelvin$gendercoded"] +
         Age_at_Cath * coef(adjusted_model)["kelvin$Age_at_Cath"])
  )

# Step 3: Create the plot
k=ggplot(kelvin_adjusted, aes(x = scaled_adj_PRS, y = log_gensini_adjusted)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "red") +
  theme_minimal() +
  labs(
    title = "Adjusted Log Gensini Score vs Scaled Adjusted PRS",
    x = "Scaled Adjusted PRS",
    y = "Log Gensini Score (Adjusted)",
    caption = "Adjusted for PCs, Gender, and Age at Catheterization"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(face = "bold")
  )

ggsave(k,
       filename = "~/Desktop/adjusted_plot.pdf", width = 8, height = 6, units = "in", dpi = 300)

`geom_smooth()` using formula = 'y ~ x'

library(ggplot2)
library(dplyr)

# Assuming your data frame is still called 'kelvin_adjusted'


# Create bins for scaled_adj_PRS
kelvin_binned <- kelvin_adjusted %>%
  mutate(prs_bin = cut(scaled_adj_PRS, breaks = 20)) %>%
  group_by(prs_bin) %>%
  summarise(
    mean_prs = mean(scaled_adj_PRS),
    mean_gensini = mean(log_gensini_adjusted),
    se_gensini = sd(log_gensini_adjusted) / sqrt(n()),
    n = n()
  )

# Create the improved plot
p=ggplot(kelvin_binned, aes(x = mean_prs, y = mean_gensini)) +
  geom_point(aes(size = n), alpha = 0.7) +
  geom_errorbar(aes(ymin = mean_gensini - se_gensini,
                    ymax = mean_gensini + se_gensini),
                width = 0.1, alpha = 0.5) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Binned Adjusted Log Gensini Score vs Scaled Adjusted PRS",
    x = "Scaled Adjusted PRS",
    y = "Mean Log Gensini Score (Adjusted)",
    size = "Sample Size",
    caption = "Adjusted for PCs, Gender, and Age at Catheterization\nError bars represent standard error"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(face = "bold")
  ) +
  scale_size_continuous(range = c(1, 10))



ggsave(p,
       filename = "~/Desktop/binned_plot.pdf", width = 8, height = 6, units = "in", dpi = 300)

`geom_smooth()` using formula = 'y ~ x'

# independent variable: prsgroup
# covariates: gendercoded, Age_at_Cath, PC1, PC2, PC3, PC4
# outcomes: return_angio, revascularization, ISR, hf_coded, died or Died
# time-to-event: I slightly forget the exact variable names, but they should start with "t_" or "years_" followed by the outcome names
# censoring: I slightly forget the exact variable names, but they should start with "c_" followed by the outcome names

#I think we can only show the metrics for CAD PRS?
# baseline model of age, sex, and 1st four PCs
# baseline model + traditional risk factors (hypertension, hyperchol, t2dm, current smoking)
# baseline model + CAD PRS
# baseline model + traditional risk factors + CAD PRS

##1. Fit a Cox proportional hazards model for each outcome using the prsgroup as the independent variable and the covariates as the covariates. You can use the coxph function from the survival package in R.
library(survival)



mod1=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance']

concordance 
  0.5690509

mod2=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance']

concordance 
  0.6270752

mod3=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance']

concordance 
   0.575213

mod4=coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance']

concordance 
  0.6291428

###



####

mod1=coxph(Surv(years_to_ISR,ISR ) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance']

concordance 
  0.6431484

mod2=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance']

concordance 
  0.6900397

mod3=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance']

concordance 
  0.6528994

mod4=coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance']

concordance 
  0.6947062

###
mod1=coxph(Surv(years_to_revasc,revascularization ) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin)
mod1$concordance['concordance']

concordance 
  0.5767621

mod2=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin)
mod2$concordance['concordance']

concordance 
  0.6335211

mod3=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin)
mod3$concordance['concordance']

concordance 
  0.5919207

mod4=coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin)
mod4$concordance['concordance']

concordance 
  0.6384582

library(survival)
library(pec)

Loading required package: prodlim

# Refit the models with x=TRUE to store the design matrix
mod1 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin, x = TRUE)
mod2 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin, x = TRUE)
mod3 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin, x = TRUE)
mod4 <- coxph(Surv(years_return_angio, return_angio) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin, x = TRUE)

# Compute the Brier score for mod1
brier_model1 <- pec(object = list("Model 1" = mod1, "Model 2" = mod2, "Model 3" = mod3, "Model 4" = mod4),
                    formula = Surv(years_return_angio, return_angio) ~ 1,
                    data = kelvin, 
                    times = c(1, 5, 10),  # Time points for evaluation
                    exact = FALSE)

No covariates  specified: Kaplan-Meier for censoring times used for weighting.

# Print Brier score summary
summary(brier_model1)


Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   3518     0.000   0.000   0.000   0.000   0.000
2    1   3011     0.111   0.110   0.109   0.110   0.109
3    5   2123     0.194   0.191   0.184   0.191   0.184
4   10   1090     0.235   0.235   0.216   0.234   0.215

library(dplyr)
colSums(is.na(kelvin[, c("years_to_hf", "hf_coded", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4")]))

years_to_hf    hf_coded    prsgroup gendercoded Age_at_Cath         PC1 
       1915        1915           0           0           0           0 
        PC2         PC3         PC4 
          0           0           0

kelvin_clean <- kelvin %>%
  filter(!is.na(years_to_hf) & !is.na(hf_coded) & 
         !is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) & 
         !is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4))

mod1=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean,x = TRUE)
mod1$concordance['concordance']

concordance 
   0.580283

mod2=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean,x = TRUE)
mod2$concordance['concordance']

concordance 
  0.6307815

mod3=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean,x = TRUE)
mod3$concordance['concordance']

concordance 
  0.5827276

mod4=coxph(Surv(years_to_hf, hf_coded) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS,data = kelvin_clean,x = TRUE)
mod4$concordance['concordance']

concordance 
  0.6304602

# Compute the Brier score for mod1
brier_model1 <- pec(object = list("Model 1" = mod1, "Model 2" = mod2, "Model 3" = mod3, "Model 4" = mod4),
                    formula = Surv(years_to_hf, hf_coded) ~ 1,
                    data = kelvin_clean, 
                    times = c(1, 5, 10),  # Time points for evaluation
                    exact = FALSE)

No covariates  specified: Kaplan-Meier for censoring times used for weighting.

summary(brier_model1)


Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   1603     0.000   0.000   0.000   0.000   0.000
2    1   1559     0.016   0.016   0.016   0.016   0.016
3    5   1231     0.068   0.067   0.066   0.067   0.066
4   10    628     0.124   0.120   0.117   0.120   0.117

# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("years_to_ISR", "ISR", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))

  years_to_ISR            ISR       prsgroup    gendercoded    Age_at_Cath 
            27             27              0              0              0 
           PC1            PC2            PC3            PC4   hypertension 
             0              0              0              0              0 
     hyperchol           t2dm currentsmoking scaled_adj_PRS 
             0              0              0              0

# Remove rows with missing values
kelvin_clean_ISR <- kelvin %>%
  filter(!is.na(years_to_ISR) & !is.na(ISR) & 
         !is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) & 
         !is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
         !is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))


# Fit Cox models for ISR
mod1_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_ISR, x = TRUE)
mod2_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_ISR, x = TRUE)
mod3_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_ISR, x = TRUE)
mod4_ISR <- coxph(Surv(years_to_ISR, ISR) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_ISR, x = TRUE)

# Print concordance for each model
mod1_ISR$concordance['concordance']

concordance 
  0.6431484

mod2_ISR$concordance['concordance']

concordance 
  0.6900397

mod3_ISR$concordance['concordance']

concordance 
  0.6528994

mod4_ISR$concordance['concordance']

concordance 
  0.6947062

# Compute Brier scores for ISR
brier_ISR <- pec(object = list("Model 1" = mod1_ISR, "Model 2" = mod2_ISR, "Model 3" = mod3_ISR, "Model 4" = mod4_ISR),
                 formula = Surv(years_to_ISR, ISR) ~ 1,
                 data = kelvin_clean_ISR, 
                 times = c(1, 5, 10),  # Evaluate at 1, 5, and 10 years
                 exact = FALSE)

No covariates  specified: Kaplan-Meier for censoring times used for weighting.

# Print Brier score summary for ISR
summary(brier_ISR)


Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   3491     0.000   0.000   0.000   0.000   0.000
2    1   3334     0.021   0.021   0.021   0.021   0.021
3    5   2686     0.045   0.044   0.044   0.044   0.044
4   10   1606     0.070   0.070   0.068   0.070   0.068

# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("years_to_revasc", "revascularization", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))

  years_to_revasc revascularization          prsgroup       gendercoded 
               12                12                 0                 0 
      Age_at_Cath               PC1               PC2               PC3 
                0                 0                 0                 0 
              PC4      hypertension         hyperchol              t2dm 
                0                 0                 0                 0 
   currentsmoking    scaled_adj_PRS 
                0                 0

# Remove rows with missing values
kelvin_clean_revasc <- kelvin %>%
  filter(!is.na(years_to_revasc) & !is.na(revascularization) & 
         !is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) & 
         !is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
         !is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for revascularization
# Fit Cox models for revascularization
mod1_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_revasc, x = TRUE)
mod2_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_revasc, x = TRUE)
mod3_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_revasc, x = TRUE)
mod4_revasc <- coxph(Surv(years_to_revasc, revascularization) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_revasc, x = TRUE)

# Print concordance for each model
mod1_revasc$concordance['concordance']

concordance 
  0.5767621

mod2_revasc$concordance['concordance']

concordance 
  0.6335211

mod3_revasc$concordance['concordance']

concordance 
  0.5919207

mod4_revasc$concordance['concordance']

concordance 
  0.6384582

# Compute Brier scores for revascularization
brier_revasc <- pec(object = list("Model 1" = mod1_revasc, "Model 2" = mod2_revasc, "Model 3" = mod3_revasc, "Model 4" = mod4_revasc),
                    formula = Surv(years_to_revasc, revascularization) ~ 1,
                    data = kelvin_clean_revasc, 
                    times = c(1, 5, 10),  # Evaluate at 1, 5, and 10 years
                    exact = FALSE)

No covariates  specified: Kaplan-Meier for censoring times used for weighting.

# Print Brier score summary for revascularization
summary(brier_revasc)


Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   3506     0.000   0.000   0.000   0.000   0.000
2    1   3283     0.043   0.043   0.043   0.043   0.043
3    5   2552     0.099   0.099   0.097   0.098   0.097
4   10   1436     0.151   0.150   0.145   0.149   0.145

# Check for missing values in ISR-related columns
colSums(is.na(kelvin[, c("time_to_death1", "died1", "prsgroup", "gendercoded", "Age_at_Cath", "PC1", "PC2", "PC3", "PC4", "hypertension", "hyperchol", "t2dm", "currentsmoking", "scaled_adj_PRS")]))

time_to_death1          died1       prsgroup    gendercoded    Age_at_Cath 
            13             13              0              0              0 
           PC1            PC2            PC3            PC4   hypertension 
             0              0              0              0              0 
     hyperchol           t2dm currentsmoking scaled_adj_PRS 
             0              0              0              0

# Remove rows with missing values
kelvin_clean_death <- kelvin %>%
  filter(!is.na(time_to_death1) & !is.na(died1) & 
         !is.na(prsgroup) & !is.na(gendercoded) & !is.na(Age_at_Cath) & 
         !is.na(PC1) & !is.na(PC2) & !is.na(PC3) & !is.na(PC4) &
         !is.na(hypertension) & !is.na(hyperchol) & !is.na(t2dm) & !is.na(currentsmoking) & !is.na(scaled_adj_PRS))
# Fit Cox models for revascularization
# Fit Cox models for revascularization

mod1_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4, data = kelvin_clean_death, x = TRUE)
mod2_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking, data = kelvin_clean_death, x = TRUE)
mod3_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + scaled_adj_PRS, data = kelvin_clean_death, x = TRUE)
mod4_death <- coxph(Surv(time_to_death1, died1) ~ prsgroup + gendercoded + Age_at_Cath + PC1 + PC2 + PC3 + PC4 + hypertension + hyperchol + t2dm + currentsmoking + scaled_adj_PRS, data = kelvin_clean_death, x = TRUE)

# Print concordance for each model
mod1_death$concordance['concordance']

concordance 
  0.7085782

mod2_death$concordance['concordance']

concordance 
  0.7254571

mod3_death$concordance['concordance']

concordance 
  0.7087645

mod4_death$concordance['concordance']

concordance 
  0.7258427

# Compute Brier scores for revascularization
brier_death <- pec(object = list("Model 1" = mod1_revasc, "Model 2" = mod2_revasc, "Model 3" = mod3_revasc, "Model 4" = mod4_revasc),
                    formula = Surv(time_to_death1, died1) ~ 1,
                    data = kelvin_clean_death, 
                    times = c(1, 5, 10),  # Evaluate at 1, 5, and 10 years
                    exact = FALSE)

No covariates  specified: Kaplan-Meier for censoring times used for weighting.

# Print Brier score summary for revascularization
summary(brier_death)


Prediction error curves


No data splitting: either apparent or independent test sample performance

 AppErr 
  time n.risk Reference Model 1 Model 2 Model 3 Model 4
1    0   3505     0.000   0.000   0.000   0.000   0.000
2    1   3376     0.018   0.019   0.019   0.019   0.019
3    5   2671     0.081   0.083   0.083   0.083   0.084
4   10   1602     0.156   0.161   0.160   0.162   0.161