Contents 
   
  
 
Rare and Common Event Timing  
In general, individuals with both high common and rare variant burden have ‘earlier’ onset disease, but how can we quantify the proportion of rare/common heritability by time of event. Let’s first simulate some data:
We’ll simulate 10 (MAF 1e-1) rare and 100 (MAF 5e-2) common variants with rare variant effect sizes \N (15,0.5) and \N (0.1,1) respectively. 
Every individuals total genetic load will then be the sum of this rare and common genetic load which we can sum, and normalize N(0,1) 
 
Code 
library (survival) 
library (gridExtra) 
library (data.table) 
library (ggsci) 
library (dplyr) 
library (reshape2) 
library (dplyr) 
library (ggplot2) 
library (ggridges) 
 
set.seed (123 ) 
 
 
list.files (path= "out/" ) 
 
 [1] "APOB.pLOF.snp"                 "FH.1.long.txt.gz"             
 [3] "FH.19.long.txt.gz"             "FH.2.long.txt.gz"             
 [5] "FH.coding.txt"                 "FH.coding.txt.gz"             
 [7] "FH.ldlc.coding.txt.gz"         "FH.pathogenic.snp"            
 [9] "FH.variant.txt"                "LDLR.deleterious.snp"         
[11] "LDLR.pLOF.snp"                 "PCSK9.pLOF.snp"               
[13] "README.md"                     "ukb_exome_450k_fh.carrier.txt"
[15] "ukb_exome_450k_fh.qced.txt"    
 
 
Code 
 fhp= fread ("out/ukb_exome_450k_fh.qced.txt" ) 
 fhc= fread ("out/ukb_exome_450k_fh.carrier.txt" ) 
 m= merge (fhp,fhc,all.x= T,by.x= "x" ,by.y= "IID" ) 
 m$ carrier= ifelse (! is.na (m$ snp),1 ,0 ) 
 prs= readRDS ("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/prs_subset.rds" ) 
 prs= prs[,c ("Identifier" ,"CAD" ,"LDL_SF" ,"HDL" ,"CVD" ,"ISS" )] 
 prs$ CAD= scale (prs$ CAD) 
 prs$ LDL_SF= scale (prs$ LDL_SF) 
 prs$ HDL= scale (prs$ HDL) 
 prs$ ISS= scale (prs$ ISS) 
 m2= merge (prs,m,by.x =  "Identifier" ,by.y= "x" ) 
 phenos= readRDS ("~/Library/CloudStorage/Dropbox-Personal/df_ukb_pheno_updated.rds" ) 
 df= merge (phenos[,c ("Identifier" ,"Cad_Any" ,"Cad_censor_age" )],m2,by= "Identifier" ) 
 df= df[! is.na (df$ Cad_Any),] 
 
 bottom_5 <-  df %>%   
   filter (CAD >=  quantile (CAD, 0.0 ), 
          CAD <  quantile (CAD, 0.2 ), 
          Cad_Any ==  2 ) 
 bottom_5$ group= "Low 5 % PRS"  
 
 middle_75 <-  df %>%   
   filter (CAD >=  quantile (CAD, 0.2 ), 
          CAD <  quantile (CAD, 0.95 ), 
          Cad_Any ==  2 ) 
 middle_75$ group= "Mid 75 % PRS"  
 
 top_5 <-  df %>%   
   filter (CAD >=  quantile (CAD, 0.95 ), 
          Cad_Any ==  2 ) 
 top_5$ group= "Top 5 % PRS"  
 
 fh_carrier <-  df %>%   
   filter (carrier ==  1 , 
          Cad_Any ==  2 ) 
 fh_carrier$ group= "FH"  
 
 fh_carrier_top <-  df %>%   
   filter (carrier ==  1 & CAD >=  quantile (CAD, 0.95 )) 
 fh_carrier_top$ group= "FH&top20% Cad PRS"  
 
 combined= rbind (bottom_5,middle_75,top_5,fh_carrier,fh_carrier_top) 
 
 
Code 
 df <-  combined %>%  
   mutate (group =  factor (group, levels =  c ("FH&top20% Cad PRS" ,"FH" , "Top 5 % PRS" , "Mid 75 % PRS" , "Low 5 % PRS" ))) 
 
# Create geom_ridge plot  
 p <-  ggplot () +  
   geom_density_ridges (data =  df, aes (x =  Cad_censor_age, y =  group, fill =  group), alpha =  0.5 ) +  
   labs (title =  "Distribution of Age of CAD by CAD-PRS and FH Carrier Status" , 
        x =  "Cad_Censor_age" , 
        y =  "CAD" , 
        fill =  "PRS of FH" ) +  
   theme_minimal () +  
   scale_fill_jama () 
 
print (p) 
 
Picking joint bandwidth of 2.22 
 
Warning: Removed 87 rows containing non-finite values
(`stat_density_ridges()`). 
 
 
 
Forest Plots 
Code 
 phenos= readRDS ("~/Library/CloudStorage/Dropbox-Personal/df_ukb_pheno_updated.rds" ) 
 df= merge (phenos[,c ("Identifier" ,"Cad_Any" ,"Cad_censor_age" )],m2,by= "Identifier" ) 
 df= df[! is.na (df$ Cad_Any),] 
 
 bottom_5 <-  df %>%   
   filter (CAD >=  quantile (CAD, 0.0 ), 
          CAD <  quantile (CAD, 0.2 )) 
 bottom_5$ group= "Low 5% PRS"  
 
 middle_75 <-  df %>%   
   filter (CAD >=  quantile (CAD, 0.2 ), 
          CAD <  quantile (CAD, 0.95 )) 
 middle_75$ group= "Mid 75% PRS"  
 
 top_5 <-  df %>%   
   filter (CAD >=  quantile (CAD, 0.95 )) 
 top_5$ group= "Top 5% PRS"  
 
 fh_carrier <-  df %>%   
   filter (carrier ==  1 ) 
 fh_carrier$ group= "FH"  
 
 fh_carrier_top <-  df %>%   
   filter (carrier ==  1 & CAD >=  quantile (CAD, 0.95 )) 
 fh_carrier_top$ group= "FH&top20% Cad PRS"  
 
 combined= rbind (bottom_5,middle_75,top_5,fh_carrier,fh_carrier_top) 
 
 
 
library (dplyr) 
library (ggplot2) 
 
# Summarize age of event for each group  
 summary_age <-  combined %>%  
   filter (Cad_Any ==  2 ) %>%  
   group_by (group) %>%  
   summarize (mean_age =  mean (Cad_censor_age, na.rm =  TRUE )) 
 
# Summarize mean probability of event being 2 for each group  
 summary_prob <-  combined %>%  
   group_by (group) %>%  
   summarize (mean_prob =  mean (Cad_Any ==  2 , na.rm =  TRUE )) 
 
# Combine summary statistics into one dataframe  
 summary_df <-  merge (summary_age, summary_prob, by =  "group" ) 
 
 summary_df <-  summary_df %>%  
   mutate (group =  factor (group, levels =  c ("FH&top20% Cad PRS" ,"FH" , "Top 5% PRS" , "Mid 75% PRS" , "Low 5% PRS" ))) 
 
 m= melt (summary_df,id.vars= "group" ) 
# Create forest plot  
 p_forest <-  ggplot (summary_df, aes (x =  mean_age, y =  order (group,mean_age),col= group)) +  
   geom_point (aes (size =  mean_prob)) +  
   geom_errorbarh (aes (xmin =  mean_age -  1.96  *  sd (mean_age), xmax =  mean_age +  1.96  *  sd (mean_age))) +  
   scale_size_continuous (name =  "Mean Probability of CAD" , 
                         labels =  scales:: percent_format (accuracy =  0.1 )) + scale_color_jama ()+  
   labs (title =  "Forest Plot of Age of Event and Mean Probability by Group" , 
        x =  "Mean Age of Event" , 
        y =  "Genetic Grouping" ,color= "Genetic Status" ) +  
   theme_minimal () 
 
print (p_forest) 
 
 
 
Furthermore, we can see that rare event (FH) carriers are almost doubly enriched (overrepresented) in high overall score: