# Pick four causal SNPs for the disease. The intent is to have two of them
# close together on one chromosome and the other two far apart on a different
# chromosome; the chosen indices must be checked against the SNP map to
# confirm that this holds.

# Index of the causal SNPs
causal_idx <- c(10, 11, 2000, 3000)
# Look up the basic information for the four causal SNPs.
# `causal_idx` selects columns of `nomonosnps`, so the SNPs are matched to the
# map by name rather than by position.
causal_snpnames <- colnames(nomonosnps[, causal_idx])

# Keep only the map rows whose `snp.name` is one of the causal SNPs.
causal_map <- hapmap$map %>%
  filter(snp.name %in% causal_snpnames)

# Inspect the chromosome and position columns in particular, to make sure two
# of the SNPs are close together on one chromosome and the other two are far
# apart on a different chromosome.
print(causal_map)
4.2 Generate disease status
# Generate the disease status (1 = has the disease, 0 = does not have the
# disease) from the causal SNPs listed in `causal_snpnames`.

# Intercept and effect size of the logistic model. All causal SNPs are assumed
# to have the same effect on this fake disease.
beta0 <- -3
effect_size <- log(1.5)

# Probability of having the disease for every person (row of X):
#   nom  = exp(beta0 + effect_size * row sum of X over the causal SNP columns)
#   prob = nom / (1 + nom)
# `drop = FALSE` keeps the selection two-dimensional, so rowSums() also works
# when only one causal SNP is supplied.
# NOTE(review): assumes X is a numeric people-by-SNP genotype matrix whose
# column names include `causal_snpnames` -- confirm where X is built.
nom <- exp(beta0 + effect_size * rowSums(X[, causal_snpnames, drop = FALSE]))
prob <- nom / (1 + nom)

# Check the probabilities
# prob

# A person gets status 1 when the probability of disease is larger than 0.5
# and status 0 otherwise; a missing probability (NA) also gives status 0.
# The comparison is vectorized with `&` instead of growing `status` one column
# at a time in a loop. The result is kept as a 1 x length(prob) numeric matrix,
# which is the shape the repeated cbind() calls used to build.
status <- matrix(as.numeric(!is.na(prob) & prob > 0.5), nrow = 1L)

# Check the status
status