Step 4 Make up a fake disease trait

4.1 Choose four causal SNPs

#pick four causal SNPs for the disease, make sure two of them are nearby from one chromosome and the other two are far away from another chromosome

#index of causal SNPs
causal_idx = c(10,11,2000,3000)
#with their row index, check the basic information for the 4 causal SNPs here
#especially, look at the chromosome and position variable to make sure two of them are nearby from one chromosome and the other two are far away from another chromosome

causal_snpnames = colnames(nomonosnps[,causal_idx])
causal_map = hapmap$map %>%
  filter(snp.name %in% causal_snpnames)

print(causal_map)

4.2 Generate disease status

# generate the disease status(have the disease=1/not having the disease=0) based on one snp
# copy the names of the four causal snps into the brackets here

# set the intercept value and effect size, suppose all 4 causal SNPs have the same effect to causing this fake disease
beta0 = -3
effect_size = log(1.5)

#Get the probability of having the disease for all 165 people
nom = exp(beta0 + effect_size*rowSums(X[,causal_snpnames]))
prob = nom/(1+nom)

#Check the probabilities
#prob

#if the probability of having the disease is larger than 0.5, we assign this person's disease status to 1, and vice versa
status = c()
for (i in 1:length(prob)){
  status = cbind(status, as.numeric((!is.na(prob[i])) && prob[i] > 0.5))
}

#Check the status
status
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,]    0    0    0    0    0    0    0    0    1     0     0     1     1
##      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## [1,]     0     1     1     0     0     0     0     1     0     0     1
##      [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35]
## [1,]     0     0     0     0     0     0     0     0     0     0     0
##      [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46]
## [1,]     0     0     0     0     0     1     0     0     0     0     1
##      [,47] [,48] [,49] [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57]
## [1,]     0     0     0     0     0     0     1     1     0     0     0
##      [,58] [,59] [,60] [,61] [,62] [,63] [,64] [,65] [,66] [,67] [,68]
## [1,]     0     0     0     0     0     0     0     0     0     1     0
##      [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77] [,78] [,79]
## [1,]     1     0     0     0     0     0     0     0     0     0     0
##      [,80] [,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89] [,90]
## [1,]     0     0     0     1     1     0     0     0     0     0     0
##      [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98] [,99] [,100] [,101]
## [1,]     0     0     0     0     0     0     0     0     0      0      0
##      [,102] [,103] [,104] [,105] [,106] [,107] [,108] [,109] [,110] [,111]
## [1,]      0      0      0      0      0      1      1      1      0      0
##      [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119] [,120] [,121]
## [1,]      1      0      0      0      0      0      0      0      0      0
##      [,122] [,123] [,124] [,125] [,126] [,127] [,128] [,129] [,130] [,131]
## [1,]      0      0      0      0      0      0      0      0      0      1
##      [,132] [,133] [,134] [,135] [,136] [,137] [,138] [,139] [,140] [,141]
## [1,]      0      0      0      0      0      0      0      0      0      0
##      [,142] [,143] [,144] [,145] [,146] [,147] [,148] [,149] [,150] [,151]
## [1,]      0      0      1      0      1      1      0      0      0      0
##      [,152] [,153] [,154] [,155] [,156] [,157] [,158] [,159] [,160] [,161]
## [1,]      1      0      0      1      1      0      1      0      0      0
##      [,162] [,163] [,164] [,165]
## [1,]      0      0      0      0
#make sure we have a disease status (0/1) for everyone in hapmap
#that means we should have 165 disease status here
length(status)
## [1] 165