3.1 Pick 2000 SNPs from each of 2 chromosomes (4000 SNPs in total) as our small dataset
# Number of SNPs to take from each chromosome for the small dataset
SNPS <- 2000
# Collect the SNPs to include in the dataset: the first `SNPS` rows of
# hapmap$map for each of chromosomes 1 and 2.
#   newmap - the selected rows of hapmap$map, stacked chromosome by chromosome
#   rowidx - the selected row numbers, one column per chromosome
# NOTE(review): this assumes hapmap$map is ordered by chromosome, starting
# with chromosome 1, so that each chromosome is one contiguous run of rows --
# confirm against the .bim file.
startrow <- 1
rowidx <- c()
newmap <- c()
for (i in 1:2) {
  # Number of SNPs on chromosome i (rows with a missing chromosome are ignored)
  chr_size <- sum(hapmap$map$chromosome == i, na.rm = TRUE)

  # Without this guard the window below would silently run past the end of
  # chromosome i and pick up SNPs belonging to the next chromosome.
  if (chr_size < SNPS) {
    stop(
      "chromosome ", i, " has only ", chr_size, " SNPs; ", SNPS, " requested",
      call. = FALSE
    )
  }

  rows <- startrow:(startrow + SNPS - 1)
  newmap <- rbind(newmap, hapmap$map[rows, ])
  # deparse.level = 0 keeps the columns unnamed
  rowidx <- cbind(rowidx, rows, deparse.level = 0)

  # Jump to the first row of the next chromosome
  startrow <- startrow + chr_size
}
# Re-read the PLINK files so that hapmap only contains the selected
# 2 * 2000 SNPs (the row numbers collected in rowidx)
hapmap <- read.plink(bed, bim, fam, select.snps = rowidx)
# Print the updated object
hapmap
# Confirm that we have 4000 SNPs and 165 people
hapmap$genotypes
## A SnpMatrix with 165 rows and 4000 columns
## Row names: NA06989 ... NA12865
## Col names: rs2185539 ... rs2694088
3.2 Remove the monomorphic SNPs in this subset
# Get the column indices of the SNPs whose MAF is zero, i.e. the monomorphic
# SNPs we want to exclude. The element is spelled out in full
# (`hapmap$genotypes`) rather than relying on `$` partial matching.
mono <- which(col.summary(hapmap$genotypes)$MAF == 0)

# nomonosnps contains only the SNPs that are not monomorphic.
# Negative indexing with an empty index vector does not mean "drop nothing",
# so the case where no SNP is monomorphic is handled explicitly.
if (length(mono) > 0) {
  nomonosnps <- hapmap$genotypes[, -mono]
} else {
  nomonosnps <- hapmap$genotypes
}

# nomonosnps is a SnpMatrix; convert it to a numeric matrix for later use
X <- as(nomonosnps, "numeric")

# Check how many non-monomorphic SNPs we have; from here on we only use
# these non-monomorphic SNPs
ncol(X)