TS_AdmixMapping=function(data.geno,data.ind,data.gamma,markerfile,alpha,qqq,y){ # # data.geno: contains genotype data for each individual at each SNP; same as the format for software of ancestrymap #SNP PID Genotype #SNP1 00-0003P 1 #SNP1 00-0004P 1 #SNP1 00-0008P 2 #SNP1 00-0013P 1 #SNP1 00-0018 1 #SNP1 00-0019 2 #SNP1 00-0029 2 #SNP1 00-0054P 2 #SNP1 00-0056 1 #SNP1 00-0060P 2 #SNP1 00-0061 1 # markerfile: contains information about each SNP; same as the format for software of ancestrymap #SNPID Chr GeneticPosition PhysicalPosition #SNP1 2 214.679793 215714130 #SNP2 9 130.438862 104650796 #SNP3 2 169.372873 158503007 #SNP4 5 157.168463 142106940 # data.indi: contains information about each individual; same as the format for software of ancestrymap #PID Gender Status #00-0003P M Control #00-0004P M Control #00-0008P M Case #00-0013P M Control #00-0018 M Case #00-0019 M Case #00-0029 M Case #00-0054P M Control # data.gamma: contains ancestry information outputed from software ancestrymap. see below # SNP PID Genotype EstimatedAncestry # SNP1 00-0003P 1 0.279 # SNP1 00-0004P 1 0.513 # SNP1 00-0008P 2 0.020 # SNP1 00-0013P 1 0.885 # SNP1 00-0018 1 0.174 # SNP1 00-0019 2 0.022 # SNP1 00-0029 2 0.002 # SNP1 00-0054P 2 0.010 # SNP1 00-0056 1 0.640 # SNP1 00-0060P 2 0.020 # qqq is the number of snps selected in the first stage, for example 10, or 50 # alpha is the total significance level, for example 0.05; # y is the phenotype with only one column such as c(0.1,0.2,1,0.7) # # ########################################################## n.samp=dim(data.ind)[1]; #n.cases+n.controls data.geno=data.geno[,3]; data.ind=data.ind[,3]; dex=length(data.ind); snpindex=markerfile[,1] if (dex>n.samp){data.ind=data.ind[2:(n.samp+1)]} data.gamma=data.gamma[,4]; ######################################################## find=function(A,B){#A>B n.A=dim(A)[1];n.B=dim(B)[1] temp=c(); for (i in 1:n.B){ temp=c(temp,c(1:n.A)[A==B[i]])} temp} ######################################################## n.snp=dim(markerfile)[1]; #the total number of snps considered ########################################################## p.twostage=c(); ####################### ########################################################################## ##### Evaluate the estimated genotype by regression ################### ##### ##### p.stage1=matrix(0,n.snp,1); ghat=c(); for (i in 1:n.snp){ #print(i)# i=70 A=data.gamma[((i-1)*n.samp+1):(i*n.samp)];#evaluate the ancestry for one marker G=data.geno[((i-1)*n.samp+1):(i*n.samp)]; mod <- lm(G ~ A); g.hat=mod$fitted.values; ghat=rbind(ghat,t(t(g.hat))); ##################################################### #### step 2 in stage 1 #### step2=glm(y~g.hat,family=gaussian(link = "identity")); dd=summary(step2); cc=dd$coefficients; p.stage1[i,1]=cc[8]; } ############## ##############choose markers for second stage ############# index2=order(p.stage1); n.stage2=length(index2); p.stage2=c(); for (i in 1:qqq){#print(i) ii=index2[i]; G=data.geno[((ii-1)*n.samp+1):(ii*n.samp)]; ghat.indi=ghat[((ii-1)*n.samp+1):(ii*n.samp)]; ##################################################### #### regression in stage 2 #### step2=glm(y~G+ghat.indi,family=gaussian(link = "identity")); dd=summary(step2); cc=dd$coefficients; pstage2=cc[11]; p.stage2=cbind(p.stage2,pstage2); } p.stage2[is.na(p.stage2)]=1 out=c(); sign=index2[which(p.stage2=1){ out=cbind(markerfile[sign,],p.stage1[sign],find(t(t(index2)),t(t(sign))),p.stage2[which(p.stage2