############################################################################################ # R Program v0.1 to calculate family history scores for probands from family data # # # # # # Using the methods by Feng et al. (2008), Williams et al. (2001), and Reed et al. (1986) # # *Feng R., McClure L., Tiwari H.K., Howard G. (2008). Statistics in medicine. In review. # # *Reed T, Wagener DK, Donahue RP and Kuller LH (1986). Genet Epidemiol 3(2): 63-71. # # *Williams RR, Hunt SC, Heiss G, Province MA, Bensen JT, Higgins M, Chamberlain RM, # # Ware J and Hopkins PN (2001). American Journal of Cardiology 87(2): 129-135. # # # # This is trial version - use at your own risk. # # For questions and bugs, email Rui Feng at rfeng@ms.soph.uab.edu # # # ############################################################################################ # This program use a R package "kinship" by Beth Atkinson and Terry Therneau # Make sure the package has been installed - other wise you can install it using the following command # install.packages("kinship", dependencies = TRUE) library(kinship) #data0<-read.table("C:/yourloculfolder/samplefamily.txt", header=TRUE, sep="\t" ) data0<-read.table("C:/Documents and Settings/rfeng/My Documents/2006/familyALS/samplefamily.txt", header=TRUE, sep="\t" ) # read in tab-deliminated data stored in local directory # the dataset contains at least 7 columns: famid, individ, fatherid, motherid, sex, event, ageonset # event is the event indicator: 1=event, 0=censor # futime is the time when event or censoring occurs names(data0) # check column names - make sure they are correctly spelled uniqueID<-unique(data0$famid) # An array of unique family IDs totalfam<-length(uniqueID) # Total number of families n<-dim(data0)[1] # Total number of subjects id<-1:n data0$id<-id selectid<-rep(NA, totalfam) pedigreesize<-rep(0,totalfam) maxindividual<-0 for(i in 1:totalfam){ tmpid<-id[data0$famid==uniqueID[i]] currentf<-data0[tmpid,] minid<-min(currentf$individ) secondid<-min(currentf$individ[currentf$individ!=minid]) selectid[i]<-tmpid[currentf$individ==minid] if(is.na(currentf$event[currentf$individ==minid]) || is.na(currentf$ageonset[currentf$individ==minid])) { selectid[i]<-tmpid[currentf$individ==secondid] } pedigreesize[i]<-length(tmpid) if(maxindividual1) { tmpw<-kin[probid,1:tmpsize] weight[i,1:(tmpsize-1)]<-tmpw[-probid] } } trailset<-data0[-selectid,] n<-dim(trailset)[1] maxindividual<-maxindividual-1 familyID<-trailset$famid event<-trailset$event # 1-event occurred; 0-censored, dead, end of the study or drop off before an event occurs futime<-trailset$ageonset # time of onset or time of censor sex<-trailset$sex id<-1:n ################################### # Step 2: calculate risk score # ################################### risk<-rep(NA,n) #please revise nagegroup and cutoff.age if you want different age groups #or you can define other groups such as birth cohort groups if other information is available nagegroup<-3 # number of age groups cutoff.age<-quantile(futime, probs=c(0.3,0.7), na.rm=TRUE) # the cutt off value for different age groups ngroup<-nagegroup*2 for(groupi in 1:ngroup-1){ groupid<-id[sex==floor(groupi/nagegroup)] groupid<-groupid[!is.na(groupid)] ageindex<-0 for(j in 1:(nagegroup-1)){ ageindex<-ageindex+(futime>cutoff.age[j]) } tmpind<-groupi%%nagegroup agegroup<-id[ageindex==tmpind] agegroup<-agegroup[!is.na(agegroup)] groupid<-agegroup[agegroup %in%groupid] subcensor<-event[groupid] subtime<-futime[groupid] period<-sort(unique(subtime[subcensor==1])) # define time period to be unique event time point nperiod<-length(period) n0<-rep(0, nperiod) # nj0 is the number of right censored observations n1<-rep(0, nperiod) # nj1 is the number of observed events for(i in 1:nperiod) { n0[i]<-sum(subtime[subcensor==0]period)+1 if(subcensor[i]==0) risk[groupid[i]]<-a0[j] else risk[groupid[i]]<-a1[j] } } # sign risk score ### } # done for all groups ### peddata<-data.frame(famid=familyID, id=id,sex=sex, time=futime, censor=event, risk=risk) est.risk1<-rep(0,totalfam) for(i in 1:totalfam) { tmpsize<-pedigreesize[i]-1 tmpw<-weight[i,1:tmpsize] tmpr<-risk[familyID==uniqueID[i]] if(sum(!is.na(tmpr))>0) { for(j in 1:length(tmpr)){ if(!is.na(tmpr[j])) est.risk1[i]<-est.risk1[i]+tmpw[j]*tmpr[j] #weight } denom<-sum(tmpw[!is.na(tmpr)]) #weight if(denom>0) est.risk1[i]<-est.risk1[i]/denom } else est.risk1[i]<-NA } ########################################## ### comparison 1: Reed et al. (1986) #### ########################################## ageinterval<-5 nonatime<-peddata$time[!is.na(peddata$time)] nonasex<-peddata$sex[!is.na(peddata$time)] nonacensor<-peddata$censor[!is.na(peddata$time)] agegroup<-ceiling((max(nonatime)-min(nonatime)+1)/ageinterval) ratesexage<-matrix(0, agegroup, 2) n<-length(nonatime) for(sexi in 0:1) { for(agei in 1:agegroup){ lage<-min(nonatime)+(agei-1)*ageinterval hage<-min(nonatime)+agei*ageinterval cevent<-0 cpersonyear<-0 for(i in 1:n){ if(!is.na(nonasex[i]) && !is.na(nonacensor[i]) ){ if(nonatime[i]>=lage && nonasex[i]==sexi){ if(nonatime[i]0) ratesexage[agei,sexi+1]<-cevent/cpersonyear ### Age-specific rates ### Annual and average annual age-specific rates are the rates per 100,000 population for 5-year age groups. ### For annual rates, the age-specific rate is the number of new cases or deaths in given age group and year ### divided by the population in that age group. ### For average annual rates, the age-specific rate is the number of new cases or deaths for the 3- or 5-year period ### divided by the number of person-years over the 3- or 5-year period. } } print(ratesexage) nonafamid<-peddata$famid[!is.na(peddata$time)] nonaid<-peddata$id[!is.na(peddata$time)] famid<-uniqueID id<-1:length(nonafamid) est.risk2<-rep(NA,totalfam) for(i in 1:totalfam) { tmpc<-nonacensor[nonafamid==famid[i]] O<-sum(tmpc[!is.na(tmpc)]) E<-0 for(j in id[nonafamid==famid[i]]){ if(!is.na(nonasex[j])) { tmp<-ratesexage[ceiling((nonatime[j]-min(nonatime)+1)/ageinterval), nonasex[j]+1] E<-E+tmp } } if(E>0) est.risk2[i]<-(E-O)/sqrt(E) } ############################################## ### comparison 2: Williams et al (1984) #### ############################################## est.risk3<-rep(NA,totalfam) for(i in 1:totalfam) { tmpc<-nonacensor[nonafamid==famid[i]] O<-sum(tmpc[!is.na(tmpc)]) E<-0 numAff<-0 for(j in id[nonafamid==famid[i]]){ if(!is.na(nonasex[j])) { tmp<-ratesexage[ceiling((nonatime[j]-min(nonatime)+1)/ageinterval), nonasex[j]+1] E<-E+tmp if(!is.na(nonacensor[j])) numAff<-numAff+nonacensor[j] } } tmp<-abs(O-E) if(numAff==1) est.risk3[i]<-0.99 else { if(E>0&&tmp>0.5) est.risk3[i]<-(tmp-0.5)*tmp/sqrt(E)/(O-E) else est.risk3[i]<-0 } } ##### Save and Post-comparison ###### resultable<-data.frame(famid, est.risk1,est.risk2,est.risk3) write.table(resultable, file="C:/yourloculfolder/resultSLFS.txt", row.name = FALSE, quote=FALSE, sep = "\t")