## Script to generate results described in:
## SM Bol et al 
## Genome-wide association study identifies single nucleotide polymorphism in DYRK1A 
## associated with replication of HIV-1 in monocyte-derived macrophages
## 
## Copyright (C)  2010 Perry D Moerland 
##
##    This program is free software: you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation, either version 3 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program.  If not, see http://www.gnu.org/licenses/.
## 
## Contact:
## Perry Moerland, PhD
## p.d.moerland@amc.uva.nl
## Room J1B-206, Bioinformatics Laboratory
## Department of Clinical Epidemiology, Biostatistics and Bioinformatics
## Academic Medical Centre, University of Amsterdam 
## PO Box 22700, 1100 DE Amsterdam, the Netherlands 

## Discovery cohort: load phenotype and genotypes for the SNPs in Table 1
gp <- read.table("Table S1.txt", header = TRUE, sep = "\t")
## pheno: log10(normalized p24), taken from the first column
pheno <- gp[[1]]
## geno: genotypes for 16 SNPs from 191 individuals (all remaining columns)
geno <- gp[, 2:ncol(gp)]

## linear regression of the phenotype on each SNP in turn; the genotype is
## converted to a factor and its integer level codes are used as the predictor
anova.res <- apply(geno, 2, function(snp) {
  snp.factor <- as.factor(snp)
  snp.code <- as.numeric(snp.factor)
  fit.table <- anova(lm(pheno ~ snp.code))
  ## first entry: p-value of the F-test; second: number of genotype levels
  c(fit.table[["Pr(>F)"]][1], nlevels(snp.factor))
})
rownames(anova.res) <- c("p-value", "nlevels")
## results of Table 2 (one row per SNP)
t(anova.res)

## Permutation test for the association between one SNP and the phenotype.
##
## Uses the objects `geno` (genotype table) and `pheno` (phenotype vector)
## that are defined at top level in this script.
##
## Arguments:
##   snpID: column of geno to test (passed to geno[, snpID])
##   B    : number of permutations; a single whole number >= 1
##
## Value: list with three components
##   s : F-statistics for the B permutations
##   s0: observed F-statistic
##   p : empirical p-value, (number of s >= s0, plus 1) / B
snpPerm <- function(snpID, B=1000){
  stopifnot(is.numeric(B), length(B) == 1, !is.na(B), B >= 1, B == round(B))

  ## genotype recoded as 0-based integer codes of its factor levels
  snp.genotype <- as.numeric(as.factor(geno[, snpID])) - 1

  ## observed F-statistic
  s0 <- anova(lm(pheno ~ snp.genotype))[["F value"]][1]

  ## permutations; s is allocated once at its final length B
  s <- numeric(B)
  for (i in seq_len(B)) {
    ## progress report every 10^4 permutations
    if (i %% 10^4 == 0) cat(i, "\n")
    s[i] <- anova(lm(pheno ~ sample(snp.genotype)))[["F value"]][1]
  }

  ## NOTE(review): the denominator is B rather than B + 1, so p exceeds 1 when
  ## every permuted statistic reaches s0; left unchanged so that the numbers
  ## produced by this script do not change.
  list(s = s, s0 = s0, p = (sum(s >= s0) + 1) / length(s))
}
## Run the permutation test for rs2304418 with only B=100 permutations;
## use B=10^7 instead to repeat the permutation test described in the paper.
## For the other SNPs in Table 2, pass a different snpID.
res <- snpPerm(B = 100, snpID = "rs2304418")

## plot similar to Figure 1: jittered strip chart of the phenotype per genotype
stripchart(
  pheno ~ geno$rs12483205,
  vertical = TRUE,
  method = "jitter",
  ylab = "normalized p24 (log10)",
  xlab = "rs12483205 DYRK1A genotype"
)

## Replication cohort of 31 donors: linear regression for rs12483205,
## first against p24 (ng/ml), then against normalized p24
## read in data
gp <- read.table("Table S2.txt", header = TRUE, sep = "\t")
## pheno.raw: log10(p24), first column
pheno.raw <- gp[[1]]
## geno: genotype columns (third column onwards) for the 31 individuals
geno <- gp[, 3:ncol(gp)]
## regression on p24; s0.raw keeps the observed F-statistic
res <- anova(lm(pheno.raw ~ geno$rs12483205))
s0.raw <- res[["F value"]][1]
res[["Pr(>F)"]][1]

## pheno.norm: log10(normalized p24), second column
pheno.norm <- gp[[2]]
res <- anova(lm(pheno.norm ~ geno$rs12483205))
res[["Pr(>F)"]][1]

## permutation test for rs12483205 on replication cohort
## warning: might take a few minutes on your PC
library(multtest)
B <- 100000
## genotype.perm: matrix of relabelled genotypes from multtest; each row is
## used below as one relabelling of the donors
genotype.perm <- mt.sample.label(geno$rs12483205, test = "f", B = B)
## s : F-statistic of the regression of pheno.raw on each row of genotype.perm
s <- apply(genotype.perm, 1, function(x) {
  anova(lm(pheno.raw ~ x))[["F value"]][1]
})
## s0.raw: observed F-statistic
## nr: number of permuted statistics at least as large as s0.raw, plus one
nr <- sum(s >= s0.raw) + 1
empiric.pval <- nr / length(s)
## display the empirical p-value, like the other results in this script
empiric.pval

## linear regression for rs12483205 for 393 individuals
## read in data
gp <- read.table("Table S3.txt", header = TRUE, sep = "\t")
## pheno.norm: log10(normalized p24), first column
pheno.norm <- gp[[1]]
## geno: genotype columns (second column onwards) for the 393 individuals
geno <- gp[, 2:ncol(gp)]
res <- anova(lm(pheno.norm ~ geno$rs12483205))
## p-value of the F-test for rs12483205
res[["Pr(>F)"]][1]

## linear regression for rs12483205 for 421 (393+28) individuals
## read in data
gp <- read.table("Table S4.txt", header = TRUE, sep = "\t")
## pheno.norm: log10(normalized p24), first column
pheno.norm <- gp[[1]]
## geno: genotype columns (second column onwards) for the 421 individuals
geno <- gp[, 2:ncol(gp)]
res <- anova(lm(pheno.norm ~ geno$rs12483205))
## p-value of the F-test for rs12483205
res[["Pr(>F)"]][1]

## ccr5d32 and rs12483205 for 191 individuals (0=MAJ, 1=HZ, 2=MIN)
## read in data: phenotype in column 1, the two genotypes in columns 2 and 3
gp2 <- read.table("Table S5.txt", header = TRUE, sep = "\t")
pheno2 <- gp2[[1]]
geno2 <- gp2[, 2:3]

## p24 for ccr5 with and without 32 base pair deletion:
## two-sample t-test assuming equal variances
t.test(n_p24 ~ ccr5d32, data = gp2, var.equal = TRUE)

## contingency table of donor counts: DYRK1A rs12483205 genotype (rows)
## by CCR5 genotype (columns: wt/wt and wt/d32), filled column by column,
## followed by Fisher's exact test on the table
alleles <- matrix(
  c(97, 55, 6, 12, 19, 2),
  nrow = 3,
  dimnames = list(
    c("MAJ", "Hz", "MIN"),
    c("CCR5 wt/wt", "CCR5 wt/d32")
  )
)
alleles
fisher.test(alleles)

## covariate analysis: phenotype regressed on rs12483205 and ccr5d32 together
summary(lm(pheno2 ~ geno2$rs12483205 + geno2$ccr5d32))

## covariate analysis rs12483205 and CCR5d32 replication cohort
## read in data
gp <- read.table("Table S2.txt", header = TRUE, sep = "\t")
## pheno.raw: log10(p24), first column
pheno.raw <- gp[[1]]
## geno: genotype columns (third column onwards) for the 31 individuals
geno <- gp[, 3:ncol(gp)]

## pheno.norm: log10(normalized p24), second column
pheno.norm <- gp[[2]]

## covariate analysis rs12483205 and CCR5d32 - 31 donors - p24 (ng/ml)
summary(lm(pheno.raw ~ geno$rs12483205 + geno$ccr5d32))
## covariate analysis rs12483205 and CCR5d32 - 28 donors - normalized p24
summary(lm(pheno.norm ~ geno$rs12483205 + geno$ccr5d32))

## ccr5d32 and rs12483205 for 393 individuals (0=MAJ, 1=HZ, 2=MIN)
## read in data: phenotype in column 1, the two genotypes in columns 2 and 3
gp2 <- read.table("Table S3.txt", header = TRUE, sep = "\t")
pheno2 <- gp2[[1]]
geno2 <- gp2[, 2:3]

## covariate analysis
summary(lm(pheno2 ~ geno2$rs12483205 + geno2$ccr5d32))

## ccr5d32 and rs12483205 for 421 (393+28) individuals (0=MAJ, 1=HZ, 2=MIN)
## read in data: phenotype in column 1, the two genotypes in columns 2 and 3
gp2 <- read.table("Table S4.txt", header = TRUE, sep = "\t")
pheno2 <- gp2[[1]]
geno2 <- gp2[, 2:3]

## covariate analysis
summary(lm(pheno2 ~ geno2$rs12483205 + geno2$ccr5d32))