## Data processing ----

## Install and load the lumi package.
## NOTE(review): biocLite() has been retired by Bioconductor; on current
## R releases the replacement is BiocManager::install("lumi"). Confirm which
## R/Bioconductor version this pipeline targets before switching.
source("http://bioconductor.org/biocLite.R")
biocLite("lumi")
library(lumi)

## Specify the file name
fileName <- "FinalReport_SampleProbe.txt"

## Load the data
x.lumi <- lumiR(fileName, convertNuID = FALSE)

## Quality control
x.lumi <- lumiQ(x.lumi)

## Summary of the quality control
summary(x.lumi, "QC")

## Log2 transformation
x.lumiT <- lumiT(x.lumi, method = "log2")

## Data normalization
x.lumiN <- lumiN(x.lumiT, method = "RSN")

## Quality control after normalization
x.lumiNQ <- lumiQ(x.lumiN)

## Summary of the quality control
summary(x.lumiNQ, "QC")

## Remove the unexpressed probes: keep only rows whose present count
## (computed on the raw data) is non-zero.
presentCount <- detectionCall(x.lumi)
dataMatrix <- exprs(x.lumiNQ)
dataMatrixF <- dataMatrix[presentCount > 0, ]

## Output the data as a tab-separated text file.
## sep = "\t" is required; write.table() separates with a space by default.
write.table(dataMatrixF, file = "processedData.txt", sep = "\t")

## Organize the columns so that samples are grouped as follows: VL patients,
## asymptomatic individuals, uninfected controls and treated individuals.

## ---------------------------------------------------------------------------
## Identify differentially expressed genes ----

## Install and load limma plus the annotation packages
## (see the note on biocLite() above).
source("http://bioconductor.org/biocLite.R")
biocLite("limma")
biocLite("illuminaHumanv4.db")
biocLite("annotate")
library(limma)
library(illuminaHumanv4.db)
library(annotate)

probeList <- rownames(dataMatrixF)

## One design column per group; the vector assigns each of the 45 samples
## (columns of dataMatrixF, in order) to group 1-4.
## NOTE(review): group 2 appears in two separate runs (after group 1 and again
## after group 3), which does not match the contiguous column order described
## above -- confirm against the actual sample order.
design <- model.matrix(
  ~ 0 + factor(c(
    1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2,
    4, 4, 4, 4, 4, 4, 4, 4
  ))
)
colnames(design) <- c("VL", "DTH", "CTRL", "TRT")

## Fit the linear model on the filtered expression matrix
## (the original referenced an undefined object, `data_matrix`).
fit <- lmFit(dataMatrixF, design)

## Contrast names must match the design column names, so the treated group is
## referred to as TRT here (the original used the undefined name TREATED).
contrast.matrix <- makeContrasts(
  VL - DTH,
  VL - CTRL,
  TRT - VL,
  TRT - CTRL,
  TRT - DTH,
  DTH - CTRL,
  levels = design
)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)

## Get gene SYMBOL
geneSymbol <- getSYMBOL(probeList, "illuminaHumanv4.db")
fit2$genes <- data.frame(ID = probeList, geneSymbol = geneSymbol)

## Get differentially expressed genes, one table per contrast (coef follows the
## order of the contrasts above). Result names are syntactic R identifiers;
## `VL-DTH <- ...` is not a valid assignment target.
VL_vs_DTH <- topTable(fit2, coef = 1, adjust.method = "BH", number = Inf)
VL_vs_CTRL <- topTable(fit2, coef = 2, adjust.method = "BH", number = Inf)
TREATED_vs_VL <- topTable(fit2, coef = 3, adjust.method = "BH", number = Inf)
TREATED_vs_CTRL <- topTable(fit2, coef = 4, adjust.method = "BH", number = Inf)
TREATED_vs_DTH <- topTable(fit2, coef = 5, adjust.method = "BH", number = Inf)
DTH_vs_CTRL <- topTable(fit2, coef = 6, adjust.method = "BH", number = Inf)

## ---------------------------------------------------------------------------
## WGCNA ----

## Filter data for the most variant genes from the dataset (in the paper the
## 3700 most variant genes were used) and store the result in `datExpr`.
## NOTE(review): this script never creates `datExpr`; WGCNA presumably expects
## samples in rows and genes in columns -- confirm before running.
library(WGCNA)
options(stringsAsFactors = FALSE)

if (!exists("datExpr")) {
  stop(
    "`datExpr` is not defined: build it from the most variant genes ",
    "before running the WGCNA section.",
    call. = FALSE
  )
}

# Choose a set of soft-thresholding powers
powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))

# Call the network topology analysis function
sft <- pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)

# Plot the results:
sizeGrWindow(9, 5)
par(mfrow = c(1, 2))
cex1 <- 0.9

# Scale-free topology fit index as a function of the soft-thresholding power
plot(
  sft$fitIndices[, 1],
  -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
  xlab = "Soft Threshold (power)",
  ylab = "Scale Free Topology Model Fit,signed R^2",
  type = "n",
  main = "Scale independence"
)
text(
  sft$fitIndices[, 1],
  -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
  labels = powers, cex = cex1, col = "red"
)

# This line corresponds to using an R^2 cut-off of h
abline(h = 0.80, col = "red")

# Mean connectivity as a function of the soft-thresholding power
plot(
  sft$fitIndices[, 1],
  sft$fitIndices[, 5],
  xlab = "Soft Threshold (power)",
  ylab = "Mean Connectivity",
  type = "n",
  main = "Mean connectivity"
)
text(
  sft$fitIndices[, 1],
  sft$fitIndices[, 5],
  labels = powers, cex = cex1, col = "red"
)

# Calculate the adjacencies
softPower <- 12
adjacency <- adjacency(datExpr, power = softPower)

# Turn adjacency into topological overlap
TOM <- TOMsimilarity(adjacency)
dissTOM <- 1 - TOM

# Call the hierarchical clustering function
geneTree <- hclust(as.dist(dissTOM), method = "average")

# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12, 9)
plot(
  geneTree,
  xlab = "", sub = "",
  main = "Gene clustering on TOM-based dissimilarity",
  labels = FALSE, hang = 0.04
)

# Set the minimum module size relatively high:
minModuleSize <- 30

# Module identification using dynamic tree cut:
dynamicMods <- cutreeDynamic(
  dendro = geneTree,
  distM = dissTOM,
  deepSplit = 2,
  pamRespectsDendro = FALSE,
  minClusterSize = minModuleSize
)
table(dynamicMods)

# Convert numeric labels into colors
dynamicColors <- labels2colors(dynamicMods)
table(dynamicColors)

# Plot the dendrogram and colors underneath
sizeGrWindow(8, 6)
plotDendroAndColors(
  geneTree, dynamicColors, "Dynamic Tree Cut",
  dendroLabels = FALSE, hang = 0.03,
  addGuide = TRUE, guideHang = 0.05,
  main = "Gene dendrogram and module colors"
)

## Merge modules

# Calculate eigengenes
MEList <- moduleEigengenes(datExpr, colors = dynamicColors)
MEs <- MEList$eigengenes

# Calculate dissimilarity of module eigengenes
MEDiss <- 1 - cor(MEs)

# Cluster module eigengenes
METree <- hclust(as.dist(MEDiss), method = "average")

# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "")

MEDissThres <- 0.25

# Plot the cut line into the dendrogram
abline(h = MEDissThres, col = "red")

# Call an automatic merging function
merge <- mergeCloseModules(
  datExpr, dynamicColors,
  cutHeight = MEDissThres, verbose = 3
)

# The merged module colors
mergedColors <- merge$colors

# Eigengenes of the new merged modules:
mergedMEs <- merge$newMEs

## Plot new dendrogram with merged modules
plotDendroAndColors(
  geneTree, cbind(dynamicColors, mergedColors),
  c("Dynamic Tree Cut", "Merged dynamic"),
  dendroLabels = FALSE, hang = 0.03,
  addGuide = TRUE, guideHang = 0.05
)

# Rename to moduleColors
moduleColors <- mergedColors

# Construct numerical labels corresponding to the colors
colorOrder <- c("grey", standardColors(50))
moduleLabels <- match(moduleColors, colorOrder) - 1
MEs <- mergedMEs

# Save module colors and labels for use in subsequent parts.
# NOTE(review): save() writes R's binary format despite the .txt extension;
# the file must be read back with load(), not as text.
save(MEs, moduleLabels, moduleColors, geneTree, file = "Merged_modules.txt")

## ---------------------------------------------------------------------------
## Cell deconvolution ----

## Install and load CellMix (see the note on biocLite() above).
source("http://www.bioconductor.org/biocLite.R")
biocLite(
  "CellMix",
  siteRepos = "http://web.cbio.uct.ac.za/~renaud/CRAN",
  type = "both"
)
library(CellMix)
library(GEOquery)

# Marker gene list
m1 <- MarkerList("HaemAtlas")

# Load target data (expression data).
# NOTE(review): this script never creates `target`; it has to be provided by
# the user before this section runs.
if (!exists("target")) {
  stop(
    "`target` is not defined: load the expression data to deconvolve ",
    "before running the cell deconvolution section.",
    call. = FALSE
  )
}
target1 <- as.matrix(target)

# Convert IDs
m1 <- convertIDs(m1, target1)

## Compute proportion proxies as mean expression cell profile
meanProf <- ged(target1, m1, method = "meanProfile")

# Proportion proxies are stored in the coefficient matrix
coef(meanProf)