# Plot the number of SNPs per 100 kb bin along the concatenated chromosomes,
# counting only SNPs for which setB == 2.
#
# The following placeholders must be defined before the script is run:
#   SNPTABLE   - path of the filtered SNP table (first line is a header)
#   LENGTHFILE - path of the chromosome length table
#   FILENAME   - path of the PNG file to write

# read filtered SNP table, change SNPTABLE accordingly
g <- read.table(SNPTABLE, header = TRUE)
colnames(g) <- c("chr", "pos", "ref", "alt", "qual", "setA", "setB")
g$snp.id <- paste(g$chr, g$pos, sep = ":")

# read file with chromosome lengths, change LENGTHFILE accordingly
#
# expected format:
#   chr. name    : column 1
#   length in bp : column 2
len <- read.table(LENGTHFILE)
colnames(len) <- c("chr", "length")

# keep only the numbered chromosomes
len <- subset(len, !chr %in% c("UNKNOWN", "Mt", "Pt"))

# Convert chromosome names to numbers. Go through as.character() first:
# if read.table() returned a factor (the default before R 4.0), a direct
# as.numeric() would yield the factor's level codes, not the chromosome
# numbers, and both the ordering and the merge key below would be wrong.
len$chr <- as.numeric(as.character(len$chr))
if (nrow(len) == 0L || anyNA(len$chr)) {
  stop("LENGTHFILE must contain at least one chromosome with a numeric name",
       call. = FALSE)
}

# Work with double-precision lengths so that cumsum()/sum() cannot overflow
# the integer range on genomes larger than ~2.1 Gb.
len$length <- as.numeric(len$length)

# calculate chromosome offsets: each chromosome starts where the previous
# ones end (the first one starts at 0)
len <- len[order(len$chr), ]
len$offset <- cumsum(len$length) - len$length

# select SNPs where set B is homozygous for the non-reference allele
diff_setB <- subset(g, setB == 2)

# aggregate SNP counts in 100 kb bins; after aggregate() the column `alt`
# holds the number of SNPs in the bin, not an allele
bin_size <- 1e5
binned <- data.frame(diff_setB, bin = diff_setB$pos %/% bin_size * bin_size)
binned <- subset(binned, chr != "UNKNOWN")
agg <- aggregate(alt ~ chr + bin, data = binned, FUN = length)

# attach the chromosome offsets; merge() joins on the shared `chr` column and
# drops bins on chromosomes that are not listed in `len`
agg <- merge(agg, len[c("chr", "offset")])
agg$ppos <- agg$offset + agg$bin
agg <- agg[order(agg$ppos), ]

# plot counts per bin along the genome
# change FILENAME accordingly
offset <- c(0, cumsum(len$length))          # chromosome boundaries
chr_mid <- len$offset + len$length / 2      # tick position: chromosome centre

png(filename = FILENAME, width = 1000, height = 300)
# close the device even if one of the plotting calls fails
tryCatch({
  par(mar = c(5, 5, 2, 2))
  par(cex = 1.3)
  # NOTE: the y axis is fixed at 0..300; bins with more SNPs are not drawn
  plot(0, type = "n", ylim = c(0, 300), xlim = c(0, sum(len$length)),
       xlab = "physical position in maize AGPv2",
       ylab = "number of SNPs per 100 kb bin",
       las = 1, xaxt = "n")
  abline(h = seq(0, 300, 50), lwd = 2, col = "gray")
  abline(v = offset, lwd = 2, col = "blue")
  points(agg$ppos, agg$alt, pch = 20)
  axis(1, at = chr_mid, labels = paste0("chr", len$chr), tick = FALSE)
}, finally = dev.off())