# Read the filtered SNP table; set SNPTABLE to the path of the input file.
# The first line of the file is treated as a header, and the table must have
# exactly seven columns, which are renamed below.

g <- read.table(SNPTABLE, header = TRUE)
colnames(g) <- c("chr", "pos", "ref", "alt", "qual", "setA", "setB")
# SNP identifier of the form "<chr>:<pos>"
g$snp.id <- paste(g$chr, g$pos, sep = ":")

# Read the file with chromosome lengths; set LENGTHFILE to its path.
#
# expected format:
# chr. name    : column 1
# length in bp : column 2

len <- read.table(LENGTHFILE)
colnames(len) <- c("chr", "length")

# Drop the "UNKNOWN", "Mt" and "Pt" entries; the remaining chromosome names
# must be plain numbers so they can be converted and sorted numerically.
len <- subset(len, !chr %in% c("UNKNOWN", "Mt", "Pt"))

# Calculate chromosome offsets.
# Go through as.character() first: if read.table() returned the name column
# as a factor, as.numeric() alone would yield the internal level codes
# (alphabetical order: "1", "10", "2", ...) rather than chromosome numbers.
len$chr <- as.numeric(as.character(len$chr))
len <- len[order(len$chr), ]
# Offset of a chromosome = summed length of all chromosomes sorted before it
# (0 for the first). Works for any number of chromosomes.
len$offset <- c(0, head(cumsum(len$length), -1))

# Keep only the SNPs where set B is homozygous for the non-reference allele.
diff_setB <- subset(g, setB == 2)

# Count SNPs per chromosome and 100 kb bin, then attach each chromosome's
# genome-wide offset. The intermediate tables live inside local() so that
# only `agg` is created in the workspace.
agg <- local({
  # bin = start coordinate of the 100 kb window containing the SNP
  binned <- data.frame(diff_setB, bin = diff_setB$pos %/% 1e5 * 1e5)
  binned <- subset(binned, chr != "UNKNOWN")

  # after aggregation the `alt` column holds the number of SNPs in the bin
  counts <- aggregate(alt ~ chr + bin, data = binned, FUN = length)

  # joined on the shared `chr` column
  merged <- merge(counts, len[c("chr", "offset")])

  # position of the bin along the concatenated genome
  merged$ppos <- merged$offset + merged$bin
  merged[order(merged$ppos), ]
})

# Plot SNP counts per bin along the genome and write the figure to a PNG.
# Set FILENAME to the path of the output file.

# Chromosome boundaries on the concatenated genome: 0 followed by the
# cumulative end position of every chromosome (one more entry than there
# are chromosomes).
offset <- c(0, cumsum(len$length))
png(file = FILENAME, width = 1000, height = 300)
par(mar = c(5, 5, 2, 2))
par(cex = 1.3)
# Empty frame first; the y-range is fixed at 0-300, so bins with more than
# 300 SNPs fall outside the plotting region.
plot(0, type = "n", ylim = c(0, 300), xlim = c(0, sum(len$length)),
     xlab = "physical position in maize AGPv2",
     ylab = "number of SNPs per 100 kb bin", las = 1, xaxt = "n")
abline(h = seq(0, 300, 50), lwd = 2, col = "gray")
abline(v = offset, lwd = 2, col = "blue")
points(agg$ppos, agg$alt, pch = 20)
# One label per chromosome, centred between its two boundaries. Positions and
# names are derived from the data instead of assuming exactly 10 chromosomes.
axis(1, at = head(offset, -1) + diff(offset) / 2,
     labels = paste0("chr", len$chr), tick = FALSE)
dev.off()