# Reads per-SNP allele counts, computes a per-chromosome smoothed allele
# frequency, and prints the positions whose smoothed frequency is >= 0.95 on
# chromosomes 7 and 15.
#
# Objects left in the global environment: `data` (filtered, smoothed table),
# `split.data` (the same table as a per-chromosome list), and the helper
# `moving_average()`.

# Centered moving average of `v` over `width` points.
# Returns NA at positions where the window does not fit (the default behavior
# of stats::filter for a two-sided convolution filter), and all-NA when `v`
# is shorter than the window, because stats::filter() raises an error in that
# case instead of returning NAs.
moving_average <- function(v, width) {
  if (length(v) < width) {
    return(rep(NA_real_, length(v)))
  }
  # Call stats::filter explicitly: a bare filter() is masked when dplyr is
  # attached, which would silently break this script.
  as.vector(stats::filter(v, rep(1 / width, width)))
}

# Set working directory (machine-specific; the input file below is resolved
# relative to it).
setwd("/Users/lab/Documents/sequencingbulkdata/Plos_Genetics_2015/snps")

# Read in a table containing genotype information.
data <- read.table(
  "3S_backcross_snps.txt",
  header = FALSE,
  sep = "\t",
  as.is = TRUE
)
# Column meanings are not visible from this script; presumably chromosome,
# position, reference allele, reference count, alternate allele, alternate
# count -- TODO confirm against the input file.
names(data) <- c("c", "p", "ref", "refct", "alt", "altct")

# Calculate coverage and allele frequency (refct as a fraction of coverage).
data$cov <- data$refct + data$altct
data$fr <- data$refct / data$cov

# Remove non-segregating regions and apply a mild filter to the data,
# one chromosome at a time so that windows never span two chromosomes.
split.data <- split(data, data$c)
split.data <- lapply(split.data, function(x) {
  # Keep a SNP only when at least 90% of the SNPs in the surrounding
  # 50-SNP window have frequency above 0.2.
  x$use <- moving_average(x$fr > 0.2, 50) >= 0.9
  # Zero the frequency of rejected SNPs. which() skips rows where `use` is NA
  # (window edges); those rows are removed by na.omit() below.
  x$fr[which(!x$use)] <- 0
  # Smooth the remaining frequencies with a 10-SNP moving average.
  x$filter <- moving_average(x$fr, 10)
  # Drop rows with any NA (window edges, zero-coverage sites).
  na.omit(x)
})
data <- do.call("rbind", split.data)

# Print out SNP positions with smoothed frequency >= 0.95 on chromosome 7.
print(data$p[data$c == 7 & data$filter >= 0.95])

# Print out SNP positions with smoothed frequency >= 0.95 on chromosome 15.
print(data$p[data$c == 15 & data$filter >= 0.95])