Path <- "Q:/Mat og vann/2011_pH E coli 2006 Case and food distribution/R-programme/august2014Anja/Supplementary/" # needs to be changed and spefied epsilon <- 0.00001 maxExposureTime <- 14 withoutCase5 <- FALSE #change to TRUE if you want to run without case 5 withoutConsumptionExposureIntervals <- FALSE #change to TRUE if you want to run without time C_Data <- read.table(paste(Path, "cases.txt", sep = ""), header=TRUE ,sep="\t") if(withoutCase5){ C_Data <- C_Data[-which(C_Data$Case == 5),] } NumCases <- length(unique(C_Data$Case)) #Total number of unique cases in dataset C_DataTime <- C_Data[,c("Case", "firstdate")] C_DataTime <- unique(C_DataTime) names1 <- list.files(paste(Path, "Prod/", sep = ""), pattern = ".txt", all.files = FALSE, full.names = FALSE, ignore.case = TRUE) # vector of file names NumNames <- length(names1) consumedCases <- matrix(0, nrow = NumNames, ncol = NumCases) rownames(consumedCases) <- names1 colnames(consumedCases) <- paste("Case", 1:NumCases, sep = "") sold <- matrix(0, nrow = NumNames, ncol = NumCases) rownames(sold) <- names1 colnames(sold) <- paste("Case", 1:NumCases, sep = "") for(n in 1:NumNames){ P_Data <- read.table(paste(Path, "Prod/", names1[n], sep=""), header=TRUE, sep="\t") nProd <- nrow(P_Data) InSold <- rep(FALSE, nProd) for(ca in C_DataTime$Case){ C_DataCase <- C_Data[C_Data$Case == ca,] municipalities <- as.character(C_DataCase$Municipality) w <- C_DataCase$Index exposureInterval <- (C_DataTime[which(C_DataTime$Case == ca),2]-maxExposureTime):C_DataTime[which(C_DataTime$Case == ca),2] deliveredInPeriod <- rep(NA, nProd) deliveredAlready <- rep(NA, nProd) maxDate <- max(C_DataTime[which(C_DataTime$Case < (ca+1)),2]) for(i in 1:nProd){ prodDate <- P_Data[i,"Date_delivered"]:P_Data[i,"Date_expired"] deliveredInPeriod[i] <- any(exposureInterval %in% prodDate) deliveredAlready[i] <- (P_Data[i,"Date_delivered"] < maxDate + 1) } if(withoutConsumptionExposureIntervals ){ P_DatadeliveredAlready <- P_Data P_DatadeliveredInPeriod <- P_Data } else { P_DatadeliveredAlready <- P_Data[deliveredAlready,] P_DatadeliveredInPeriod <- P_Data[deliveredInPeriod,] } MunicipalitySoldInPeriod <- as.character(P_DatadeliveredInPeriod[, "Municipality"]) NumberSoldBefore <- sum(P_DatadeliveredAlready[, "numPackages"]) sold[n,which(C_DataTime$Case == ca)] <- NumberSoldBefore if(length(which(municipalities %in% MunicipalitySoldInPeriod)) > 0){ soldToCase <- as.character(municipalities[which(municipalities %in% MunicipalitySoldInPeriod)]) wCase <- w[which(municipalities %in% MunicipalitySoldInPeriod)] NumberSoldToCase <- 0 for(j in 1:length(wCase)){ NumberSoldToCase <- NumberSoldToCase + wCase[j] * sum(P_DatadeliveredInPeriod[P_DatadeliveredInPeriod$Municipality == soldToCase[j], "numPackages"]) } consumedCases[n, which(C_DataTime$Case == ca)] <- NumberSoldToCase } } } likelihoodPerProd <- matrix(0, nrow = NumNames, ncol = NumCases) rownames(likelihoodPerProd) <- names1 colnames(likelihoodPerProd) <- paste("Case", 1:NumCases, sep = "") freqPerProd <- consumedCases[,1]/sold[,1] freqPerProd[is.na(freqPerProd)] <- 0 likelihoodPerProd[,1] <- freqPerProd for(ca in 2:ncol(likelihoodPerProd)){ freqPerProd <- consumedCases[,1:ca]/sold[,ca] freqPerProd[is.na(freqPerProd)] <- 0 freqPerProd <- freqPerProd + epsilon likelihoodPerProd[,ca] <- apply(freqPerProd, 1, prod) } maxlikeperCase <- apply(likelihoodPerProd,2,max) likelihoodRatioPerProd <- likelihoodPerProd/matrix(rep(maxlikeperCase, nrow(likelihoodPerProd)), ncol = ncol(likelihoodPerProd), byrow = T) if(withoutCase5){ likelihoodRatioPerProd2 <- cbind(likelihoodRatioPerProd[,1:4], rep(NA, nrow(likelihoodRatioPerProd)), likelihoodRatioPerProd[,5:15]) likelihoodRatioPerProd <- likelihoodRatioPerProd2 } frequency <- matrix(0, nrow = NumNames, ncol = NumCases) rownames(frequency ) <- names1 colnames(frequency ) <- paste("Case", 1:NumCases, sep = "") frequency [,1] <- frequency [,1]/sold[,1] for(ca in 2:ncol(frequency )){ frequency [,ca] <- consumedCases[,ca]/sold[,ca] } frequency [is.na(frequency )] <- 0 summary(as.numeric(frequency )) table(as.numeric(frequency ) > 0) summary(as.numeric(frequency [frequency > 0])) quantile( as.numeric(frequency [frequency > 0]), 0.01) length(as.numeric(frequency )) min(frequency[frequency > 0]) maintekst <- "Likelihood ratio" if(withoutConsumptionExposureIntervals ){ maintekst <- paste(maintekst, "without time") } if(withoutCase5){ maintekst <- paste(maintekst, "without case 5") } if(withoutCase5){ likelihoodRatioPerProd2 <- cbind(likelihoodRatioPerProd[,1:4], rep(NA, nrow(likelihoodRatioPerProd)), likelihoodRatioPerProd[,5:15]) likelihoodRatioPerProd <- likelihoodRatioPerProd2 } boxplot(likelihoodRatioPerProd, xlab = "Cases included", ylab = "Likelihood ratio", main = maintekst, names = c("Case \n 1", "Case \n 1-2", "Case \n 1-3", "Case \n 1-4", "Case \n 1-5", "Case \n 1-6","Case \n 1-7", "Case \n 1-8","Case \n 1-9", "Case \n 1-10","Case \n 1-11", "Case \n 1-12","Case \n 1-13", "Case \n 1-14","Case \n 1-15","Case \n 1-16")) Product <- which(names1 == "prod2489.txt") Batch <- which(names1 == "prod2493.txt") Lot <- which(names1 == "prod2567.txt") points(1:ncol(likelihoodRatioPerProd), likelihoodRatioPerProd[Product,], col = "red3", cex = 2, pch = 15) points(1:ncol(likelihoodRatioPerProd), likelihoodRatioPerProd[Lot,], col = "red3", cex = 2, pch = 17) points(1:ncol(likelihoodRatioPerProd), likelihoodRatioPerProd[Batch,], col = "red3", cex = 2, pch = 19) whichBatch <- likelihoodRatioPerProd[Batch,] rankLikelihoodBatch <- rep(NA, ncol(likelihoodRatioPerProd)) whichLot <- likelihoodRatioPerProd[Lot,] rankLikelihoodLot <- rep(NA, ncol(likelihoodRatioPerProd)) whichProd <- likelihoodRatioPerProd[Product,] rankLikelihoodProd <- rep(NA, ncol(likelihoodRatioPerProd)) for(i in 1:ncol(likelihoodRatioPerProd)){ y <- sort(likelihoodRatioPerProd[,i], decreasing = T) rankLikelihoodBatch[i] <- min(which(y == as.numeric(whichBatch[i]))) rankLikelihoodLot[i] <- min(which(y == as.numeric(whichLot[i]))) rankLikelihoodProd[i] <- min(which(y == as.numeric(whichProd[i]))) } text((1:ncol(likelihoodRatioPerProd))[rankLikelihoodBatch < 30], whichBatch[rankLikelihoodBatch < 30], rankLikelihoodBatch[rankLikelihoodBatch < 30], 2, col = "red3") text((1:ncol(likelihoodRatioPerProd))[rankLikelihoodLot < 30], whichLot[rankLikelihoodLot < 30], rankLikelihoodLot[rankLikelihoodLot < 30], 2, col = "red3") text((1:ncol(likelihoodRatioPerProd))[rankLikelihoodProd < 30], whichProd[rankLikelihoodProd < 30], rankLikelihoodProd[rankLikelihoodProd < 30], 2, col = "red3") best <- rep(NA, 15) for(i in 1:15){ best[i] <- as.numeric(which(likelihoodRatioPerProd[,i] == 1)) [1] } names1[unique(best)] names1[best] worstProduct <- apply(rbind(rankLikelihoodBatch, rankLikelihoodLot, rankLikelihoodProd),2,max) notcontrols <- which(substr(names1,5,8) %in% 2487:2626) # 2006 data are identified as prod2488 to prod2626 ; prod2489= product X, prod2493=batch A and prod2493=lot A3C1 All <- 1:2626 controls <- All[-notcontrols] #sjekk for(i in 1:16){ nri <- as.numeric(sort(likelihoodRatioPerProd[,i],decreasing = T)[worstProduct[i]]) better <- as.numeric(which(likelihoodRatioPerProd[,i] > nri)) namesBetter <- names1[better] controlsBetter <- namesBetter[namesBetter %in% names1[controls]] controlsBetterj <- better[namesBetter %in% names1[controls]] y <- sort(likelihoodRatioPerProd[,i], decreasing = T) for(j in controlsBetterj){ whichC <- likelihoodRatioPerProd[j,i] points(i, whichC, col = "Cyan3", cex = 1, pch = 19) } } bestProduct <- apply(rbind(rankLikelihoodBatch, rankLikelihoodLot, rankLikelihoodProd),2,min) bestProduct worstProduct