#______________________________
# Package: ARGOS.Filter.Program
#
#   1) CleanUp.Table()
#   2) Redundancy.Deletion()
#   3) Move.Parameters()
#   4) Speed.Filter()
#   5) Position.plot()
#   6) Barycentre()
#   7) Euclidean.Dist()
#
# Script created by Tarroux, A. and Casajus, N.
#
#===============================================================================
#__________________________
# Filter no 1 (CleanUp)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# -> Deletion of recordings without localisation
# -> Formatting of the date
# -> Projection of the geographic coordinates
# -> Deletion of duplicates (same localisation)
# -> Deletion of geographic aberrations
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# Arguments:
#
#   File                    : Data file name, without extension
#   Format                  : Extension of the data file ("txt", "xls" or "xlsx")
#   Projection, Datum, Zone : Information for the geographic projection of the coordinates
#   X.range                 : Longitude limits (degrees)
#   Y.range                 : Latitude limits (degrees)

CleanUp.Table <- function(File, Format, Projection, Datum, Zone, X.range, Y.range) {

  # The coordinate projection below relies on the project() function of the proj4 package
  Package <- dir(.libPaths())
  pos <- which(Package == "proj4")
  if (length(pos) == 0) { stop("Please install the package 'proj4'", "\n") }
  library(proj4)

  options(warn = -1)

  # File opening
  if (Format == "txt") {
    Data <- read.delim(file = paste(getwd(), "/", File, ".txt", sep = ""),
                       header = T, sep = " ", dec = ".")
  }

  if (Format == "xls") {
    pos <- which(Package == "RODBC")
    if (length(pos) == 0) { stop("Please install the package 'RODBC'", "\n") }
    channel <- odbcConnectExcel(paste(getwd(), "/", File, ".xls", sep = ""))
    Data <- sqlFetch(channel, sqlTables(channel)[1, 3])
  }

  if (Format == "xlsx") {
    pos <- which(Package == "RODBC")
    if (length(pos) == 0) { stop("Please install the package 'RODBC'", "\n") }
    channel <- odbcConnectExcel2007(paste(getwd(), "/", File, ".xlsx", sep = ""))
    Data <- sqlFetch(channel, sqlTables(channel)[1, 3])
  }

  # Deletion of repeated title lines
  pos <- which(Data$"Platform" == "Platform")
  if (length(pos) > 0) { Data <- Data[-pos, ] }

  str(Data)

  # Extraction of essential fields
  if (Format == "txt") {
    Table <- Data[, c("Platform.ID.", "Prg.No.", "Latitude", "Longitude", "Loc..quality",
                      "Loc..date", "Sat.", "Msg", "SENSOR..01", "Semi.major.axis", "GDOP")]
  }

  if (Format == "xls" || Format == "xlsx") {
    Table <- Data[, c("Platform ID ", "Prg No#", "Latitude", "Longitude", "Loc# quality",
                      "Loc# date", "Sat#", "Msg", "SENSOR #01", "Semi-major axis", "GDOP")]
  }

  colnames(Table) <- c("Platform_ID", "Prg_No", "Latitude", "Longitude", "Loc_quality",
                       "Loc_date", "Sat", "Msg", "SENSOR_01", "Semi_major_axis", "GDOP")

  # Convert columns to numeric
  for (i in c(3, 4, 8, 9, 10, 11)) {
    Table[, i] <- as.numeric(as.character(Table[, i]))
  }

  # Deletion of recordings without localisation
  pos <- which(is.na(Table[, "Longitude"]))
  if (length(pos) > 0) { Table <- Table[-pos, ] }

  pos <- which(is.na(Table[, "Latitude"]))
  if (length(pos) > 0) { Table <- Table[-pos, ] }

  # Date formatting
  Table[, "Loc_date"] <- as.POSIXct(strptime(Table[, "Loc_date"], "%Y/%m/%d %H:%M:%S"))

  # Deletion of geographic aberrations
  if (!is.null(X.range)) {
    if (min(Table[, "Longitude"]) < X.range[1]) {
      posX <- which(Table[, "Longitude"] < X.range[1])
      Table <- Table[-posX, ]
    }
    if (max(Table[, "Longitude"]) > X.range[2]) {
      posX <- which(Table[, "Longitude"] > X.range[2])
      Table <- Table[-posX, ]
    }
  }

  if (!is.null(Y.range)) {
    if (min(Table[, "Latitude"]) < Y.range[1]) {
      posY <- which(Table[, "Latitude"] < Y.range[1])
      Table <- Table[-posY, ]
    }
    if (max(Table[, "Latitude"]) > Y.range[2]) {
      posY <- which(Table[, "Latitude"] > Y.range[2])
      Table <- Table[-posY, ]
    }
  }

  rownames(Table) <- NULL

  # Projection of the geographic coordinates
  xy <- Table[, c("Longitude", "Latitude")]

  if (Projection == "UTM") {
    xy_proj <- project(xy, c(proj = "utm", zone = Zone, datum = Datum), ellps.default = "GCS")
  }

  if (Projection == "AEA") {
    xy_proj <- project(xy, c(proj = "aea", lat_1 = 50, lat_2 = 70, lat_0 = 40, long_0 = -96,
                             x_0 = 0, y_0 = 0, datum = "NAD83"))
  }

  Table <- as.data.frame(cbind(Table[, 1:2], Table[, 4], Table[, 3], xy_proj$x, xy_proj$y,
                               Table[, 5:11]))
  colnames(Table) <- c("Platform_ID", "Prg_No", "Longitude", "Latitude", "Long_proj", "Lat_proj",
                       "Loc_quality", "Loc_date", "Sat", "Msg", "SENSOR_01", "Semi_major_axis",
                       "GDOP")

  # Calculate the average of SENSOR 1 & delete duplicates (same date)
  Level.ID <- levels(as.factor(as.character(Table$"Platform_ID")))

  Mat_Sensor <- as.data.frame(matrix(NA, ncol = 13, nrow = 1))
  colnames(Mat_Sensor) <- c("Platform_ID", "Prg_No", "Longitude", "Latitude", "Long_proj",
                            "Lat_proj", "Loc_quality", "Loc_date", "Sat", "Msg", "SENSOR_01",
                            "Semi_major_axis", "GDOP")
  Mat_Sensor[, "Loc_date"] <- as.POSIXct(Mat_Sensor[, "Loc_date"])

  for (k in 1 : length(Level.ID)) {

    Tab <- Table[as.factor(Table[, "Platform_ID"]) == Level.ID[k], ]
    Level.Date <- levels(as.factor(Tab$"Loc_date"))

    for (i in 1 : length(Level.Date)) {

      temp <- Tab[as.factor(Tab[, "Loc_date"]) == Level.Date[i], ]

      Mat_Sensor <- as.data.frame(rbind(Mat_Sensor, temp[1, ]))
      Mat_Sensor[nrow(Mat_Sensor), "SENSOR_01"] <- round(mean(temp$"SENSOR_01"), 3)
      Mat_Sensor[nrow(Mat_Sensor), "Loc_date"]  <- as.POSIXct(strptime(temp[1, "Loc_date"],
                                                                       "%Y-%m-%d %H:%M:%S"))
    }
  }

  Mat_Sensor <- Mat_Sensor[-1, ]

  cat("\n", "Cleaning Filter: Done...", "\n", "\n")

  write.table(Mat_Sensor, file = paste(File, "_CleanUp.txt", sep = ""), sep = "\t",
              row.names = F, col.names = T)
}
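#-------------------------------------------------------------------------------
# Example call (a minimal sketch, not part of the original script): the file name,
# UTM zone and coordinate limits below are hypothetical and should be replaced by
# your own values. The call assumes an ARGOS export "MyPTT_data.txt" located in
# the working directory.
#
# CleanUp.Table(File       = "MyPTT_data",
#               Format     = "txt",
#               Projection = "UTM",
#               Datum      = "WGS84",
#               Zone       = 33,
#               X.range    = c(-30, 40),   # keep longitudes between -30 and 40 degrees
#               Y.range    = c(50, 85))    # keep latitudes between 50 and 85 degrees
#
# The cleaned table is written to "MyPTT_data_CleanUp.txt" in the working directory.
#-------------------------------------------------------------------------------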
#===============================================================================
#_____________________________
# Filter no 2 (Redundancy)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# -> Deletion of pseudo-duplicates
#    <=> Different recordings with a transmission interval below a set threshold
#    <=> The recording with the smallest error ellipse or the best LC, or
#        the first recording (in case of equality), is kept
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# Arguments:
#
#   File     : Data file name, without extension. Must be the same as the one used in
#              CleanUp.Table, as the function reads the file generated by CleanUp.Table
#   Time.Gap : Minimum time interval (minutes) between 2 recordings
Redundancy.Deletion <- function(File, Time.Gap) {

  Data <- read.delim(file = paste(File, "_CleanUp.txt", sep = ""), header = T)

  # Dates are re-imported as text: convert them back to POSIXct
  Data[, "Loc_date"] <- as.POSIXct(strptime(Data[, "Loc_date"], "%Y-%m-%d %H:%M:%S"))

  i <- 1

  while (i < nrow(Data)) {

    j <- i
    vec <- NULL

    # Group consecutive recordings of the same PTT separated by less than Time.Gap minutes
    while (difftime(Data[j+1, "Loc_date"], Data[j, "Loc_date"], units = "mins")[[1]] < Time.Gap &&
           Data[j+1, "Platform_ID"] == Data[j, "Platform_ID"] &&
           j <= nrow(Data) - 1) {
      vec <- c(vec, j, j+1)
      j <- j + 1
      if (j >= nrow(Data)) { break }
    }

    if (length(vec) > 0) { vec <- as.numeric(levels(as.factor(vec))) }

    if (j > i) {

      # Keep the recording with the smallest error ellipse (Semi_major_axis)
      # A semi-major axis of 0 means "not available"
      pos <- which(Data[vec, "Semi_major_axis"] == 0)

      if (length(pos) > 0) {
        if (length(vec[-pos]) > 0) {
          Data[vec[pos], "Semi_major_axis"] <- NA
          ind <- which.min(Data[vec, "Semi_major_axis"])
        } else {
          ind <- NULL
        }
      } else {
        ind <- which.min(Data[vec, "Semi_major_axis"])
      }

      # If no error ellipse is available, keep the recording with the best location class
      if (is.null(ind)) {
        ind <- which(Data[vec, "Loc_quality"] == "3")
        if (length(ind) == 0) {
          ind <- which(Data[vec, "Loc_quality"] == "2")
          if (length(ind) == 0) {
            ind <- which(Data[vec, "Loc_quality"] == "1")
            if (length(ind) == 0) {
              ind <- which(Data[vec, "Loc_quality"] == "0")
              if (length(ind) == 0) {
                ind <- which(Data[vec, "Loc_quality"] == "A")
                if (length(ind) == 0) {
                  ind <- which(Data[vec, "Loc_quality"] == "B")
                  if (length(ind) == 0) {
                    ind <- which(Data[vec, "Loc_quality"] == "Z")
                  }
                }
              }
            }
          }
        }
      }

      # Delete all pseudo-duplicates except the selected one
      Data <- Data[-vec[-ind[1]], ]
    }

    i <- i + 1
    if (i >= nrow(Data)) { break }
  }

  rownames(Data) <- NULL

  cat("\n", "Redundancy Filter: Done...", "\n", "\n")

  write.table(Data, file = paste(File, "_Redundancy.txt", sep = ""), sep = "\t",
              row.names = F, col.names = T)
}
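#-------------------------------------------------------------------------------
# Example call (a minimal sketch, not part of the original script): "MyPTT_data"
# and the 10-minute threshold are hypothetical. The function reads
# "MyPTT_data_CleanUp.txt" produced by CleanUp.Table() and keeps a single recording
# whenever successive recordings of the same PTT are less than Time.Gap minutes apart.
#
# Redundancy.Deletion(File = "MyPTT_data", Time.Gap = 10)
#
# The filtered table is written to "MyPTT_data_Redundancy.txt".
#-------------------------------------------------------------------------------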
#===============================================================================
#===============================================================================

Move.Parameters <- function(Data, k) {

  # Movement parameters (distance, duration, speed) between recordings k-1 and k
  if (Data[k, "Platform_ID"] == Data[k-1, "Platform_ID"]) {
    # Distance (km), computed from the projected coordinates (in metres)
    Dist <- round(sqrt((Data[k, "Long_proj"] - Data[k-1, "Long_proj"])^2 +
                       (Data[k, "Lat_proj"]  - Data[k-1, "Lat_proj"])^2) / 1000, 2)
    # Duration (min)
    Time <- round(difftime(Data[k, "Loc_date"], Data[k-1, "Loc_date"], units = "mins")[[1]], 2)
    # Speed (km/h)
    Speed <- round(60 * Dist / Time, 2)
  } else {
    # The two recordings come from different PTTs: no movement parameters
    Dist <- Time <- Speed <- -9999
  }

  List <- list(Dist, Time, Speed)
  names(List) <- c("Distance", "Duree", "Vitesse")

  return(List)
}

#===============================================================================
#________________________
# Filter no 3 (Speed)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# -> Deletion of recordings with an improbable movement speed
#    <=> Two speed criteria: cruising speed and acceleration speed
# -> Beforehand, recordings can also be selected depending on their location
#    class (e.g. deletion of LC Z, B, A...)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# Arguments:
#
#   data    : Data frame. It is highly recommended to load the data generated by
#             CleanUp.Table and Redundancy.Deletion to get the right column format
#   Sel.Loc : Boolean. If TRUE, recordings are selected according to their location class
#   qual    : Vector of LC to keep
#   Speed1  : Numeric. Cruising speed threshold (km/h)
#   Speed2  : Numeric. Acceleration speed threshold (km/h)
#   TimeSp2 : Numeric. Maximum acceleration duration (min)
#   Plot    : Boolean. If TRUE, one graph per PTT is drawn

Speed.Filter <- function(data, Sel.Loc = F, qual = NULL, Speed1, Speed2, TimeSp2, Plot = F) {

  Info.list <- data.frame()

  # Sort data by PTT then by date
  data <- data[order(data[, "Platform_ID"], data[, "Loc_date"]), ]

  # Extract PTT ids
  Ind <- levels(as.factor(data[, "Platform_ID"]))

  # Initialize empty object (final table)
  TAB <- NULL

  # Loop on PTTs
  for (j in 1 : length(Ind)) {

    # Extract data from PTT j
    tab <- data[data[, "Platform_ID"] == Ind[j], ]

    # Sort data by date
    tab <- tab[order(tab[, "Loc_date"]), ]

    # Reinitialize row numbers
    rownames(tab) <- NULL

    # Extract the different Location Classes
    LOC <- levels(as.factor(data[, "Loc_quality"]))

    # Initialize a table with the number of deleted localisations by LC
    Info <- as.data.frame(matrix(0, nrow = 3, ncol = 9))
    colnames(Info) <- c("Ind", "Criteria", "Loc_Z", "Loc_B", "Loc_A", "Loc_0", "Loc_1",
                        "Loc_2", "Loc_3")
    Info[, "Ind"] <- rep(Ind[j], 3)
    Info[, "Criteria"] <- c("Quality", "Distance", "Speed")

    # If we want to keep only certain localisations (based on their quality)
    if (Sel.Loc == T) {

      # Initialization of an empty vector
      dat <- NULL

      # Loop on desired qualities
      for (i in seq_along(qual)) {
        # Remove from the vector LOC the LC we keep
        LOC <- LOC[LOC != as.character(qual[i])]
        # Extract data of quality i
        dat <- as.data.frame(rbind(dat, tab[tab[, "Loc_quality"] == qual[i], ]))
      }

      # Count the localisations deleted because of their quality
      for (k in seq_along(LOC)) {
        Info[1, paste("Loc_", as.character(LOC[k]), sep = "")] <- nrow(tab[tab[, "Loc_quality"] == LOC[k], ])
        str(Info)
      }

      tab <- dat

      # Reinitialize row numbers
      rownames(tab) <- NULL
    }

    # Sort data by date
    tab <- tab[order(tab[, "Loc_date"]), ]

    if (Plot) {
      # Minimal and maximal longitude where the PTT j has sent a localisation
      Xlim <- c(min(tab[, "Longitude"]), max(tab[, "Longitude"]))
      # Minimal and maximal latitude where the PTT j has sent a localisation
      Ylim <- c(min(tab[, "Latitude"]), max(tab[, "Latitude"]))
      # New graphical window split in 2
      par(mfrow = c(1, 2))
      # Graph of the initial localisations of the PTT
      Position.plot(tab[, c("Longitude", "Latitude")], Ind[j], Xlim, Ylim)
    }

    # Initialization of 3 vectors with -9999
    StepLen <- DiffTim <- Speed <- rep(-9999, dim(tab)[1])

    # These 3 vectors are added to the table
    tab <- cbind(tab, StepLen, DiffTim, Speed)

    # Initialization of 2 vectors (for the plot)
    Col <- xy <- NULL

    # Initialization of the counter i
    i <- 2

    # Loop on all rows starting with row 2
    while (i <= nrow(tab)) {

      # Bugfix
      if (i == 1) {
        tab[i, "StepLen"] <- -9999
        tab[i, "DiffTim"] <- -9999
        tab[i, "Speed"]   <- -9999
        i <- i + 1
      }

      # Movement parameters between localisations i and i-1
      param <- Move.Parameters(tab, i)

      # Parameters are saved on line i
      tab[i, "StepLen"] <- param$"Distance"
      tab[i, "DiffTim"] <- param$"Duree"
      tab[i, "Speed"]   <- param$"Vitesse"

      # If i has a speed > cruising speed
      if (tab[i, "Speed"] > Speed1) {

        # And if i has a speed <= acceleration speed AND a duration <= the maximal acceleration duration
        if (tab[i, "Speed"] <= Speed2 && tab[i, "DiffTim"] <= TimeSp2) {

          # Check if there is only one localisation in the day
          # Row where the day of localisation i starts
          beg.day <- as.numeric(rownames(tab[substr(tab[, "Loc_date"], 1, 10) == substr(tab[i, "Loc_date"], 1, 10), ])[1])

          # If i is the first localisation of the day and the next localisation (if any) is from another day
          if (beg.day == i && (i == nrow(tab) || substr(tab[i, "Loc_date"], 1, 10) != substr(tab[i+1, "Loc_date"], 1, 10))) {

            #-----
            # We extract some points around i to check whether this point has the
            # greatest distance to the centroid of this group of points
            #-----
            # If i is at least on the 4th row
            if (i >= 4) {
              # If there are 3 localisations after the localisation i
              if ((i + 3) <= nrow(tab)) {
                # Then we take the 3 localisations before and after i
                vec <- seq(i - 3, i + 3)
              } else {
                # Otherwise we take the maximum number of localisations after i (from 0 to 2)
                vec <- seq(i - 3, nrow(tab))
              }
            # If i is before the 4th row (between 2 and 3)
            } else {
              # If there are 3 localisations after the localisation i
              if ((i + 3) <= nrow(tab)) {
                # Then we take all localisations between the start of the day and i, and 3 localisations after i
                vec <- seq(1, i + 3)
              } else {
                # Otherwise we take the maximum number of localisations after i (from 0 to 2)
                vec <- seq(1, nrow(tab))
              }
            }

            # We calculate the centroid of the group of points
            Bar <- Barycentre(tab[vec, c("Longitude", "Latitude")])

            # We calculate the distance between each point and the centroid
            Dist <- apply(tab[vec, c("Longitude", "Latitude")], 1, Euclidean.Dist, G = Bar)

            # If the 1st localisation of the day has the greatest distance
            if (vec[which.max(Dist)] == beg.day) {
              # Save the coordinates of the deleted localisation
              xy <- rbind(xy, tab[beg.day, c("Longitude", "Latitude", "Loc_quality")])
              # Deletion according to the distance to the centroid: red
              Col <- c(Col, "red")
              # Delete the localisation
              tab <- tab[-beg.day, ]
              rownames(tab) <- NULL
              # The counter "i" comes back to the deleted position
              i <- beg.day
            } else {
              # Save the coordinates of the deleted localisation
              xy <- rbind(xy, tab[i, c("Longitude", "Latitude", "Loc_quality")])
              # Deletion according to the speed: blue
              Col <- c(Col, "blue")
              # Delete the localisation (speed too high)
              tab <- tab[-i, ]
              rownames(tab) <- NULL
              # And "i" stays "i"
              i <- i
            }

          } else {
            # The localisation is kept and we go to the next one
            i <- i + 1
          }

        # If the acceleration speed AND/OR the maximal duration are not respected
        } else {

          # Row where the day of localisation i starts
          beg.day <- as.numeric(rownames(tab[substr(tab[, "Loc_date"], 1, 10) == substr(tab[i, "Loc_date"], 1, 10), ])[1])

          # If i is the first localisation of the day
          if (beg.day == i) {
            # Save the coordinates of the deleted localisation
            xy <- rbind(xy, tab[i, c("Longitude", "Latitude", "Loc_quality")])
            # Deletion according to the speed: blue
            Col <- c(Col, "blue")
            # Delete the localisation (bad speed)
            tab <- tab[-i, ]
            rownames(tab) <- NULL
            # And "i" stays "i"
            i <- i

          # If i is not the first localisation of the day,
          # we need to check that this localisation is "good"
          } else {

            #-----
            # We extract some points around i to check whether this point has the
            # greatest distance to the centroid of this group of points
            #-----

            # If there are at most 3 localisations between the start of the day and i
            if ((i - beg.day) <= 3) {

              # If i is at least on the 4th row
              if (i >= 4) {
                # If there are 3 localisations after the localisation i
                if ((i + 3) <= nrow(tab)) {
                  # Then we take the 3 localisations before and after i
                  vec <- seq(i - 3, i + 3)
                } else {
                  # Otherwise we take the maximum number of localisations after i (from 0 to 2)
                  vec <- seq(i - 3, nrow(tab))
                }
              # If i is before the 4th row (between 2 and 3)
              } else {
                # If there are 3 localisations after the localisation i
                if ((i + 3) <= nrow(tab)) {
                  # Then we take all localisations between the start of the day and i, and 3 localisations after i
                  vec <- seq(1, i + 3)
                } else {
                  # Otherwise we take the maximum number of localisations after i (from 0 to 2)
                  vec <- seq(1, nrow(tab))
                }
              }

              # We calculate the centroid of the group of points
              Bar <- Barycentre(tab[vec, c("Longitude", "Latitude")])

              # We calculate the distance between each point and the centroid
              Dist <- apply(tab[vec, c("Longitude", "Latitude")], 1, Euclidean.Dist, G = Bar)

              # If the 1st localisation of the day has the greatest distance
              if (vec[which.max(Dist)] == beg.day) {
                # Save the coordinates of the deleted localisation
                xy <- rbind(xy, tab[beg.day, c("Longitude", "Latitude", "Loc_quality")])
                # Deletion according to the distance to the centroid: red
                Col <- c(Col, "red")
                # Delete the localisation
                tab <- tab[-beg.day, ]
                rownames(tab) <- NULL
                # The counter "i" comes back to the deleted position
                i <- beg.day
              } else {
                # Save the coordinates of the deleted localisation
                xy <- rbind(xy, tab[i, c("Longitude", "Latitude", "Loc_quality")])
                # Deletion according to the speed: blue
                Col <- c(Col, "blue")
                # Delete the localisation (speed too high)
                tab <- tab[-i, ]
                rownames(tab) <- NULL
                # And "i" stays "i"
                i <- i
              }

            # If i is far from the first position of the day
            } else {
              # Save the coordinates of the deleted localisation
              xy <- rbind(xy, tab[i, c("Longitude", "Latitude", "Loc_quality")])
              # Deletion according to the speed: blue
              Col <- c(Col, "blue")
              # Delete the localisation (speed too high)
              tab <- tab[-i, ]
              rownames(tab) <- NULL
              # And "i" stays "i"
              i <- i
            }
          }
        }

      } else {
        # The localisation is kept (speed below the cruising speed)
        i <- i + 1
      }

      # We check there are still localisations
      if (i > nrow(tab)) { break }
    }

    rownames(tab) <- NULL

    if (Plot) {
      # Identify the deleted localisations
      points(xy, pch = 19, col = Col)
      # Draw the new positions
      Position.plot(tab[, c("Longitude", "Latitude")], Ind[j], Xlim, Ylim)
    }

    if (!is.null(xy)) {

      Ofni <- cbind(xy, Col)

      # Count the deleted localisations by LC and by criterion
      for (z in 1 : nrow(Ofni)) {
        pos.col <- which(colnames(Info) == paste("Loc", as.character(Ofni[z, "Loc_quality"]), sep = "_"))
        if (Ofni[z, "Col"] == "blue") {
          Info[3, pos.col] <- Info[3, pos.col] + 1
        } else {
          Info[2, pos.col] <- Info[2, pos.col] + 1
        }
      }
    }

    # Add data from the new PTT below the previous one
    TAB <- rbind(TAB, tab)

    # Add the summary table to the list
    Info.list <- rbind(Info.list, Info)
  }

  cat("Graph information:", "\n")
  cat("   Red: Deletion of the first daily localisation", "\n")
  cat("   Blue: Deletion of the localisation based on speed", "\n")
  cat("   Black: Localisation kept", "\n")

  cat("\n", "Deletion information:", "\n")
  print(Info.list)

  return(TAB)
}

#===============================================================================

Position.plot <- function(data, Ind, Xlim, Ylim) {

  # Plot the localisations of one PTT
  plot(data, pch = 19, col = "black", bty = "l", las = 2, cex.axis = 0.75, cex.lab = 0.75,
       xlab = colnames(data)[1], ylab = colnames(data)[2], xlim = Xlim, ylim = Ylim)
  title(main = paste("Individual", Ind))
}

#===============================================================================

Barycentre <- function(data) {

  # Centroid (mean x, mean y) of a set of points
  Bar.x <- mean(data[, 1])
  Bar.y <- mean(data[, 2])
  Bar   <- c(Bar.x, Bar.y)

  return(Bar)
}

#===============================================================================

Euclidean.Dist <- function(X, G) {

  # Euclidean distance between a point X and the centroid G
  d <- sqrt((X[1] - G[1])^2 + (X[2] - G[2])^2)

  return(d)
}

#===============================================================================
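#-------------------------------------------------------------------------------
# Example of a filtering session (a minimal sketch, not part of the original
# script): the file name, speed thresholds and location classes below are
# hypothetical and should be adapted to the study species and deployment.
#
# data <- read.delim("MyPTT_data_Redundancy.txt", header = TRUE)
# data[, "Loc_date"] <- as.POSIXct(strptime(data[, "Loc_date"], "%Y-%m-%d %H:%M:%S"))
#
# Filtered <- Speed.Filter(data,
#                          Sel.Loc = TRUE,
#                          qual    = c("3", "2", "1", "0"),  # location classes to keep
#                          Speed1  = 65,     # cruising speed threshold (km/h)
#                          Speed2  = 110,    # acceleration speed threshold (km/h)
#                          TimeSp2 = 10,     # maximum acceleration duration (min)
#                          Plot    = TRUE)
#
# The helper functions can also be used on their own, e.g. to find which of a set
# of points lies farthest from their centroid:
#
# pts  <- data.frame(Longitude = c(0, 1, 2), Latitude = c(0, 1, 5))
# G    <- Barycentre(pts)                      # centroid (mean Longitude, mean Latitude)
# Dist <- apply(pts, 1, Euclidean.Dist, G = G) # distance of each point to the centroid
# which.max(Dist)                              # index of the most distant point
#-------------------------------------------------------------------------------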