# Threshold haemoglobin levels and the prognosis of stable coronary disease # R program for stratified Cox analysis # Anoop Shah, November 2010 library(foreign) library(Hmisc) library(survival) # GENERIC FUNCTION FOR STRING CONCATENATION "%&%" <- function (a, b) paste(a, b, sep="") # GLOBAL ANALYSIS OPTIONS for (studypop in c("angina", "mi")) { # SPECIFICATIONS FOR THIS ANALYSIS location <- "~/Dropbox/Haemoglobin/R/" outfile <- "Rcox_results-" %&% studypop %&% ".txt" # Prepare output log file filename <- location %&% studypop %&% ".Rcox.log" sink(filename) cat("Analysis commenced at: " %&% Sys.time() %&% "\n") cat("\nLog file: \n" %&% filename %&% "\n") # Load dataset # MI and stable angina datasets are in saved R workspaces # names miadjust.Rdata and anginaadjust.Rdata # Each saved workspace contains a data.frame a which contains # all patients, and b which contains only patients with nonmissing # data, as used for the main analysis filename <- location %&% studypop %&% "adjust.Rdata" cat("\nLoad data:\n" %&% filename %&% "\n") load(filename) location <- "~/Dropbox/Haemoglobin/R/" # COXPH FUNCTION docox2 <- function (dataset, variables, comment, endpoint = "e4_death", survivaltime = "e4_days", categoryvariable="", basemen="Men[14,15)", basewomen="Women[13,14)") { # Carries out Cox regression for men and women using dataset # and the specified variables. # If no endpoint specified, the default endpoint is death # Non haemoglobin variables are not gender specific. # Contrast matrices are generated manually in order to combine # TWO categories as the reference group, for men and women to # be included in the same model # Output of this function is a data.frame with the following columns: # formula (formula string with adjustment variables) # var (category variable) # category (name of sex-specific category (e.g.Women[11,12))) # N - patients # n - events # hr - hazard ratio and 95% confidence interval, with * for P value # The number of rows is the number of categorylabels # These data can be used for convenient tabular display of results # Initialise output data.frame if (categoryvariable!="") { categorylabels <- levels(dataset[,categoryvariable]) variables <- variables %&% " + " %&% categoryvariable } coxmodel <- data.frame(categorylabels) names(coxmodel) <- "category" row.names(coxmodel) <- coxmodel$category theformula <- "Surv(" %&% survivaltime %&% ", " %&% endpoint %&% ") ~ sex + " %&% variables # Categorylabel includes gender (e.g. Men[0,11)) coxmodel$var <- rep(studypop %&% " " %&% categoryvariable %&% " " %&% endpoint, length(categorylabels)) coxmodel$N <- rep(0, length(categorylabels)) coxmodel$n <- rep(0, length(categorylabels)) coxmodel$hr <- rep("", length(categorylabels)) coxmodel$lower <- rep(0, length(categorylabels)) coxmodel$middle <- rep(0, length(categorylabels)) coxmodel$upper <- rep(0, length(categorylabels)) # Numbers of patients and events for (looper in 1:length(categorylabels)) { coxmodel$N[looper] <- length(dataset[which(dataset[,categoryvariable]== categorylabels[looper]),1]) coxmodel$n[looper] <- length(dataset[which(dataset[,endpoint]==1 & dataset[,categoryvariable]==categorylabels[looper]),1]) } cat("\nCOX PROPORTIONAL HAZARDS EXPORT FOR TABLE") cat("\n" %&% comment %&% "\n========================") cat("\nFormula: " %&% theformula) cat("\nStudy population " %&% studypop %&% "\n") # Prepare contrast matrix (manually) if (categoryvariable!="") { trialmatrix <- matrix(c(rep(c(1,rep(0,length(categorylabels))), length(categorylabels)-1),1), nrow=length(categorylabels), ncol=length(categorylabels), dimnames = list (categorylabels, categorylabels)) trialmatrix <- trialmatrix[, c(-which(categorylabels==basemen), -which(categorylabels==basewomen))] # Number of columns must be n-1, otherwise R will make up some # extra columns. Hence add an additional column of zeroes trialmatrix <- cbind(trialmatrix, rep(0, length(categorylabels))) contrasts(dataset[,categoryvariable]) <- trialmatrix } # Do the Cox regression model <- coxph(formula = as.formula(theformula), data = dataset) print(model) # Calculate Schoenfeld residuals and print analysis to log file zph <- cox.zph(model) print(zph) # Get coefficients, and place them in output data.frame for (looper in 1:length(categorylabels)) { targetpos <- which(names(model$coefficients)== categoryvariable %&% categorylabels[looper]) if (length(targetpos) == 0) { hrtext <- "1 (reference)" middle <- 1 lower <- 1 upper <- 1 } else { if (is.na(model$coefficients[targetpos])) { middle <- NA upper <- NA lower <- NA hrtext <- "" } else { pvalue <- 2 * (1 - pnorm(abs(model$coefficients[targetpos]) / sqrt(model$var[targetpos,targetpos]))) if (pvalue < 0.05) {pstar <- " *"} else {pstar <- ""} if (pvalue < 0.01) {pstar <- " **"} if (pvalue < 0.001) {pstar <- " ***"} pvalue <- NULL middle <- exp(model$coefficients[targetpos]) lower <- exp(log(middle) - 1.96 * sqrt(model$var[targetpos,targetpos])) upper <- exp(log(middle) + 1.96 * sqrt(model$var[targetpos,targetpos])) if (upper < lower) { temp <- upper upper <- lower lower <- temp temp <- NULL } hrtext <- format2dp(middle) %&% " (" %&% format2dp(lower) %&% "–" %&% format2dp(upper) %&% ")" %&% pstar } } coxmodel$lower[looper] <- lower coxmodel$middle[looper] <- middle coxmodel$upper[looper] <- upper coxmodel$hr[looper] <- hrtext } coxmodel$formula <- rep(theformula, length(categorylabels)) return(coxmodel) } # SET UP THE ACTUAL ANALYSIS # Adjustment variables adjustmultiplehb <- "bp_sys + totalchol + indexage + smok_any + fh_chd + diab_any + cci_any" # Variable description # ==================== # bp_sys - systolic BP in mmHg # totalchol - total cholesterol in mmol/L # indexage - patient age at index date # smok_any - whether patient has ever smoked prior to index date # fh_chd - family history of coronary artery disease # diab_any - diabetes prior to index date # cci_any - Charlson comorbidity index evaluated at index date # Dataframe b contains only patients with nonmissing data # This dataframe is used for the main analysis # Generate category for sex-haemoglobin and sex-MCV b$cathb13 <- b$sex %&% b$cathb7 b$cathb13 <- as.factor(as.character(b$cathb13)) b$catmcv <- b$sex %&% b$mcvquint b$catmcv <- as.factor(as.character(b$catmcv)) # DO THE ACTUAL STRATIFIED COX ANALYSIS # Age adjusted analysis tempresults <- docox2(a, "indexage", studypop %&% " - age/sex adjusted", categoryvariable="cathb13") write.table(tempresults, file=outfile, append=FALSE, sep="\t", na="", row.names=FALSE) # Main results -- multiply adjusted analysis, for graphing mainresults <- docox2(b, "egfr_mdrd + " %&% adjustmultiplehb, studypop %&% " - multiply adjusted (main analysis)", categoryvariable="cathb13") write.table(mainresults, file=outfile, append=TRUE, sep="\t", na="", row.names=FALSE) # Close output log file sink() }