macro-; macro[10000; macro* 15 200000; macro=;

/* MSM.run
   Computes MSM* and GER for every tree in memory from the length of
   character 0 under a step matrix built from the ages read at label AGES
   in datamsm.tnt. It then writes every tree with branch lengths to
   calibrated_tree.tre and runs a permutation test whose results are
   appended to MSM.OUT.
   Argument 1 is optional and gives the number of permutation replicates.
   It defaults to 1000.
   Two user variables are addressed by number: 198 holds maxstate plus one
   and 199 holds ntrees plus one. A quoted number is therefore a variable
   reference in this script and never a numeric literal. */

macfloat 2;
report-;
var = 0 min_BL ;

/* ======== SET HERE THE MINIMUM BRANCH LENGTH FOR INTERNAL AND TERMINAL BRANCHES ============*/
set min_BL 0;
/* ========================================================*/

/* When ntrees is below zero, presumably meaning no trees are in memory,
   print the usage text and close the script. */
if (ntrees < 0)
  quote CAUTION: If this script is used with a version of TNT older than "TNT 1.1 of Novembre 2009" the results can be erroneous. Please update your version of TNT to get correct results with MSM.run This script calculates the modified MSM (Pol and Norell, 2001) and GER (Wills, 1999) on the trees in memory using the Sankoff age character (detailed in file dataMSM.TNT) To run this file: A) Modifiy and save the file dataMSM.TNT according to the data you want to analyze (0 should be the oldest state) IMPORTANT: THE FILE WITH THE AGE DATA SHOULD BE named as it is in the example: dataMSM.tnt B) Read the file dataMSM.TNT in TNT, read the trees you want to evaluate if they are not included in dataMSM.TNT C) Execute this script Arguments: 1) If number N is input as first argument the script will perform the significance test (see Sidall, 1998) doing N replicates the default is 1000 replicates OUTPUT: two files are created 1) MSM* and GER values are saved in a text file named MSM.OUT (if there is a prexistent msm.out file it will be overwritten) 2) A nexus tree file named calibrated_tree.tre is saved for each of the trees. This tree file contains branch lengths so it represents a calibrated phylogeny that can be viewed with FigTree, Mesquite or any other program that plots trees using the branch length data in nexus format. 
; p/;
end;

/* READ AGES FROM dataMSM.TNT */
/* The age array starts at variable 10 and has one cell per state of
   character 0. The code at label AGES in datamsm.tnt is expected to fill it. */
set 198 (maxstate[0]+1);
var = 10 age ['198'] ;
goto = datamsm.tnt;
goto AGES;

/* INITIALIZE AGE CHARACTER */
col0;
set 199 ntrees+1;
var = 200
  largest_state
  + tmp_cost
  + Lo['199']
  + MSM['199']
  + GER['199']
  + Lm
  + G
  + tmp_stateA
  + tmp_stateB ;

/* MAKE SANKOFF AGE CHARACTER */
/* Labels are looked up in this file again from here on. MAKE_SANKOFF also sets Lm. */
goto = MSM.run;
goto MAKE_SANKOFF;

/* Set G */
/* G is the length of character 0 on the last tree in memory after randtr
   and nelsen have run. Presumably that tree is the consensus of the random
   trees, which would give the largest possible length. TODO confirm.
   The keep command then restores the original number of trees. */
randtr 100;
nelsen * '199'.;
set G length[ntrees 0];
keep '199';

/* CALCULATE MSM* and GER */
/* NOTE(review): in the logged formula the label LM stands for G. The text
   is left untouched so that existing readers of MSM.OUT see the same output. */
sil=cons;
log MSM.OUT;
quote Tree | MSM* | GER ----------------------------;
log/;
loop 0 ntrees
  progress #1 ntrees Calculating MSM*/GER values - tree #1 of '199';
  set Lo[#1] length[#1 0];
  set MSM[#1] 'Lm'/'Lo[#1]';
  set GER[#1] ( ('G'-'Lo[#1]')/('G'-'Lm') );
  log+ MSM.OUT;
  quote #1 'MSM[#1]' 'GER[#1]' (MSM= Lm/Lo = 'Lm'/'Lo[#1]' , GER= [LM-Lo]/[LM-Lm] = ['G'-'Lo[#1]']/['G'-'Lm'] );
  log/;
stop;
progress/;

/* EXPORT CALIBRATED TREES TO NEXUS FORMAT WITH BRANCH LENGTHS */
/* Node numbering as used below: indices 0 to ntax are terminals, the root
   is ntax plus one, and the per node arrays have ntax times two plus one cells. */
macfloat 5;
var= + root_number + num_of_nodes + num_nodes_leaves ;
set num_of_nodes ntax-1;
set root_number ntax+1;
set num_nodes_leaves ntax*2+1;
var=
  + cur_node
  + cur_anc
  + taxnumber
  + start_var_num
  + left_desc
  + right_desc
  + finished
  + done ['num_nodes_leaves']
  + cur_desc [ntax]
  + this_node
  + para_abajo [ntax]
  + descendants [2]
  + max_num_node
  + state_of ['num_nodes_leaves']
  + age_of ['num_nodes_leaves']
  + branch_length ['num_nodes_leaves']
  + ancestor ;

/* STORE AGE OF OTUS */
goto STORE_AGE_OTU ;

log calibrated_tree.tre;
quote #NEXUS;
quote begin trees ., ;
log/;

loop 0 ntrees
  progress #1 ntrees Saving calibrated trees - tree #1 of '199';
  ttag- ;
  ttag= ;
  naked= ;

  /* CALCULATE AGE OF HTUs (adding minimum branch length) */
  /* Internal nodes are visited in downlist order. Each one gets the larger
     age of its first two descendants plus min_BL. */
  set para_abajo downlist[#1];
  loop 0 'num_of_nodes'
    set this_node 'para_abajo[#2]' ;
    set descendants deslist[#1 'this_node'];
    if ('age_of['descendants[0]']' > 'age_of['descendants[1]']')
      set age_of['this_node'] 'age_of['descendants[0]']'+'min_BL';
    else
      set age_of['this_node'] 'age_of['descendants[1]']'+'min_BL';
    end;
  stop;

  /* CALCULATE BRANCH LENGTHS OF OTU AND HTU */
  /* Branch length is ancestor age minus node age. The root is skipped, so
     its branch_length cell is never written by this loop. */
  loop 0 ('num_nodes_leaves'-1)
    if (#2 == 'root_number') continue; end;
    set ancestor anc[#1 #2];
    set branch_length[#2] 'age_of['ancestor']'-'age_of[#2]';
  stop;

  /* START SAVING CALIBRATED TREE */
  log+ calibrated_tree.tre;
  quote tree #1 = [&U];
  log/;
  lquote=;
  set cur_node 'root_number';
  set finished 0;
  loop 0 ('num_nodes_leaves'-1)
    set done[#2] 0;
  stop;

  /* One step of the traversal per pass. setloop 0 repeats the single
     iteration until DO_HTU sets finished at the root. */
  loop 0 0
    if ('cur_node' > ntax) goto DO_HTU #1; else goto DO_OTU #1; end;
    if ('finished' == 0) setloop 0; end;
  stop;
stop;
progress/;

log+ calibrated_tree.tre;
quote end., ;
log/;
ttag-;

/* DO SIGNIFICANCE TEST */
log+ MSM.OUT;
quote ----------------------------;
quote ----------------------------;
quote Tree | p-value --------------;
log/;

var = + sign_reps ;
if (argnumber == 0) set sign_reps 1000; else set sign_reps %1; end;
var = + perm + Lperm[('sign_reps'+1) ('199')] + p['199'] ;
macfloat 4;
set perm ntrees;

/* Each replicate alters the data with rs and xp, presumably a permutation
   of the age character. TODO confirm. The counter for a tree goes up when
   the replicate length is not larger than the observed length Lo. */
loop 1 'sign_reps'
  progress #1 'sign_reps' Performing significance test - replicate #1 of 'sign_reps';
  rs *;
  xp 0/.;
  loop 0 'perm'
    set Lperm[#1 #2] length[#2 0];
    if ('Lperm[#1 #2]' <= 'Lo[#2]') set p[#2] ++; end;
  stop;
stop;
progress/;

/* Turn counts into proportions. A count of zero is reported as one over
   the number of replicates. */
loop 0 'perm'
  progress #1 'perm' Performing significance test - calculating p-values;
  set p[#1] ('p[#1]' / 'sign_reps' );
  if ('p[#1]' == 0) set p[#1] ( 1 / 'sign_reps' ); end;
  log+ MSM.OUT;
  quote #1 'p[#1]';
  log/;
stop;
progress/;

/* Dump the replicate lengths, one column per tree and one row per replicate. */
log+ MSM.OUT;
sil=cons;
quote --------------;
quote . 
THE FOLLOWING TABLE DETAILS THE CRITICAL DISTRIBUTION USED FOR CALCULATING THE P-VALUES, EACH COLUMN REPRESENTS THE CRITICAL DISTRIBUTION GENERATED FOR EACH TREE ============ ;
lquote=;
loop 0 'perm'
  quote Tree #1 ;
stop;
lquote-;
quote -;
loop 1 'sign_reps'
  progress #1 'sign_reps' Outputting significance test - replicate #1 of 'sign_reps' ;
  lquote=;
  loop 0 'perm'
    quote 'Lperm[#1 #2]' ;
  stop;
  lquote-;
  quote -;
stop;
progress/;
log/;
sil-cons;

/* GO BACK TO ORIGINAL DATA */
/* The data file is read again and the step matrix is rebuilt. */
p datamsm.tnt;
goto MAKE_SANKOFF;
quote =======================================================================================================;
quote =======MSM SCRIPT FINISHED SUCCESSFULLY, SEE RESULTS IN FILES MSM.OUT and CALIBRATED_TREE.TRE==========;
quote =======================================================================================================;
p/;

/*====================== DO HTU =================*/
/* One traversal step at an internal node. Argument 1 is the tree number.
   The done counter of the node goes up on every visit. The first visit
   writes the opening parenthesis. While a descendant is still unvisited the
   step moves to it and returns. On the third visit the node is closed with
   its branch length. At the root the tree is terminated and finished is
   set. Otherwise a comma is written when the ancestor has been visited
   fewer than two times, and the step moves up to the ancestor. */
label DO_HTU;
set cur_desc deslist[%1 'cur_node'];
set left_desc 'cur_desc[0]';
set right_desc 'cur_desc[1]';
if ('done['cur_node']' == 0)
  log+ calibrated_tree.tre;
  quote (;
  log/;
end;
set done['cur_node'] ++;
if ('done['left_desc']' == 0 )
  set cur_node 'left_desc';
  p/;
end;
if ('done['right_desc']' == 0 )
  set cur_node 'right_desc';
  p/;
end;
if ('done['cur_node']' == 3 )
  set cur_anc anc[%1 'cur_node'];
  log+ calibrated_tree.tre;
  quote ):'branch_length['cur_node']';
  log/;
  if ('cur_node' == 'root_number')
    lquote-;
    log+ calibrated_tree.tre;
    quote ., ;
    log/;
    set finished 1;
    p/;
  end;
  if ('done['cur_anc']' < 2 )
    log+ calibrated_tree.tre;
    quote , ;
    log/;
  end;
  set cur_node 'cur_anc';
  p/;
end;
p/;

/*====================== DO OTU =================*/
/* One traversal step at a terminal. Argument 1 is the tree number. Writes
   the taxon name and branch length, adds a comma when the ancestor has been
   visited fewer than two times, marks the terminal done and moves up. */
label DO_OTU;
set cur_anc anc[%1 'cur_node'];
log+ calibrated_tree.tre;
quote $taxon 'cur_node':'branch_length['cur_node']';
log/;
if ('done['cur_anc']' < 2 )
  log+ calibrated_tree.tre;
  quote , ;
  log/;
end;
set done['cur_node'] 1;
set cur_node 'cur_anc';
p/;

/*====================== STORE AGES OF OTUS =================*/
/* For every terminal the value returned by states for character 0 is
   compared with successive powers of two to recover the state number,
   which then indexes the age array. The extra test maps loop index 31 to
   state 31. */
label STORE_AGE_OTU;
loop 0 ntax
  set state_of[#1] states[0 #1 0];
  loop 0 maxstate[0]
    if ('state_of[#1]' == power[2 #2])
      set state_of[#1] #2;
      endloop;
    end;
    if (#2 == 31) set state_of[#1] 31; end;
  stop;
  set age_of[#1] 'age['state_of[#1]']';
stop;
p/;

/*========= DEFINE STEP MATRIX OF AGE CHARACTER FROM AGE DATA IN datamsm.tnt & SET Lo =====*/
/* For every ordered pair of distinct states the transformation in the
   direction where the age difference is not negative costs that difference
   and the opposite direction costs 10000. Lm ends up as the largest cost
   found between state 0 and any other state. */
label MAKE_SANKOFF;
set largest_state maxstate[0];

/* Seed Lm with minus one so that any cost replaces it. Plain numbers are
   required here. Quoted numbers are variable references in this script, so
   quoting 0 and 1 read min_BL and the undeclared variable 1, which made the
   seed depend on the minimum branch length chosen by the user. */
set Lm (0 - 1);

loop 0 'largest_state'
  loop 0 'largest_state'
    if (#1 == #2) continue; end;
    set tmp_stateA #1;
    set tmp_stateB #2;
    set tmp_cost 'age['tmp_stateA']' - 'age['tmp_stateB']' ;
    if ('tmp_cost' < 0 )
      set tmp_cost 'age['tmp_stateB']' - 'age['tmp_stateA']' ;
      cost 0 = 'tmp_stateB' > 'tmp_stateA' 'tmp_cost';
      cost 0 = 'tmp_stateA' > 'tmp_stateB' 10000;
    else
      cost 0 = 'tmp_stateA' > 'tmp_stateB' 'tmp_cost';
      cost 0 = 'tmp_stateB' > 'tmp_stateA' 10000;
    end;
    if ('tmp_stateA' == 0)
      if ('tmp_cost' > 'Lm') set Lm 'tmp_cost'; end;
    end;
  stop;
stop;
cc( .;
p/;