Supplemental material - script

(a) Script to generate mutation context spectrum data
'''
Version 1.22
by Brendan Kohrn
Last updated on July 29, 2014

Find the mutation context for a list of mutants in mutpos format, as follows:
Chromosome    ReferenceNT    Position    Depth    number_of_mutants    mutants_to_T    mutants_to_C    mutants_to_G    mutants_to_A    insertions    deletions

'''

import sys
import re
import collections
from collections import defaultdict
import argparse
from argparse import ArgumentParser
import numpy as np
import matplotlib.pyplot as plt


class Files:
    """
    Iterate over the mutated positions of a mutpos file while keeping the
    reference FASTA window positioned on the same chromosome/coordinate.

    Each iteration yields the mutPosLine of the next position that carries
    at least one mutant call; iteration stops at the end of the mutpos
    file or when the reference has no matching chromosome.

    inFasta    -- path of the reference genome (FASTA)
    inMutPos   -- path of the mutpos file
    minC, maxC -- clonality bounds; only stored on the instance here
    bufferSize -- passed through to fastaWin
    """

    def __init__(self, inFasta, inMutPos, minC, maxC, bufferSize = 3):
        self.lineNum = 0
        opened = []
        try:
            tmpMutPos = open(inMutPos, 'r')
            opened.append(tmpMutPos)
            self.MutPos = mutPosFile(tmpMutPos)
            tmpFasta = open(inFasta, 'r')
            opened.append(tmpFasta)
            self.Fasta = fastaWin(tmpFasta, bufferSize)
        except Exception:
            # Do not leak handles that were already opened when a later
            # open()/parse step fails.
            for handle in opened:
                handle.close()
            raise
        self.minClonality = minC
        self.maxClonality = maxC

    def __iter__(self):
        return(self)

    def next(self):
        """Return the next mutated mutPosLine, or raise StopIteration."""
        # MutPos.next() returns True once a line with mutants is loaded
        # and False when the file is exhausted.
        found = False
        while not found and not self.MutPos.line.fileEnd:
            found = self.MutPos.next()
        if self.MutPos.line.fileEnd:
            raise StopIteration
        # advance() returns False when the requested chromosome is not
        # found in the remainder of the reference.
        if not self.Fasta.advance(endChr = self.MutPos.chrom, endPos = self.MutPos.pos):
            raise StopIteration
        self.lineNum = self.MutPos.lineNum
        return(self.MutPos.line)

    # Python 3 spelling of the iterator protocol; keeps "for line in files"
    # working on either interpreter.
    __next__ = next

    def close(self):
        """Close both underlying input files."""
        self.MutPos.close()
        self.Fasta.close()


class mutPosFile:
    """
    Cursor over an open mutpos file.

    After a successful next(), self.line holds the parsed line and
    self.chrom / self.pos mirror its coordinates.
    """

    def __init__(self, inFile):
        self.file = inFile
        self.chrom = ""
        self.pos = 1
        # "-" builds a placeholder line that is not flagged as end-of-file
        self.line = mutPosLine("-",0)
        self.lineNum = 0

    def _readLine(self):
        # Count the line and parse whatever readline() hands back
        # ("" at end-of-file, which mutPosLine flags as fileEnd).
        self.lineNum += 1
        return(mutPosLine(self.file.readline(), self.lineNum))

    def next(self):
        """
        Move to the next line with at least one T/C/G/A mutant count.

        Returns True when such a line was loaded, False at end-of-file.
        """
        self.line = self._readLine()
        if self.line.fileEnd:
            sys.stderr.write('>>MutPos EOF reached\n')
            return(False)
        # Skip positions where none of the four substitution counts is set
        while (not self.line.fileEnd
               and self.line.Ts == 0 and self.line.Cs == 0
               and self.line.Gs == 0 and self.line.As == 0):
            self.line = self._readLine()
        if self.line.fileEnd:
            return(False)
        self.pos = self.line.pos
        self.chrom = self.line.chrom
        return(True)

    def __str__(self):
        return(str(self.line))

    def close(self):
        """Close the wrapped file; always returns True."""
        self.file.close()
        return(True)


class mutPosLine:
    """
    One line of a mutpos file.

    Expected whitespace-separated columns:
    chrom, reference base, position, depth, number of mutants,
    mutants to T, to C, to G, to A, insertions, deletions.

    Two special inputs build sentinel objects with placeholder fields:
    "-"  -> placeholder used before any line was read (fileEnd False)
    ""   -> end-of-file marker, as returned by readline() (fileEnd True)
    """

    def __init__(self, inLine, inLineNum):
        if inLine == "-" or inLine == "":
            self.fileEnd = (inLine == "")
            self.lineNum = -1
            self.chrom = ''
            self.refBase = ''
            self.pos = -1
            self.depth = -1
            self.muts = -1
            self.Ts = -1
            self.Cs = -1
            self.Gs = -1
            self.As = -1
            self.ins = -1
            self.dels = -1
            self.clonality = 0
        else:
            self.lineNum = inLineNum
            linebins = inLine.split()
            self.fileEnd = False
            self.chrom = linebins[0]
            self.refBase = linebins[1].upper()
            self.pos = int(linebins[2])
            self.depth = int(linebins[3])
            self.muts = int(linebins[4])
            self.Ts = int(linebins[5])
            self.Cs = int(linebins[6])
            self.Gs = int(linebins[7])
            self.As = int(linebins[8])
            self.ins = int(linebins[9])
            self.dels = int(linebins[10])
            if self.depth == 0:
                # A position without coverage has no meaningful clonality;
                # avoid dividing by zero.
                self.clonality = 0.0
            else:
                # Clonality is the largest fraction of reads supporting
                # any single substitution.
                self.clonality = max(
                    float(count) / self.depth
                    for count in (self.Ts, self.Cs, self.Gs, self.As)
                )

    def makeMuts(self):
        """Return one Mutation per base (T, C, G, A order) with a non-zero count."""
        counts = (("T", self.Ts), ("C", self.Cs), ("G", self.Gs), ("A", self.As))
        return([
            Mutation(self.chrom, self.pos, self.refBase, base)
            for base, count in counts
            if count
        ])

    def __str__(self):
        return('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % (self.lineNum, self.chrom, self.refBase, self.pos, self.depth, self.Ts, self.Cs, self.Gs, self.As, self.ins, self.dels))


class fastaWin:
    """
    Rolling window over a FASTA file.

    The window (self.data) holds two sequence lines' worth of bases and
    is used as a circular buffer: moveWin() overwrites the older half
    with the next line read from the file.  minPos / maxPos are the
    1-indexed reference coordinates currently covered by the window.
    The length of the first sequence line of a chromosome (lLength)
    defines the line length; shorter lines are padded with N.

    NOTE(review): moveWin() does not recognise ">" header lines, so
    reading past the end of one chromosome in a multi-record FASTA
    would load the next header as sequence -- confirm inputs.
    """

    def __init__(self, dataSource, bufferSize = 3):
        self.sourceFile = dataSource
        # Chromosomes skipped by advance(endChr=...).
        self.usedChrs = []

        ##initialize the window in the first chromosome
        header = self.sourceFile.readline().strip()
        self.chrom = header.split(">")[1].split(" ")[0]

        line = self.sourceFile.readline().strip().upper()
        self.sizeMax=2*len(line) #size of the window
        self.bsize=int(bufferSize) #size of the buffer desired
        self.eof=False #has the end of the file been reached?
        self.really=False #has the final line of N padding been added?
        #how long is one line in this reference genome anyway?
        self.lLength=len(line)

        #one line of N's first, so that mutations near the beginning
        #can be processed, then the first sequence line
        self.data = list('N'*self.lLength) + list(line)
        #the leftmost position on the reference genome.  1-indexed
        self.minPos=1-self.lLength
        #the rightmost position on the reference genome.  1-indexed
        self.maxPos=self.lLength
        self.pos = 1

    def _fillTo(self):
        # Read lines until self.pos is at least bsize bases left of the
        # window's right edge; stop (with a warning) once the reference
        # is exhausted instead of looping forever.
        while self.pos > self.maxPos - self.bsize:
            if self.moveWin() == False:
                sys.stderr.write('>>Position %s:%s is past the end of the reference\n' % (self.chrom, self.pos))
                break

    def advance(self, endChr = None, endPos = None):
        """
        Move the window.

        no arguments  -- step one base forward
        endChr only   -- skip forward to chromosome endChr
        endPos only   -- move to endPos on the current chromosome
        both          -- skip to endChr, then move to endPos

        Returns False when the requested chromosome was not found
        before the end of the file, True otherwise.
        """
        if endChr is None and endPos is None:
            self.pos += 1
            if self.pos > self.maxPos - self.bsize:
                self.moveWin()
        elif endPos is None:
            while self.chrom != endChr and self.chrom != '':
                self.usedChrs.append(self.chrom)
                self.chrom = self.NewChrom()
        elif endChr is None:
            self.pos = endPos
            self._fillTo()
        else:
            while self.chrom != endChr and self.chrom != '':
                self.chrom = self.NewChrom()
            self.pos = endPos
            if self.chrom != '':
                self._fillTo()
        return(self.chrom != '')

    def moveWin(self):
        """
        Shift the window one line to the right.

        Returns True while the window could be moved (or the end of the
        file was just detected) and False once the file is exhausted and
        the single trailing line of N padding has already been added.
        """
        dataSource = self.sourceFile.readline().strip().upper()
        if self.eof == False:
            if dataSource=="":
                self.eof=True
                return(True)
            if len(dataSource) > self.lLength:
                # Would overflow the half-window reserved for one line.
                raise ValueError('FASTA line longer than the first sequence line (%s > %s)' % (len(dataSource), self.lLength))
            inSeq = dataSource.ljust(self.lLength, "N")
        elif self.really == False:
            # One extra line of N's so the last bases still have context
            inSeq = "N"*self.lLength
            self.really = True
        else:
            return(False)
        self.maxPos+=self.lLength
        self.minPos+=self.lLength
        #index of the half of the buffer that holds the oldest line
        wPos=self.minPos%self.sizeMax-1
        self.data[wPos:wPos+self.lLength] = list(inSeq)
        return(True)

    def getSeq(self, pos):
        """
        Return the three bases at pos-1, pos and pos+1 (1-indexed), or
        "" when the file ends before the window can cover them.
        """
        pos = int(pos)
        while pos - 1 + 3 >= self.maxPos:
            if self.moveWin() == False:
                return("")
        bases = []
        for i in range(pos - 1, pos - 1 + 3):
            #map the reference coordinate onto the circular buffer
            b=(i+self.lLength)%self.sizeMax-1
            if b == -1:
                b = self.sizeMax - 1
            bases.append(str(self.data[b]))
        return("".join(bases))

    def NewChrom(self):
        """
        Skip to the next ">" header, reload the window from the first
        sequence line after it and return the new chromosome name.
        Returns '' (leaving the window untouched) at end-of-file.
        """
        mvTest=self.sourceFile.readline().strip()
        while ">" not in mvTest and mvTest != "":
            mvTest=self.sourceFile.readline().strip()
        if mvTest == "":
            return('')
        self.chrom=mvTest.split(">")[1].split(" ")[0]
        newData = self.sourceFile.readline().strip().upper()
        self.lLength = len(newData)
        self.data = list('N'*self.lLength) + list(newData)
        self.minPos=1-self.lLength
        self.maxPos=self.lLength
        self.sizeMax = 2 * self.lLength
        # A fresh record was loaded, so any earlier end-of-data state
        # no longer applies.
        self.eof = False
        self.really = False
        return(self.chrom)

    def close(self):
        """Close the FASTA file; always returns True."""
        self.sourceFile.close()
        return(True)


#~ class mutType:
    #~ def __init__(self, inRef, inMut):
        #~ self.refNT = inRef
        #~ self.mutNT = inMut
    #~ 
    #~ def __str__(self):
        #~ return("%s>%s" % (self.refNT, self.mutNT))


class Mutation:
    """
    A single substitution at one reference position.

    Type is the string "<ref>><mut>" (e.g. "C>T"); context is the
    sequence returned by the FASTA window for this position and stays
    empty until setContext() is called.
    """

    def __init__(self, inChrom, inPos, inRef, inMut):
        self.chrom = inChrom
        self.pos = inPos
        self.Type = "%s>%s" % (inRef, inMut)
        self.context = ''

    def setContext(self, fasta):
        """Fetch the sequence context for this position from fasta.getSeq()."""
        self.context = fasta.getSeq(self.pos)

    def __str__(self):
        # One placeholder per field; the previous format string had eight
        # placeholders for four values and always raised TypeError.
        return('%s\t%s\t%s\t%s' % (self.chrom, self.pos, self.Type, self.context))

def revComp(inSeq):
    """
    Return the reverse complement of inSeq, upper-cased.

    Only A, C, G, T and N are understood; any other base raises KeyError.
    """
    complement = {"A": "T", "G": "C", "C": "G", "T": "A", "N": "N"}
    return("".join(complement[base] for base in reversed(inSeq.upper())))

def main():
    """
    Build the mutation context spectrum for a mutpos file.

    Every substitution on a mutpos line whose clonality lies within
    [--minClonality, --maxClonality] is counted by mutation type and
    trinucleotide context.  Mutations at A/G reference bases are folded
    onto the C/T-referenced types using the reverse complement of the
    context.  Counts and proportions are written to
    "<label>.mcs.dat.txt" and a bar chart to "<label>.mcs.png".
    """
    parser=ArgumentParser()
    parser.add_argument("--ref", action="store", dest="ref", 
            help="The reference genome in FASTA format", required = True)
    parser.add_argument("--mutpos", action="store", dest="mutpos", 
            help="The mutpos file", required = True)
    parser.add_argument("--label", action="store", dest="label", required = True)
    parser.add_argument("--debug", action="store_true", dest="debug", help="Print out all mutations processed with +strand sequence context.")
    parser.add_argument('-c', '--minClonality', 
                    action = 'store', 
                    type = float,
                    dest = 'minClonal',
                    default = 0,
                    help = 'The minimum clonality at which to examine a mutation. [0]'
                    )
    parser.add_argument('-C', '--maxClonality', 
                    action = 'store', 
                    type = float,
                    dest = 'maxClonal', 
                    default = 1,
                    help = 'The maximum clonality at which to examine a mutation. [1]'
                    )
    o=parser.parse_args()

    # The 16 trinucleotide contexts around a C or a T reference base,
    # ordered by 5' base and then 3' base.
    bases = "ACGT"
    mutSpectKeys = {"all":["C>A","C>G", "C>T", "T>A", "T>C", "T>G"]}
    for ref in "CT":
        mutSpectKeys[ref] = [five + ref + three for five in bases for three in bases]
    # One float counter per (mutation type, context) pair.
    mutSpect = dict(
        (mutType, dict.fromkeys(mutSpectKeys[mutType[0]], 0.))
        for mutType in mutSpectKeys["all"]
    )

    # Purine-referenced mutations are reported as the complementary type.
    typeTrans = {"A>C":"T>G","A>G":"T>C","A>T":"T>A","G>A":"C>T","G>C":"C>G","G>T":"C>A"}

    allFiles = Files(o.ref, o.mutpos, o.minClonal, o.maxClonal, 1)
    print("Processing mutations...")
    try:
        lineNum = 0
        for line in allFiles:
            lineNum += 1
            if lineNum % 1000 == 0:
                print('%s mutation sites processed' % lineNum)
            if not (o.minClonal <= line.clonality <= o.maxClonal):
                continue
            for mut in line.makeMuts():
                mut.setContext(allFiles.Fasta)
                if o.debug:
                    print(mut.chrom, mut.pos, mut.Type, mut.context)
                if mut.Type in typeTrans:
                    mutType = typeTrans[mut.Type]
                    context = revComp(mut.context)
                else:
                    mutType = mut.Type
                    context = mut.context
                try:
                    mutSpect[mutType][context] += 1
                except KeyError:
                    # Unknown type/context: dump the window state to help
                    # diagnose the input, then fail.
                    print(mut.Type, mut.context)
                    print(mut.chrom, mut.pos)
                    print("fasta dump")
                    print("Position: ", allFiles.Fasta.chrom, allFiles.Fasta.pos)
                    print(allFiles.Fasta.minPos, allFiles.Fasta.maxPos)
                    print("Line Data: ", allFiles.Fasta.lLength)
                    print(allFiles.Fasta.data)
                    raise
    finally:
        # Release the input files even when processing fails.
        allFiles.close()

    # Flatten the spectrum into parallel lists; an empty entry separates
    # the six mutation types (and pads both ends) in the plot.
    allCounts = [0]
    allLabels = ['']
    metaLabels = ['']
    print("Preparing data table...")
    for mutType in mutSpectKeys["all"]:
        for mutKey in mutSpectKeys[mutType.split(">")[0]]:
            allCounts.append(mutSpect[mutType][mutKey])
            allLabels.append(mutKey)
            metaLabels.append(mutType)
        allCounts.append(0)
        allLabels.append('')
        metaLabels.append('')
    totalCounts = sum(allCounts)
    # With no counted mutation every proportion is reported as 0 rather
    # than dividing by zero.
    proportions = [
        count / totalCounts if totalCounts else 0.0
        for count in allCounts
    ]
    with open("%s.mcs.dat.txt" % o.label, "w") as dataFile:
        dataFile.write("Type\tContext\tCount\tProportion")
        for row in zip(metaLabels, allLabels, allCounts, proportions):
            dataFile.write("\n%s\t%s\t%s\t%s" % row)

    colTrans = {"C>A":'c', "C>G":'0.2', "C>T":'r', "T>A":'0.75', "T>C":'g', "T>G":'m', "":'w'}
    print("Building Figure...")
    plt.figure(figsize=(11,3))
    ind=np.arange(len(proportions))
    width = 1
    # align='edge' keeps the bars left-aligned on every matplotlib
    # version, which is what the half-width tick offset below assumes.
    rects = plt.bar(ind, proportions, width, color=[colTrans[x] for x in metaLabels], align='edge')
    plt.ylabel("Proportion of Mutations")
    plt.title("Mutation Spectrum: %s" % o.label)
    plt.xticks(ind+width/2., allLabels, rotation='vertical', fontsize=7)
    plt.xlim([0, ind.size])
    plt.yticks(fontsize=7)
    legendCreator = [
        plt.Rectangle((0,0), 1, 1, fc=colTrans[mutType])
        for mutType in mutSpectKeys["all"]
    ]
    plt.figlegend(legendCreator, mutSpectKeys["all"], loc=5, fontsize=7)
    print("Saving...")
    plt.savefig("%s.mcs.png" % o.label)

# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()


(b) Script to generate amino acid change data
#Note: 
#1. A BED file of the human mitochondrial protein-coding genes is required as an input for this script. It should follow the standard BED format and provide the following fields: chrom, chromStart, chromEnd, name, score, and strand. More detailed instructions can be found at https://genome.ucsc.edu/FAQ/FAQformat.html#format1
#2. The sequence of each human mitochondrial protein-coding gene can be found at http://www.mitomap.org/bin/view.pl/MITOMAP/HumanMitoCode (the same information can be found at http://www.mitomap.org/bin/view.pl/MITOMAP/HumanMitoCode).
#3. A table of amino acids and their corresponding DNA codons is required as an input in transtable format. The table for human mitochondrial DNA is provided at the end of this document (the file name of the example table is VertebrateMito.transtable).
#4. A mutpos file is required as an input for this script. More information about the mutpos format and the script that generates it (mut-position.py) can be found at https://github.com/loeblab/Duplex-Sequencing/blob/master/mut-position.py


# MutationConsequences.py
# Version 1.12
# Brendan Kohrn
# Last updated on 07/21/2014
#
# Output line:
# chrom, pos, direction, refNt, Compact NT, gene name, AA#, refAA, TmutAA, CmutAA, GmutAA, AmutAA, aaCompact
#
# An AA of ref means that this is the reference AA, an AA of NP means there is no mutation of that type, and an AA of NC means the region is non-coding (as determined from the bed file).  


import sys
import re
import collections
from collections import defaultdict
import argparse
from argparse import ArgumentParser


class Files:
    """
    Bundle all inputs and the output of the run and iterate over the
    mutated positions of the mutpos file.

    On every iteration the FASTA window and the BED regions are moved to
    the chromosome/position of the mutpos line that is returned.

    inBed       -- path of the BED file with the regions of interest
    inFasta     -- path of the reference genome (FASTA)
    inCode      -- path of the codon table (transtable format)
    inMutPos    -- path of the mutpos file
    outFileName -- path of the output file (not opened here)
    minC, maxC  -- clonality bounds; only stored on the instance here
    """

    def __init__(self, inBed, inFasta, inCode, inMutPos, outFileName, minC, maxC):
        self.lineNum = 0
        opened = []
        try:
            tmpMutPos = open(inMutPos, 'r')
            opened.append(tmpMutPos)
            self.MutPos = mutPosFile(tmpMutPos)
            tmpFasta = open(inFasta, 'r')
            opened.append(tmpFasta)
            self.Fasta = fastaWin(tmpFasta)
            tmpBed = open(inBed, 'r')
            opened.append(tmpBed)
            self.Bed = BedFile(tmpBed)
            tmpTable = open(inCode, 'r')
            opened.append(tmpTable)
            self.Table = TransTable(tmpTable)
        except Exception:
            # Do not leak handles that were already opened when a later
            # open()/parse step fails (closing twice is harmless).
            for handle in opened:
                handle.close()
            raise
        self.OutFile = outputFile(outFileName)
        self.minClonality = minC
        self.maxClonality = maxC

    def __iter__(self):
        return(self)

    def next(self):
        """Return the next mutated mutPosLine, or raise StopIteration."""
        found = False
        while not found and not self.MutPos.line.fileEnd:
            found = self.MutPos.next()
        if self.MutPos.line.fileEnd:
            raise StopIteration
        # advance() returns False when the requested chromosome is not
        # found in the remainder of the reference.
        if not self.Fasta.advance(endChr = self.MutPos.chrom, endPos = self.MutPos.pos):
            sys.stderr.write("End of fasta file!\n")
            raise StopIteration
        self.Bed.move(endChr = self.MutPos.chrom, endPos = self.MutPos.pos)
        self.lineNum += 1
        if self.lineNum % 10000 == 0:
            print('%s processed' % self.lineNum)
        return(self.MutPos.line)

    # Python 3 spelling of the iterator protocol; keeps "for line in files"
    # working on either interpreter.
    __next__ = next

    def close(self):
        """Close the mutpos file, the FASTA file and the output file."""
        self.MutPos.close()
        self.Fasta.close()
        self.OutFile.close()


class mutPosFile:
    """
    Cursor over an open mutpos file.

    After next(), self.line holds the most recently parsed line;
    self.chrom / self.pos are updated whenever that line is a real
    (non end-of-file) line.
    """

    def __init__(self, inFile):
        self.file = inFile
        self.chrom = ""
        self.pos = 1
        # "-" builds a placeholder line that is not flagged as end-of-file
        self.line = mutPosLine("-",0)
        self.lineNum = 0

    def _readLine(self):
        # Count the line and parse whatever readline() hands back
        # ("" at end-of-file, which mutPosLine flags as fileEnd).
        self.lineNum += 1
        return(mutPosLine(self.file.readline(), self.lineNum))

    def next(self):
        """
        Move to the next line with at least one T/C/G/A mutant count.

        Returns False only when the very first read hits end-of-file;
        if the file ends while unmutated lines are being skipped the
        result is still True and self.line.fileEnd tells the caller.
        """
        self.line = self._readLine()
        if self.line.fileEnd:
            return(False)
        # Skip positions where none of the four substitution counts is set
        while (not self.line.fileEnd
               and self.line.Ts == 0 and self.line.Cs == 0
               and self.line.Gs == 0 and self.line.As == 0):
            self.line = self._readLine()
        if not self.line.fileEnd:
            self.pos = self.line.pos
            self.chrom = self.line.chrom
        return(True)

    def __str__(self):
        return(str(self.line))

    def close(self):
        """Close the wrapped file; always returns True."""
        self.file.close()
        return(True)


class mutPosLine:
    """
    One line of a mutpos file.

    Expected whitespace-separated columns:
    chrom, reference base, position, depth, number of mutants,
    mutants to T, to C, to G, to A, insertions, deletions.

    Two special inputs build sentinel objects with placeholder fields:
    "-"  -> placeholder used before any line was read (fileEnd False)
    ""   -> end-of-file marker, as returned by readline() (fileEnd True)
    """

    def __init__(self, inLine, inLineNum):
        if inLine == "-" or inLine == "":
            self.fileEnd = (inLine == "")
            self.lineNum = -1
            self.chrom = ''
            self.refBase = ''
            self.pos = -1
            self.depth = -1
            self.muts = -1
            self.Ts = -1
            self.Cs = -1
            self.Gs = -1
            self.As = -1
            self.ins = -1
            self.dels = -1
            self.clonality = 0
        else:
            self.lineNum = inLineNum
            linebins = inLine.split()
            self.fileEnd = False
            self.chrom = linebins[0]
            self.refBase = linebins[1].upper()
            self.pos = int(linebins[2])
            self.depth = int(linebins[3])
            self.muts = int(linebins[4])
            self.Ts = int(linebins[5])
            self.Cs = int(linebins[6])
            self.Gs = int(linebins[7])
            self.As = int(linebins[8])
            self.ins = int(linebins[9])
            self.dels = int(linebins[10])
            if self.depth == 0:
                # A position without coverage has no meaningful clonality;
                # avoid dividing by zero.
                self.clonality = 0.0
            else:
                # Clonality is the largest fraction of reads supporting
                # any single substitution.
                self.clonality = max(
                    float(count) / self.depth
                    for count in (self.Ts, self.Cs, self.Gs, self.As)
                )

    def __str__(self):
        return('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % (self.lineNum, self.chrom, self.refBase, self.pos, self.depth, self.Ts, self.Cs, self.Gs, self.As, self.ins, self.dels))


class outputFile:
    """
    Line-oriented output file that is opened on demand.

    Lines are separated by "\n" written *before* every line except the
    first, so the file never ends with a trailing newline.
    """

    def __init__(self, outFileLoc):
        self.fileLoc = outFileLoc
        self.outFile = None
        self.firstLine = True

    def open(self):
        """Open (and truncate) the output file; always returns True."""
        self.outFile = open(self.fileLoc, 'w')
        # The file was just truncated, so the next write starts it.
        self.firstLine = True
        return(True)

    def write(self, strToWrite):
        """Write one line; open() must have been called first."""
        if self.firstLine == True:
            self.outFile.write("%s" % (strToWrite))
            self.firstLine = False
        else:
            self.outFile.write("\n%s" % (strToWrite))
        return(True)

    def close(self):
        """Close the file if it was opened; always returns True."""
        # Closing a file that was never opened is a no-op instead of an
        # AttributeError on None.
        if self.outFile is not None:
            self.outFile.close()
        return(True)
    

class TransTable:
    """
    Codon table read from a transtable file.

    A line containing ">>" sets the table name.  Every other line is
    expected to hold an amino acid followed by a comma-separated list
    of its codons.  The file is consumed and closed by the constructor.
    """

    def __init__(self, inTable):
        self.table = {}
        self.name = ""
        self.file = inTable

        for line in self.file:
            if ">>" in line:
                self.name = line.strip().strip(">>")
                continue
            linebins = line.split()
            if len(linebins) < 2:
                # Blank or incomplete lines carry no codon assignment.
                continue
            for codon in linebins[1].split(","):
                self.table[codon] = linebins[0]
        self.file.close()

    def translateCodon(self, codon):
        """
        Return the amino acid for codon, or "NAC" when the codon
        contains an N.  Codons missing from the table raise KeyError.
        """
        if "N" in codon:
            return("NAC")
        return(self.table[codon])


class ROI:
    """
    One region of interest parsed from a BED line.

    Columns used: chrom (1), start (2), stop (3), name (4), strand (6).
    """

    def __init__(self, inLine):
        linebins = inLine.split()
        self.chrom = linebins[0]
        self.start = int(linebins[1])
        self.stop = int(linebins[2])
        self.name = linebins[3]
        self.strand = linebins[5]

    def checkCoord(self, inCoord):
        """
        Locate inCoord within the region's reading frame.

        Returns (codonStart, codonPos, AAnum, strand, name): the
        coordinate at which the enclosing codon starts, the offset of
        inCoord inside that codon (0-2) and the 1-based codon number,
        counted from start on "+" and from stop on "-".  For any other
        strand the three numbers are -1.
        """
        codonStart = -1
        codonPos = -1
        AAnum = -1
        # Floor division keeps every result an integer on Python 2 and 3.
        if self.strand == "+":
            offset = inCoord - self.start
            codonStart = self.start + 3 * (offset // 3)
            codonPos = offset % 3
            AAnum = (offset // 3) + 1
        elif self.strand == "-":
            offset = self.stop - inCoord
            codonStart = self.stop - 3 * (offset // 3)
            codonPos = offset % 3
            AAnum = (offset // 3) + 1
        return((codonStart, codonPos, AAnum, self.strand, self.name))


class BedFile:
    def __init__(self, inFile):
        self.file = inFile
        self.ROIs = []
        self.openROIs = []
        self.chrom = ''
        self.pos = 1
        for line in self.file:
            if "#" not in line:
                tmpROI = ROI(line)
                if self.chrom == '':
                    self.chrom = tmpROI.chrom
                self.ROIs.append(tmpROI)
        self.file.close()
    
    def checkCodon(self, mut):
        Coords = []
        for ROI in self.openROIs:
            Coords.append(ROI.checkCoord(mut.pos))
        if Coords == []:
            Coords = [(-1, -1, -1, '.')]
        return(Coords)
        
    def move(self, endChr = None, endPos = None):
        if endChr == None and endPos == None:
            self.pos += 1
            for ROI in self.ROIs:
                if ROI.start <= self.pos <= ROI.stop and ROI not in self.openROIs:
                    self.openROIs.append(ROI)
            indToPop = []
            for index in xrange(len(self.openROIs)):
                if self.openROIs[index].start > self.pos or self.openROIs[index].stop < self.pos:
                    indToPop.append(index)
            for index in indToPop:
                self.openROIs.pop(index)
        
        elif endChr != None and endPos == None: 
            self.chrom = endChr
            self.openROIs = []
            self.pos = 1
            for ROI in self.ROIs:
                if ROI.start <= self.pos <= ROI.stop and ROI.chrom == self.chrom:
                    self.openROIs.append(ROI)
        
        elif endPos != None and endChr == None:
            self.pos = endPos
            for ROI in self.ROIs:
                if ROI.start <= self.pos <= ROI.stop and ROI not in self.openROIs:
                    self.openROIs.append(ROI)
            indToPop = []
            for index in xrange(len(self.openROIs)):
                if self.openROIs[index].start > self.pos or self.openROIs[index].stop < self.pos:
                    indToPop.append(index)
            for index in indToPop:
                self.openROIs.pop(index)
        
        elif endPos != None and endChr != None:
            self.chrom = endChr
            self.openROIs = []
            self.pos = endPos
            for ROI in self.ROIs:
                if ROI.start <= self.pos <= ROI.stop and ROI.chrom == self.chrom:
                    self.openROIs.append(ROI)


class fastaWin:
    """
    Rolling window over a FASTA file.

    The window (self.data) holds two sequence lines' worth of bases and
    is used as a circular buffer: moveWin() overwrites the older half
    with the next line read from the file.  minPos / maxPos are the
    1-indexed reference coordinates currently covered by the window.
    The length of the first sequence line of a chromosome (lLength)
    defines the line length; shorter lines are padded with N.

    NOTE(review): moveWin() does not recognise ">" header lines, so
    reading past the end of one chromosome in a multi-record FASTA
    would load the next header as sequence -- confirm inputs.
    """

    def __init__(self, dataSource, bufferSize = 3):
        # bufferSize is accepted for interface compatibility; this
        # version always keeps a fixed margin of 3 bases.
        self.sourceFile = dataSource
        # Chromosomes skipped by advance(endChr=...).
        self.usedChrs = []

        ##initialize the window in the first chromosome
        header = self.sourceFile.readline().strip()
        self.chrom = header.split(">")[1].split(" ")[0]
        line = self.sourceFile.readline().strip().upper()
        self.sizeMax=2*len(line) #size of the window
        self.eof=False #has the end of the file been reached?
        self.really=False #has the final line of N padding been added?
        #how long is one line in this reference genome anyway?
        self.lLength=len(line)

        #one line of N's first, so that mutations near the beginning
        #can be processed, then the first sequence line
        self.data = list('N'*self.lLength) + list(line)
        #the leftmost position on the reference genome.  1-indexed
        self.minPos=1-self.lLength
        #the rightmost position on the reference genome.  1-indexed
        self.maxPos=self.lLength
        self.pos = 1

    def _fillTo(self):
        # Read lines until self.pos is at least 3 bases left of the
        # window's right edge; stop (with a warning) once the reference
        # is exhausted instead of looping forever.
        while self.pos > self.maxPos - 3:
            if self.moveWin() == False:
                sys.stderr.write('>>Position %s:%s is past the end of the reference\n' % (self.chrom, self.pos))
                break

    def advance(self, endChr = None, endPos = None):
        """
        Move the window.

        no arguments  -- step one base forward
        endChr only   -- skip forward to chromosome endChr
        endPos only   -- move to endPos on the current chromosome
        both          -- skip to endChr, then move to endPos

        Returns False when the requested chromosome was not found
        before the end of the file, True otherwise.
        """
        if endChr is None and endPos is None:
            self.pos += 1
            if self.pos > self.maxPos - 3:
                self.moveWin()
        elif endPos is None:
            while self.chrom != endChr and self.chrom != '':
                self.usedChrs.append(self.chrom)
                self.chrom = self.NewChrom()
        elif endChr is None:
            self.pos = endPos
            self._fillTo()
        else:
            while self.chrom != endChr and self.chrom != '':
                self.chrom = self.NewChrom()
            self.pos = endPos
            if self.chrom != '':
                self._fillTo()
        return(self.chrom != '')

    def moveWin(self):
        """
        Shift the window one line to the right.

        Returns True while the window could be moved (or the end of the
        file was just detected) and False once the file is exhausted and
        the single trailing line of N padding has already been added.
        """
        dataSource = self.sourceFile.readline().strip().upper()
        if self.eof == False:
            if dataSource=="":
                self.eof=True
                return(True)
            if len(dataSource) > self.lLength:
                # Would overflow the half-window reserved for one line.
                raise ValueError('FASTA line longer than the first sequence line (%s > %s)' % (len(dataSource), self.lLength))
            inSeq = dataSource.ljust(self.lLength, "N")
        elif self.really == False:
            # One extra line of N's so the last bases still have context
            inSeq = "N"*self.lLength
            self.really = True
        else:
            return(False)
        self.maxPos+=self.lLength
        self.minPos+=self.lLength
        #index of the half of the buffer that holds the oldest line
        wPos=self.minPos%self.sizeMax-1
        self.data[wPos:wPos+self.lLength] = list(inSeq)
        return(True)

    def _baseAt(self, coord):
        # Map a 1-indexed reference coordinate onto the circular buffer.
        b=(coord+self.lLength)%self.sizeMax-1
        if b == -1:
            b = self.sizeMax - 1
        return(str(self.data[b]))

    def getSeq(self, pos, direction):
        """
        Return three bases relative to pos.

        "+" -- bases pos, pos+1, pos+2; the window is moved forward as
               needed and "" is returned when the file ends first.
        "-" -- bases pos-2, pos-1, pos passed through revComp(); the
               window is not moved.  revComp is not defined in this
               class; presumably a module-level helper.
        Any other direction returns "".
        """
        pos = int(pos)
        seq=""
        if direction == "-":
            seq = "".join(self._baseAt(i) for i in range(pos+1-3, pos+1))
            seq = revComp(seq)
        elif direction == "+":
            while pos + 3 >= self.maxPos:
                if self.moveWin() == False:
                    return("")
            seq = "".join(self._baseAt(i) for i in range(pos, pos + 3))
        return(seq)

    def NewChrom(self):
        """
        Skip to the next ">" header, reload the window from the first
        sequence line after it and return the new chromosome name.
        Returns '' (leaving the window untouched) at end-of-file.
        """
        mvTest=self.sourceFile.readline().strip()
        while ">" not in mvTest and mvTest != "":
            mvTest=self.sourceFile.readline().strip()
        if mvTest == "":
            return('')
        self.chrom=mvTest.split(">")[1].split(" ")[0]
        newData = self.sourceFile.readline().strip().upper()
        self.lLength = len(newData)
        self.data = list('N'*self.lLength) + list(newData)
        self.minPos=1-self.lLength
        self.maxPos=self.lLength
        self.sizeMax = 2 * self.lLength
        # A fresh record was loaded, so any earlier end-of-data state
        # no longer applies.
        self.eof = False
        self.really = False
        return(self.chrom)

    def close(self):
        """Close the FASTA file; always returns True."""
        self.sourceFile.close()
        return(True)


class Mutation:
    """One mutpos record annotated with its codon / amino-acid consequences.

    Built from a parsed mutpos line (attributes ``chrom``, ``pos``,
    ``refBase``, ``Ts``, ``Cs``, ``Gs``, ``As``).  After construction the
    caller either assigns a codon with ``setCoords`` + ``defineCodons`` or
    marks the record as non-coding with ``setNonCoding``.  ``str()`` renders
    the tab-separated output row.

    Attributes of note:
      T, C, G, A         -- True when a mutation to that base was observed.
      ntCompact          -- comma-separated "<ref><pos><alt>" entries in
                            T, C, G, A order, or '.' when there are none.
      TmutAA .. AmutAA   -- resulting amino acid per base; "Ref" marks the
                            reference base, "NP" a base with no mutation,
                            "NC" a non-coding record.
      syn, nonSyn        -- counts of synonymous / non-synonymous changes.
      aaCompact          -- comma-separated "<refAA><codon#><mutAA>" entries,
                            or '.' when no amino acid changes.
    """

    # Fixed iteration order; it determines the order of compact entries.
    _BASES = ('T', 'C', 'G', 'A')
    # Three-letter to one-letter amino-acid codes used for aaCompact.
    _AA_ABBREV = {'Ala':'A', 'Arg':'R', 'Asn':'N', 'Asp':'D', 'Cys':'C', 'Gln':'Q', 'Glu':'E', 'Gly':'G', 'His':'H', 'Ile':'I', 'Leu':'L', 'Lys':'K', 'Met':'M', 'Phe':'F', 'Pro':'P', 'Ser':'S', 'Thr':'T', 'Trp':'W', 'Tyr':'Y', 'Val':'V', 'Asx':'B', 'Glx':'Z', 'Ter':'Ter', 'NAC':'NAC'}

    def __init__(self,inMutPosLine):
        """Copy position data from ``inMutPosLine`` and derive ``ntCompact``."""
        self.chrom = inMutPosLine.chrom
        self.pos = inMutPosLine.pos
        self.refNt = inMutPosLine.refBase
        self.T = inMutPosLine.Ts != 0
        self.C = inMutPosLine.Cs != 0
        self.G = inMutPosLine.Gs != 0
        self.A = inMutPosLine.As != 0
        self.name = '.'

        self.codonCoords = (0, 0, 0)
        self.direction = "*"
        self.refCodon = ""
        self.mutCodonT = ""
        self.mutCodonC = ""
        self.mutCodonG = ""
        self.mutCodonA = ""
        self.refAA = ""
        self.TmutAA = "Ref" if self.refNt == 'T' else "NP"
        self.CmutAA = "Ref" if self.refNt == 'C' else "NP"
        self.GmutAA = "Ref" if self.refNt == 'G' else "NP"
        self.AmutAA = "Ref" if self.refNt == 'A' else "NP"
        self.syn = 0
        self.nonSyn = 0

        # ntCompact is built from the bases as given in the mutpos line and
        # is not altered by a later strand flip in setCoords().
        compactCore = self.refNt + str(self.pos)
        ntChanges = [compactCore + base
                     for base in self._BASES
                     if getattr(self, base) and self.refNt != base]
        self.ntCompact = ','.join(ntChanges) if ntChanges else '.'

        self.aaCompact = '.'

    def setCoords(self, inCoords):
        """Attach codon coordinates and, on the "-" strand, flip to that strand.

        ``inCoords`` is indexed as: [0] position passed to ``Fasta.getSeq``,
        [1] index of the mutated base within the codon, [2] codon number,
        [3] direction, [4] name.

        For direction "-" the per-base flags are complemented (T<->A, C<->G)
        and ``refNt`` is reverse-complemented.  The "Ref"/"NP" placeholders
        are swapped the same way so that the "Ref" marker stays in the column
        of the (now complemented) reference base.

        Always returns True.
        """
        self.codonCoords = inCoords
        self.direction = inCoords[3]
        self.name = inCoords[4]
        if self.direction == "-":
            self.T, self.A = self.A, self.T
            self.C, self.G = self.G, self.C
            # Keep the placeholder labels in step with the flags and refNt.
            self.TmutAA, self.AmutAA = self.AmutAA, self.TmutAA
            self.CmutAA, self.GmutAA = self.GmutAA, self.CmutAA
            self.refNt = revComp(self.refNt)
        return(True)

    def defineCodons(self, Fasta, Table):
        """Translate the reference codon and every observed mutant codon.

        ``Fasta.getSeq(position, direction)`` supplies the reference codon
        and ``Table.translateCodon(codon)`` the amino-acid names.  For each
        base flagged as mutated, the mutant codon and amino acid are stored,
        ``syn`` / ``nonSyn`` are incremented, and amino-acid changes are
        summarised in ``aaCompact``.

        Raises KeyError if a translated amino acid has no entry in the
        abbreviation table.
        """
        self.refCodon = Fasta.getSeq(self.codonCoords[0], self.direction)
        mutIndex = self.codonCoords[1]
        self.refAA = Table.translateCodon(self.refCodon)

        for base in self._BASES:
            if not getattr(self, base):
                continue
            # Substitute the mutant base at its index; every other position
            # keeps the reference base.
            mutCodon = ''.join(base if index == mutIndex else refBase
                               for index, refBase in enumerate(self.refCodon))
            setattr(self, 'mutCodon' + base, mutCodon)
            mutAA = Table.translateCodon(mutCodon)
            setattr(self, base + 'mutAA', mutAA)
            if mutAA == self.refAA:
                self.syn += 1
            else:
                self.nonSyn += 1

        compactStarter = self._AA_ABBREV[self.refAA] + str(self.codonCoords[2])
        aaChanges = [compactStarter + self._AA_ABBREV[mutAA]
                     for mutAA in (self.TmutAA, self.CmutAA, self.GmutAA, self.AmutAA)
                     if mutAA != 'NP' and mutAA != 'Ref' and mutAA != self.refAA]
        if aaChanges:
            self.aaCompact = ','.join(aaChanges)

    def setNonCoding(self):
        """Mark the record as non-coding ("NC" in every amino-acid column)."""
        self.codonCoords = (0, 0, "NC", '*')
        self.TmutAA = "NC"
        self.CmutAA = "NC"
        self.GmutAA = "NC"
        self.AmutAA = "NC"
        self.refAA = "NC"

    def __str__(self):
        """Return the 13-column tab-separated output row for this record."""
        strToReturn = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % (self.chrom, self.pos, self.direction, self.refNt, self.ntCompact, self.name, self.codonCoords[2], self.refAA, self.TmutAA, self.CmutAA, self.GmutAA, self.AmutAA, self.aaCompact)
        return(strToReturn)


def revComp(inSeq):
    """Return the reverse complement of ``inSeq`` in upper case.

    Only A, C, G, T and N (any case) are accepted; any other character
    raises KeyError.
    """
    complementOf = {"A": "T", "G": "C", "C": "G", "T": "A", "N": "N"}
    return("".join(complementOf[base] for base in reversed(inSeq.upper())))

def main():
    """Command-line entry point: annotate mutpos records and write a report.

    Parses the command line, opens all inputs through ``Files``, builds one
    ``Mutation`` per (mutpos line, codon location) pair whose clonality lies
    within [minClonality, maxClonality], writes every record to the output
    file and prints the synonymous / non-synonymous totals.
    """
    # Read in the list of arguments from the command line.
    # NOTE: the long option '--refrence' is misspelled but is part of the
    # existing command-line interface, so it is left unchanged.
    parser = ArgumentParser()
    parser.add_argument('-f', '--refrence', 
                        action = 'store', 
                        dest = 'ref', 
                        help = 'The reference genome in FASTA format.', 
                        required=True
                        )
    parser.add_argument('-t', '--table', 
                        action = 'store', 
                        dest = 'table', 
                        help = 'The translation table for the organism.  The first line should be a name for the table preceded by ">>", the second row should contain Met codons, and the third row should contain Ter codons.   All rows other than the first should be: "[Three letter AA abreviation] [DNA codons leading to this AA]".', 
                        required=True
                        )
    parser.add_argument('-p', '--positions', 
                        action = 'store', 
                        dest = 'mutPos', 
                        help = 'The list of where mutations are as a mutpos file.', 
                        required=True
                        )
    parser.add_argument('-b', '--bedFile', 
                        action = 'store', 
                        dest = 'bedFile', 
                        help = 'The locations of various exons (ROIs) as a bed file.', 
                        required=True
                        )
    parser.add_argument('-o', '--outFile', 
                        action = 'store', 
                        dest = 'outFile', 
                        help = 'A name for the output file.', 
                        required=True
                        )
    parser.add_argument('-c', '--minClonality', 
                        action = 'store', 
                        type = float,
                        dest = 'minClonal',
                        default = 0,
                        help = 'The minimum clonality at which to examine a mutation. [0]'
                        )
    parser.add_argument('-C', '--maxClonality', 
                        action = 'store', 
                        type = float,
                        dest = 'maxClonal', 
                        default = 1,
                        help = 'The maximum clonality at which to examine a mutation. [1]'
                        )
    o = parser.parse_args()
    
    # Initialize all files
    allFiles = Files(o.bedFile, o.ref, o.table, o.mutPos, o.outFile, o.minClonal, o.maxClonal)
    # At this point, the list of regions of interest from the bed file, 
    # the beginning of the fasta file, the translation table, and
    # the first line of the mutpos file should be read in.  In addition, 
    # all files should be in sync with the first mutation in the mutpos file.  
    
    # Every annotated record, in the order it will be written out.
    allMuts = []
    
    # Start iterating through the files:
    print('Starting checking mutations...')
    for line in allFiles:
        if not (o.minClonal <= line.clonality <= o.maxClonal):
            continue
        mut = Mutation(line)
        allMuts.append(mut)
        CodonLocations = allFiles.Bed.checkCodon(mut)
        
        # The first codon location annotates the record created above; each
        # further location gets its own fresh record for the same line.
        # The position is tracked by index (not object identity) so that two
        # locations which happen to be the same object are still handled as
        # separate records.
        for codonIndex, Codon in enumerate(CodonLocations):
            if codonIndex > 0:
                mut = Mutation(line)
                allMuts.append(mut)
            if Codon[0:3] != (-1, -1, -1):
                mut.setCoords(Codon)
                mut.defineCodons(allFiles.Fasta, allFiles.Table)
            else:
                mut.setNonCoding()
    
    # Open the output file
    print('Writing output files...')
    allFiles.OutFile.open()
    totSyn = 0
    totNonSyn = 0
    # Write all mutation records to the output file
    for mut in allMuts:
        totSyn += mut.syn
        totNonSyn += mut.nonSyn
        allFiles.OutFile.write(str(mut))
    totalMuts = totSyn + totNonSyn
    # Close all files
    allFiles.close()
    print('Summary Statistics:')
    print('%s\tpotential amino acid changes:' % (totalMuts))
    print('%s\tsynonymous mutations' % totSyn)
    print('%s\tnon-synonymous mutations' % totNonSyn)

# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()


(c) Table of amino acids and their corresponding codons for human mitochondrial DNA (VertebrateMito.transtable)
>>VertebrateMito
Met ATG,ATA Start
Ter TAA,TAG,AGA,AGG Stop
Ala	GCT,GCC,GCA,GCG .
Arg	CGT,CGC,CGA,CGG .
Asn	AAT,AAC .
Asp	GAT,GAC .
Cys	TGT,TGC .
Gln	CAA,CAG .
Glu	GAA,GAG .
Gly	GGT,GGC,GGA,GGG .
His	CAT,CAC .
Ile	ATT,ATC .
Leu	TTA,TTG,CTT,CTC,CTA,CTG .
Lys AAA,AAG .
Phe TTT,TTC .
Pro CCT,CCC,CCA,CCG .
Ser TCT,TCC,TCA,TCG,AGT,AGC .
Thr ACT,ACC,ACA,ACG .
Trp TGA,TGG .
Tyr TAT,TAC .
Val GTT,GTC,GTA,GTG .