# -*- html -*-

# Recipe example for unpaired amplicon data (pairs are supported). Steps are
# run in the order they are given. Recipes are run with the run_recipe script
# (type its name to see options) and steps available for recipes are listed
# with help_recipe.
#
# Layout: one setting per line as "key = value"; a line starting with "#" is
# a comment, and a commented-out setting ("# key = value") is inactive.
#
# NOTE(review): the mode line above says html, yet no step delimiter tags are
# visible in this file; the repeated "title" keys suggest each step was once
# enclosed in its own tag. Confirm against a working recipe before running.

title = Pig Staph 16S
author = Carmen
# email = user@company.com
site = KU

# -------------------------------------------------------------------------
# SFF -> FASTQ CONVERSION
# -------------------------------------------------------------------------

title = Conversion to fastq

# -------------------------------------------------------------------------
# DE-MULTIPLEXING
# -------------------------------------------------------------------------

# Matches one or more primer motifs against the sequences and splits them
# into separate files, one for each barcode. The barcode is looked for just
# upstream of the motif.

title = Pattern de-multiplexing
# NOTE(review): XXXXXXXXXXX looks like a placeholder - set the real barcode
# file before running.
bar-file = XXXXXXXXXXX
bar-spacing = 2
bar-quality = 99%
primer-file = Pig.primers

# -------------------------------------------------------------------------
# BASIC CLEANING
# -------------------------------------------------------------------------

title = Sequence cleaning
# quality-type = Sanger
# keep-outputs = no

# ---------------------------------------------------------------------
# PRIMER REMOVALS
# ---------------------------------------------------------------------

# The primer demultiplex step above (see the Pig.primers file) removed
# the forward primer, but the end of sequence may include the reverse
# primer. Here we clip and trim that.

title = Reverse 3 end clip
pattern-string = CCGTCAATTCMTTTR[1,1,1] AGT
pattern-orient = reverse
include-match = no
search-distance = 100

title = Reverse 3 end trim
sequence = ACTYAAAKGAATTGACGG
search-distance = 100
minimum-length = 1
minimum-strict = 80%

# ---------------------------------------------------------------------
# QUALITY TRIM
# ---------------------------------------------------------------------

# This removes low quality from beginnings and ends. A window of set
# length is used, within which a given number of bases must match with
# a given minimum quality percentage.

title = Start quality trim
window-length = 15
window-match = 14
minimum-quality = 97

title = End quality trim
window-length = 35
window-match = 34
minimum-quality = 96

# ---------------------------------------------------------------------
# LENGTH FILTER
# ---------------------------------------------------------------------

# Unlike trimming, filtering discards the whole sequence if sequences
# are too short after having passed through the steps above.

title = Length filter
minimum-length = 180

# ---------------------------------------------------------------------
# QUALITY FILTER
# ---------------------------------------------------------------------

# This discards the whole sequence unless at least a set percentage of
# its bases has at least a given quality percentage.
# NOTE(review): the earlier wording said "quality below a given
# percentage", which read as inverted - confirm with help_recipe.

title = Quality filter
minimum-quality = 95
minimum-strict = 90

# -------------------------------------------------------------------------
# DE-REPLICATION
# -------------------------------------------------------------------------

title = Sequence uniqification
# keep-outputs = no

# -------------------------------------------------------------------------
# CHIMERA FILTER
# -------------------------------------------------------------------------

dataset-name = RDP_SSU_domain_3-5-Default
title = Chimera filtering
word-length = 8
step-length = 4
minimum-score = 30
# debug-output = yes
# keep-outputs = no

# -------------------------------------------------------------------------
# RDP PROFILE
# -------------------------------------------------------------------------

title = RDP similarities
output-name = org_seqs
dataset-names = RDP_SSU_domain_3-5-Default
match-word-length = 8
match-step-length = 2
minimum-base-quality = 95%
match-minimum = 70%
match-top-range = 1%
match-forward = yes
match-reverse = no
# keep-outputs = no

title = RDP taxonomy profiling
output-name = org_profile
taxonomy-names = RDP_SSU
minimum-oligo-count = 1
maximum-ambiguity-depth = 1
map-method = branch_tree
match-minimum = 70%
match-use-range = 0%
score-unclassified-sister = no
# NOTE(review): XXXXXXXXXX looks like a placeholder - set the real id file
# before running.
id-file = XXXXXXXXXX
# sequence-outputs = phylum
# keep-outputs = no

title = RDP taxonomic profiles
input-step = organism-taxonomy-profiler
output-name = org_profile_rdp
taxonomy-minimum-score = 5
zip-outputs = yes