# -*- html -*-
# Recipe example for unpaired amplicon data (pairs are supported). Steps are
# run in the order they are given. Recipes are run with the run_recipe script
# (type its name to see options) and steps available for recipes are listed
# with help_recipe.
# Recipe-level metadata: title, author and site. The email line is left
# commented out.
# NOTE(review): `title` is set again further down, once per step. No step
# delimiters are visible in this file, so presumably the recipe parser scopes
# each later `title` to its own step - confirm, since a plain key/value parser
# would treat them as duplicate keys.
title = Pig Staph 16S
author = Carmen
# email = user@company.com
site = KU
# -------------------------------------------------------------------------
# SFF -> FASTQ CONVERSION
# -------------------------------------------------------------------------
# Conversion step; it sets nothing but its title.
title = Conversion to fastq
# -------------------------------------------------------------------------
# DE-MULTIPLEXING
# -------------------------------------------------------------------------
# Matches one or more primer motifs against the sequences and splits them
# into separate files, one for each barcode. The barcode is looked for just
# upstream of the motif.
# De-multiplexing step settings: barcode file, barcode spacing and quality,
# and the primer file whose motifs are matched.
# NOTE(review): bar-file looks like a placeholder (XXXXXXXXXXX) - supply the
# real barcode file before running.
# NOTE(review): bar-quality is written with a % suffix while the
# minimum-quality values in the later trim/filter steps are not - confirm that
# both spellings are accepted and mean the same thing.
# NOTE(review): the unit of bar-spacing is not shown here; presumably the
# number of positions allowed between barcode and motif - confirm.
title = Pattern de-multiplexing
bar-file = XXXXXXXXXXX
bar-spacing = 2
bar-quality = 99%
primer-file = Pig.primers
# -------------------------------------------------------------------------
# BASIC CLEANING
# -------------------------------------------------------------------------
# Cleaning step. Both of its options (quality-type, keep-outputs) are
# commented out, so only the title is set here.
title = Sequence cleaning
# quality-type = Sanger
# keep-outputs = no
# ---------------------------------------------------------------------
# PRIMER REMOVALS
# ---------------------------------------------------------------------
# The primer de-multiplexing step above (see the Pig.primers file) removed
# the forward primer, but the end of the sequence may include the reverse
# primer. Here we clip and trim that,
# Clip step for the reverse primer at the 3' end of the sequences.
# pattern-string holds two motif parts (CCGTCAATTCMTTTR and AGT, with IUPAC
# ambiguity codes) separated by a bracketed triple.
# NOTE(review): the meaning of the [1,1,1] triple is not shown in this file -
# presumably per-element match tolerances; confirm against the pattern syntax
# documentation.
# include-match = no: presumably the matched region itself is removed as
# well - confirm.
title = Reverse 3 end clip
pattern-string = CCGTCAATTCMTTTR[1,1,1] AGT
pattern-orient = reverse
include-match = no
search-distance = 100
# Trim step for the reverse primer at the 3' end of the sequences.
# `sequence` is the reverse complement of the two motif parts of the clip
# step's pattern-string joined together (CCGTCAATTCMTTTR + AGT), so both steps
# target the same primer.
# NOTE(review): minimum-strict is written with a % suffix here but without one
# in the quality filter step - confirm both forms are accepted.
title = Reverse 3 end trim
sequence = ACTYAAAKGAATTGACGG
search-distance = 100
minimum-length = 1
minimum-strict = 80%
# ---------------------------------------------------------------------
# QUALITY TRIM
# ---------------------------------------------------------------------
# This removes low quality from beginnings and ends. A window of set
# length is used, within which a given number of bases must match with
# a given minimum quality percentage,
# Quality trim at the sequence start: a 15-long window in which 14 bases must
# reach the minimum quality of 97.
title = Start quality trim
window-length = 15
window-match = 14
minimum-quality = 97
# Quality trim at the sequence end: a 35-long window in which 34 bases must
# reach the minimum quality of 96 (a longer window and slightly lower
# threshold than the start trim).
title = End quality trim
window-length = 35
window-match = 34
minimum-quality = 96
# ---------------------------------------------------------------------
# LENGTH FILTER
# ---------------------------------------------------------------------
# Unlike trimming, filtering discards the whole sequence if sequences
# are too short after having passed through the steps above,
# Length filter: the cutoff for whole-sequence removal is 180.
# NOTE(review): presumably counted in bases after clipping/trimming - confirm.
title = Length filter
minimum-length = 180
# ---------------------------------------------------------------------
# QUALITY FILTER
# ---------------------------------------------------------------------
# This discards the whole sequence unless at least a set percentage of its
# bases has a quality at or above a given minimum,
# Whole-sequence quality filter.
# NOTE(review): presumably minimum-quality (95) is the per-base quality
# threshold and minimum-strict (90) the percentage of bases that must reach
# it - confirm against the step documentation.
title = Quality filter
minimum-quality = 95
minimum-strict = 90
# -------------------------------------------------------------------------
# DE-REPLICATION
# -------------------------------------------------------------------------
# De-replication step. Its only option (keep-outputs) is commented out, so
# only the title is set here.
title = Sequence uniqification
# keep-outputs = no
# -------------------------------------------------------------------------
# CHIMERA FILTER
# -------------------------------------------------------------------------
# Chimera filter step, using the same RDP dataset that the RDP similarities
# step names in its dataset-names key.
# NOTE(review): dataset-name is listed before title here, unlike every other
# step in this file where title comes first. If the parser starts a new step
# at each title, this key would attach to the preceding step - confirm.
dataset-name = RDP_SSU_domain_3-5-Default
title = Chimera filtering
word-length = 8
step-length = 4
minimum-score = 30
# debug-output = yes
# keep-outputs = no
# -------------------------------------------------------------------------
# RDP PROFILE
# -------------------------------------------------------------------------
# Similarity step against the RDP dataset; output is named org_seqs.
# Only the forward direction is matched (match-forward = yes,
# match-reverse = no).
# NOTE(review): presumably match-minimum is the lowest similarity kept and
# match-top-range the band below the best match that is also kept - confirm.
title = RDP similarities
output-name = org_seqs
dataset-names = RDP_SSU_domain_3-5-Default
match-word-length = 8
match-step-length = 2
minimum-base-quality = 95%
match-minimum = 70%
match-top-range = 1%
match-forward = yes
match-reverse = no
# keep-outputs = no
# Taxonomy profiling step against the RDP_SSU taxonomy; output is named
# org_profile. match-minimum repeats the 70% used by the similarities step.
# NOTE(review): id-file looks like a placeholder (XXXXXXXXXX) - supply the
# real file before running.
# The sequence-outputs and keep-outputs options are commented out.
title = RDP taxonomy profiling
output-name = org_profile
taxonomy-names = RDP_SSU
minimum-oligo-count = 1
maximum-ambiguity-depth = 1
map-method = branch_tree
match-minimum = 70%
match-use-range = 0%
score-unclassified-sister = no
id-file = XXXXXXXXXX
# sequence-outputs = phylum
# keep-outputs = no
# Final profile output step. It reads from the step named by input-step and
# writes org_profile_rdp, with zipped outputs enabled.
# NOTE(review): input-step refers to organism-taxonomy-profiler, a name that
# does not appear elsewhere in this file - presumably the step type of the
# preceding taxonomy profiling step; confirm.
title = RDP taxonomic profiles
input-step = organism-taxonomy-profiler
output-name = org_profile_rdp
taxonomy-minimum-score = 5
zip-outputs = yes