##########################################################################################################################################################
##########################################################################################################################################################
##########################################################################################################################################################

The representative reads of the OTUs were compared with the 16S rRNA genes of the type strains in the SILVA database as follows:

1. 	Go to the search function on the SILVA website (https://www.arb-silva.de/).
2. 	Select strain -> select the cultured type strains (flags [T] and [C]) -> export the 16S rRNA gene sequences as FASTA.
3. 	Format the database to be ready for blastn:
		formatdb -p F -i arb-silva.de_2015-05-19_id256786.fasta
4.	Run a local blastn search of the OTU representative reads against the formatted database:
		blastn -query Mapping_file_Micr_cult_f_otu_database.fa -db arb-silva.de_2015-05-19_id256786.fasta -outfmt 7 -perc_identity 90 -out otus_vs_type_strains.bln
5.	Parse the output file (otus_vs_type_strains.bln) with the custom Perl script below.

##########################################################################################################################################################
##########################################################################################################################################################
##########################################################################################################################################################

#Custom Perl script:

#!/usr/bin/perl
use strict;
use warnings;
no strict "refs";	# NOTE(review): no symbolic references are visible in this script; this relaxation looks unnecessary - confirm before removing

#Relative-abundance cut-off. It is only mentioned in a commented-out filter of the
#distribution-file loop, so it currently has no effect on the output.
my $rel_ab_threshold = 0.001;

#Hit filters: a hit is accepted when its identity is below $identity_upper_bound and its
#alignment is longer than $min_aln_length. The identity column is presumably a percentage
#(confirm against the BLAST output format), in which case 101 accepts every hit.
my $identity_upper_bound = 101;
my $min_aln_length = 30;

my $blast_file = "otus_vs_type_strains.bln";

my %OTUs_of_interest;	#OTU ID => 1 for every OTU with an accepted hit
my %OTU_to_priority;	#not used in the visible part of this script
my %OTU_to_SILVA_OTU;	#OTU ID => subject ID (column 2) of the accepted hit
my %OTU_to_identity;	#OTU ID => identity (column 3) of the accepted hit
my %OTU_to_aln_length;	#OTU ID => alignment length (column 4) of the accepted hit

#Read the tabular BLAST output. Only the first accepted hit of each OTU is stored;
#later hits of the same OTU are ignored.
open my $blast_fh, '<', $blast_file or die "Cannot open $blast_file: $!";
while (my $line = <$blast_fh>) {
	next if $line =~ m/^#/;		#comment lines of the BLAST report
	chomp $line;
	my ($OTU_ID, $otu_ID_SILVA, $identity, $aln_length) = (split("\t", $line))[0,1,2,3];
	next unless defined $aln_length;	#blank line or fewer than four columns
	next if exists $OTUs_of_interest{$OTU_ID};
	next unless ($identity < $identity_upper_bound) && ($aln_length > $min_aln_length);
	$OTUs_of_interest{$OTU_ID}++;
	$OTU_to_SILVA_OTU{$OTU_ID} = $otu_ID_SILVA;
	$OTU_to_identity{$OTU_ID} = $identity;
	$OTU_to_aln_length{$OTU_ID} = $aln_length;
}
close $blast_fh;

#Link the sequence ID used in the BLAST output to the full annotation given by SILVA.
#Only FASTA header lines (starting with ">") are read: the first whitespace-delimited
#token is the ID, the rest of the header is the annotation.
my %number_to_SILVA_annotation;
my $silva_fasta = "arb-silva.de_2015-05-19_id256786.fasta";
open my $silva_fh, '<', $silva_fasta or die "Cannot open $silva_fasta: $!";
while (my $header = <$silva_fh>) {
	next unless $header =~ s/^>//;	#skips sequence lines and strips the ">" in one step
	chomp $header;
	my ($number_id, $annotation) = split(' ', $header, 2);
	next unless defined $number_id;	#empty header line
	#A header without a description gets an empty annotation instead of a copy of its ID
	$number_to_SILVA_annotation{$number_id} = defined $annotation ? $annotation : '';
}
close $silva_fh;




#Retrieve the sequences of the selected OTUs from the OTU database file.
#Each line holds the sequence in the first and the OTU ID in the second tab-separated column.

my %OTU_to_sequence;
my $otu_database = "Mapping_files_micr_cult_all_fw_otu_database";
open my $otu_database_fh, '<', $otu_database or die "Cannot open $otu_database: $!";
while (my $line = <$otu_database_fh>) {
	chomp $line;
	my ($sequence, $otu) = split("\t", $line);
	next unless defined $otu;	#blank line or line without a second column
	next unless exists $OTUs_of_interest{$otu};
	$OTU_to_sequence{$otu} = $sequence;
}
close $otu_database_fh;

#Retrieve the annotation of the selected OTUs from the tax database file.
#Each line holds the OTU ID in the first and its annotation in the second tab-separated column.
my %OTU_to_SILVA_annotation;
my $tax_database = "Mapping_files_micr_cult_all_fw_tax_database";
open my $tax_database_fh, '<', $tax_database or die "Cannot open $tax_database: $!";
while (my $line = <$tax_database_fh>) {
	chomp $line;
	my ($otu, $annotation) = (split("\t", $line))[0,1];
	next unless defined $otu;	#blank line
	next unless exists $OTUs_of_interest{$otu};
	$OTU_to_SILVA_annotation{$otu} = $annotation;
}
close $tax_database_fh;

#Obtain the info of all the samples from the mapping file.
#The sample ID is read from column 1 and the remaining values from columns 7 to 11
#(1-based) of every tab-separated line. The later steps of this script only use the
#keys of %sampleID_to_no_of_colonies, i.e. the list of sample IDs.
my %sampleID_to_no_of_colonies;
my %sample_ID_to_medium;
my %sample_ID_to_sample_number;
my %sampleID_to_duplicate;
my %sampleID_to_dilution;
my $mapping_file = "Mapping_files_micr_cult_all_fw.txt";
open my $mapping_fh, '<', $mapping_file or die "Cannot open $mapping_file: $!";
while (my $line = <$mapping_fh>) {
	chomp $line;
	next if $line =~ m/^#/;		#header/comment lines
	next unless $line =~ m/[1-9]/;	#lines without any non-zero digit are skipped
	my ($sampleID, $no_of_colonies, $medium, $sample_number, $duplicate, $dilution) = (split("\t", $line))[0,6,7,8,9,10];
	#An empty sample ID would later be turned into the file name "_tax_file"
	next unless defined $sampleID && length $sampleID;
	$sampleID_to_no_of_colonies{$sampleID} = $no_of_colonies;
	$sample_ID_to_medium{$sampleID} = $medium;
	$sample_ID_to_sample_number{$sampleID} = $sample_number;
	$sampleID_to_duplicate{$sampleID} = $duplicate;
	$sampleID_to_dilution{$sampleID} = $dilution;
}
close $mapping_fh;



#For every sample of the mapping file, the sample-specific IDs of the selected OTUs are
#retrieved from the sample's tax file (<sample ID>_tax_file) by exact sequence match.
#The relative abundance of each of these OTUs is then collected from the sample's
#distribution file (<sample ID>_total_sample_distribution).

#General OTU ID => { tax file name => relative abundance }.
#The inner key is the tax file name (sample ID plus "_tax_file"), not the bare sample ID.
my %info_all_hash;

#Reverse index (sequence => general OTU ID) so that every tax-file record needs one hash
#lookup instead of a comparison against every selected OTU.
#NOTE(review): OTU sequences are presumably unique; if two OTUs share a sequence, the
#OTU ID that sorts first is used.
my %sequence_to_general_OTU_ID;
foreach my $otu (sort keys %OTU_to_sequence) {
	my $otu_sequence = $OTU_to_sequence{$otu};
	next unless defined $otu_sequence;
	next if exists $sequence_to_general_OTU_ID{$otu_sequence};
	$sequence_to_general_OTU_ID{$otu_sequence} = $otu;
}

foreach my $sampleID (sort keys %sampleID_to_no_of_colonies) {
	my $tax_file = $sampleID.'_tax_file';
	my $distribution_file = $sampleID.'_total_sample_distribution';

	#Map the sample-specific OTU IDs to the general OTU IDs
	my %sample_OTU_ID_to_general_OTU_ID;
	open my $sample_tax_fh, '<', $tax_file or die "Cannot open $tax_file: $!";
	{
		#Records are separated by ">"; the separator is localized so that it is
		#restored automatically when this block is left.
		local $/ = ">";
		while (my $record = <$sample_tax_fh>) {
			chomp $record;	#removes the trailing ">"
			#The sequence is the run of G/A/T/C characters at the end of the record
			next unless $record =~ m/([GATC]+)$/;
			my $sequence = $1;
			next unless exists $sequence_to_general_OTU_ID{$sequence};
			#The sample-specific ID is the text in front of the first tab
			next unless $record =~ m/(.*?)\t/;
			my $ID = $1;
			$sample_OTU_ID_to_general_OTU_ID{$ID} = $sequence_to_general_OTU_ID{$sequence};
		}
	}
	close $sample_tax_fh;

	#Retrieve relative abundance data from the total distribution file:
	#relative abundance in column 1, sample-specific OTU ID in column 7 (1-based)
	open my $distribution_fh, '<', $distribution_file or die "Cannot open $distribution_file: $!";
	while (my $line = <$distribution_fh>) {
		chomp $line;
		my ($rel_ab, $sample_OTU_ID) = (split("\t", $line))[0,6];
		#next if $rel_ab < $rel_ab_threshold;
		next unless defined $sample_OTU_ID;	#blank line or fewer than seven columns
		#Exact lookup; the ID is not interpreted as a regular expression
		next unless exists $sample_OTU_ID_to_general_OTU_ID{$sample_OTU_ID};
		$info_all_hash{ $sample_OTU_ID_to_general_OTU_ID{$sample_OTU_ID} }->{$tax_file} = $rel_ab;
	}
	close $distribution_fh;
}

#Print one tab-separated line per OTU/sample combination to STDOUT with the columns:
#OTU ID, SILVA hit ID, identity, alignment length, sample key, relative abundance,
#OTU annotation, annotation of the SILVA hit.
#Keys are sorted so that the output order is the same in every run; values that are
#missing from a lookup table are printed as empty fields.
foreach my $OTU_ID (sort keys %info_all_hash) {
	my $SILVA_OTU = $OTU_to_SILVA_OTU{$OTU_ID};
	my $SILVA_hit_annotation = defined $SILVA_OTU ? $number_to_SILVA_annotation{$SILVA_OTU} : undef;
	foreach my $sample_ID (sort keys %{$info_all_hash{$OTU_ID}}) {
		my @fields = (
			$OTU_ID,
			$SILVA_OTU,
			$OTU_to_identity{$OTU_ID},
			$OTU_to_aln_length{$OTU_ID},
			$sample_ID,
			$info_all_hash{$OTU_ID}{$sample_ID},
			$OTU_to_SILVA_annotation{$OTU_ID},
			$SILVA_hit_annotation,
		);
		print join("\t", map { defined $_ ? $_ : '' } @fields), "\n";
	}
}

##########################################################################################################################################################
##########################################################################################################################################################
##########################################################################################################################################################