Source code for cluster.Cluster_Command

from classes.ChewbaccaCommand import ChewbaccaCommand
from Cluster_Program_Crop import Cluster_Program_Crop
from Cluster_Program_Swarm import Cluster_Program_Swarm
from Cluster_Program_Vsearch import Cluster_Program_Vsearch


class Cluster_Command(ChewbaccaCommand):
    r"""Clusters a set of fasta files.

    This command generates a fasta file of unique sequences (each representing
    a cluster) and a .groups file. This command also takes an optional .groups
    file containing replication data from previous commands. If a .groups file
    is supplied, only one output .groups file is generated (regardless of the
    number of inputs).

    **Inputs**:
        * One or more fasta files to cluster.
        * Optional: :ref:`.groups` - A list of representative names and the
          names of their replicant sequences. You likely have one of these
          files if you've previously run a clustering or dereplication
          command.

    **Outputs**:
        * \*.fasta file - A fasta file with unique sequences and their
          replication counts.
        * \*.groups - A :ref:`.groups`

    **Notes**:
        * The input fasta file(s) should have been dereplicated before
          clustering.
        * For a single experiment with multiple fasta files, it is best to
          merge all input fasta files, dereplicate them, then cluster the
          single merged and dereplicated fasta file. This provides the best
          OTU groupings.

    **Example**:

    ::

        ./
            Data.fasta:
                >seq1_3
                AAAAAAAAAA
                >seq2_1
                ATAAAAAAAA
                >seq3_1
                TTTTTTTTTT
                >seq4_1
                TTTTTTATTT
                >seq5_1
                TTTTTTATCT

            Data.groups:
                seq1 seq6 seq1 seq7

    ``$ python chewbacca.py cluster_seqs -i Data.fasta -o rslt -g Data.groups``

    ::

        rslt/
            Data_clustered_seeds.fasta:
                >seq1_4
                AAAAAAAAAA
                >seq3_3
                TTTTTTTTTT

        rslt_groups_files/
            postcluster_updated.groups:
                seq3 seq3 seq5 seq4
                seq1 seq2 seq1 seq7 seq6
    """
    # NOTE(review): the docstring is raw (r"...") so the reST escapes "\*"
    # reach Sphinx unchanged instead of being treated as (invalid) Python
    # escape sequences. The line layout of the example blocks above was
    # reconstructed from whitespace-collapsed text -- confirm against the
    # upstream file.

    # Clustering program classes listed for this command; how the base class
    # consumes this list is not visible here.
    supported_programs = [
        Cluster_Program_Crop,
        Cluster_Program_Swarm,
        Cluster_Program_Vsearch,
    ]
    # Swarm is the program marked as the default (presumably used when the
    # caller does not pick one -- verify against ChewbaccaCommand).
    default_program = Cluster_Program_Swarm
    # Name string for this command.
    command_name = "Cluster"