Source code for cluster.Cluster_Command

from classes.ChewbaccaCommand import ChewbaccaCommand
from Cluster_Program_Crop import Cluster_Program_Crop
from Cluster_Program_Swarm import Cluster_Program_Swarm
from Cluster_Program_Vsearch import Cluster_Program_Vsearch


[docs]class Cluster_Command(ChewbaccaCommand): """Clusters a set of fasta files. This command generates a fasta file of unique sequences (each representing a cluster) and a .groups file. This command also takes an optional .groups file containing replication data from previous commands. If a .groups file is supplied, only one output .groups file is generated (regardless of the number of inputs). **Inputs**: * One or more fasta files to cluster. * Optional: :ref:`.groups` - A list of representative names and the names of their seed \ sequences. You likely have one of these files if you've previously run a \ clustering or dereplication command. **Outputs**: * \*.fasta file - A fasta file with unique sequences and their replication counts. * \*.groups - A :ref:`.groups` **Notes**: * The input fasta file(s) should have been dereplicated before clustering. \ * For a single experiment with multiple fasta files, it is best to merge all input fasta files, dereplicate \ them, then cluster the single merged and dereplicated fasta file. This provides the best OTU groupings. **Example**: :: ./ Data.fasta: >seq1_3 AAAAAAAAAA >seq2_1 ATAAAAAAAA >seq3_1 TTTTTTTTTT >seq4_1 TTTTTTATTT >seq5_1 TTTTTTATCT Data.groups: seq1 seq6 seq1 seq7 ``$ python chewbacca.py cluster_seqs -i Data.fasta -o rslt -g Data.groups`` :: rslt/ Data_clustered_seeds.fasta: >seq1_4 AAAAAAAAAA >seq3_3 TTTTTTTTTT rslt_groups_files/ postcluster_updated.groups: seq3 seq3 seq5 seq4 seq1 seq2 seq1 seq7 seq6 """ supported_programs = [Cluster_Program_Crop, Cluster_Program_Swarm, Cluster_Program_Vsearch ] default_program = Cluster_Program_Swarm command_name = "Cluster"