From: Scott C. <sco...@us...> - 2006-03-22 19:24:26
|
#!/usr/bin/perl
#
# gmod_fasta2gff3.pl - convert a directory of FASTA files to simple GFF3.
#
# Provenance (from the CVS commit notice this script was posted in):
#   Update of /cvsroot/gmod/schema/chado/bin
#   In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29856
#   Added Files: gmod_fasta2gff3.pl
#   Log Message: adding a simple utility to take a set of fasta files and
#                convert it to GFF3
#
# Full usage documentation is in the POD after __END__.

use strict;
use warnings;

use Getopt::Long;
use Bio::DB::Fasta;

my ($FASTA_DIR, $GFFFILENAME, $TYPE, $SOURCE, $ATTRIBUTES, $NOSEQUENCE);

# On an unrecognised or malformed option, print this script's POD as the
# usage message and exit with a failure status.
GetOptions(
    'fasta_dir=s'   => \$FASTA_DIR,
    'gfffilename=s' => \$GFFFILENAME,
    'type=s'        => \$TYPE,
    'source=s'      => \$SOURCE,
    'attributes=s'  => \$ATTRIBUTES,
    'nosequence'    => \$NOSEQUENCE,
) or ( system( 'pod2text', $0 ), exit -1 );

# Fall back to the documented defaults for anything not given.
my $fastadir = $FASTA_DIR   || './fasta';
my $gfffile  = $GFFFILENAME || 'out.gff';
my $type     = $TYPE        || 'EST';
my $source   = $SOURCE      || '.';

open my $out, '>', $gfffile
    or die "couldn't open $gfffile for writing: $!\n";

# Open the FASTA directory once; the same database object supplies a fresh
# sequence stream for each of the two passes below.
my $db = Bio::DB::Fasta->new($fastadir)
    or die "couldn't open fasta directory $fastadir\n";

print {$out} "##gff-version 3\n";
print {$out} "#this file generated from $0\n";

# Pass 1: one feature line per sequence, spanning 1..length of the sequence.
# Column 9 is always "ID=<seq id>", followed by any user-supplied attributes.
my $feature_stream = $db->get_PrimarySeq_stream;
while ( my $seq = $feature_stream->next_seq ) {
    my $id   = $seq->id;
    my $atts = "ID=" . $id;
    $atts .= ";$ATTRIBUTES" if $ATTRIBUTES;

    print {$out} join( "\t",
        $id, $source, $type,
        1,   $seq->length,
        ".", ".", ".",
        $atts ),
      "\n";
}

# Pass 2 (skipped with --nosequence): append the sequences themselves in a
# ##FASTA section, each residue string on a single line.
if ( !$NOSEQUENCE ) {
    print {$out} "##FASTA\n";

    my $sequence_stream = $db->get_PrimarySeq_stream;
    while ( my $seq = $sequence_stream->next_seq ) {
        print {$out} ">" . $seq->id . "\n";
        print {$out} $seq->seq . "\n";
    }
}

# Buffered write errors (e.g. a full disk) only surface at close, so a failed
# close must not be reported as success.
close $out
    or die "couldn't close $gfffile: $!\n";

__END__

=pod

=head1 NAME

gmod_fasta2gff3.pl - Convert FASTA to simple GFF3

=head1 SYNOPSIS

  % gmod_fasta2gff3.pl [options]

=head1 COMMAND-LINE OPTIONS

  --fasta_dir    Directory containing fasta files (default: ./fasta)
  --gfffilename  Name of GFF3 file to be created (default: ./out.gff)
  --type         SO type to assign to each feature (default: EST)
  --source       Text to appear in source column (default: .)
  --attributes   Additional tag=value pairs to appear in column 9
  --nosequence   Suppress the ##FASTA section (ie, don't print DNA sequences)

=head1 DESCRIPTION

This script simply takes a collection of fasta files and converts them
to simple GFF3 suitable for loading into chado.

=head1 AUTHORS

Scott Cain E<lt>ca...@cs...E<gt>

Copyright (c) 2006

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut