From: Scott C. <sco...@us...> - 2006-12-20 21:32:45
|
Update of /cvsroot/gmod/schema/chado/load/bin In directory sc8-pr-cvs2.sourceforge.net:/tmp/cvs-serv28644/load/bin Modified Files: load_gff3.PLS Log Message: added new script for generating AutoDBI.pm from the dbi.tt2 template, and a new AutoDBI.PL for generating AutoDBI.pm 'automatically'. For load_gff3.PLS, added support for using feature_property ontology before falling back to autocreating a term. Many things need to be fixed in this loader to make it sufficiently 'modern' Index: load_gff3.PLS =================================================================== RCS file: /cvsroot/gmod/schema/chado/load/bin/load_gff3.PLS,v retrieving revision 1.69 retrieving revision 1.70 diff -C2 -d -r1.69 -r1.70 *** load_gff3.PLS 19 Dec 2006 17:57:35 -0000 1.69 --- load_gff3.PLS 20 Dec 2006 21:32:39 -0000 1.70 *************** *** 82,85 **** --- 82,87 ---- Bio::GMOD::Config to get the organism. + --fp_cv (optional, defaults to 'feature_property') Name of the + feature property cv =head1 DESCRIPTION *************** *** 193,197 **** my ( $ORGANISM,$GFFFILE,$UNIQUENAME,$CACHE_SIZE,$FORCE_LOAD, ! $ONTOLOGY,$DBPROFILE ); my ( $progress, $next_update, $linecount ) = ( undef, undef, undef ); #progressbar; --- 195,199 ---- my ( $ORGANISM,$GFFFILE,$UNIQUENAME,$CACHE_SIZE,$FORCE_LOAD, ! $ONTOLOGY,$DBPROFILE, $FP_CV, $SRC_DB ); my ( $progress, $next_update, $linecount ) = ( undef, undef, undef ); #progressbar; *************** *** 199,203 **** ( undef, undef, undef ); my $feature_count = 0; #for cache/flush ! my $cv; my $null_db_id; my %srcfeature = (); --- 201,205 ---- ( undef, undef, undef ); my $feature_count = 0; #for cache/flush ! my ($auto_cv, $fp_cv); my $null_db_id; my %srcfeature = (); *************** *** 287,290 **** --- 289,293 ---- #is this general, or what should really be done here? 
#parse the dbxref and get the appropriate db_id + #or take commandline arg mapping them out if ( $id && !$dbxref{$id} ) { my ($chado_dbxref) = Chado::Dbxref->find_or_create( *************** *** 443,447 **** else { unless ( defined $cvterm{$tag} ) { ! cache_cvterm($tag); $progress->message("Data with the $tag tag are being placed in the featureprop table"); } --- 446,450 ---- else { unless ( defined $cvterm{$tag} ) { ! cache_cvterm($tag, $fp_cv ? $fp_cv->id : 0); $progress->message("Data with the $tag tag are being placed in the featureprop table"); } *************** *** 800,803 **** --- 803,807 ---- GetOptions( 'organism:s' => \$ORGANISM, + 'srcdb:s' => \$SRC_DB, 'gfffile:s' => \$GFFFILE, 'uniquename:s'=>\$UNIQUENAME, *************** *** 806,809 **** --- 810,814 ---- 'ontology:s' => \$ONTOLOGY, 'dbprofile:s'=> \$DBPROFILE, + 'fp_cv:s' => \$FP_CV, ) or ( system( 'pod2text', $0 ), exit -1 ); *************** *** 825,830 **** --- 830,837 ---- } } + $SRC_DB ||= 'DB:refseq'; $CACHE_SIZE ||= 1000; $ONTOLOGY ||= 'sequence'; + $FP_CV ||= 'feature_property'; die "\nYou must specify a GFF file\n" unless $GFFFILE; *************** *** 869,878 **** Chado::LoadDBI->init(); ! ($cv) = Chado::Cv->search( { name => 'autocreated', } ); ! die "Unable to find a 'autocreated' cv in the cv table; please add one" unless $cv; ($so) = Chado::Cv->search( { name => $ONTOLOGY } ); die "Unable to find $ONTOLOGY in cv table; that is a pretty big problem" unless $so; --- 876,893 ---- Chado::LoadDBI->init(); ! ($auto_cv) = Chado::Cv->search( { name => 'autocreated', } ); ! ($fp_cv) = Chado::Cv->search( ! { ! name => $FP_CV, ! } ! ); ! ! die "Unable to find a 'autocreated' cv in the cv table; please add one" unless $auto_cv; ! warn "No feature property cv found; unknown tags will be put in 'autocreated'" unless $fp_cv; ! 
($so) = Chado::Cv->search( { name => $ONTOLOGY } ); die "Unable to find $ONTOLOGY in cv table; that is a pretty big problem" unless $so; *************** *** 882,886 **** qw(description synonym note develops_from part_of gff_file score protein); foreach my $n (@needed_cvterms) { ! cache_cvterm($n); } cache_cvterm('region',$so->id); #make sure to get the SO region term --- 897,901 ---- qw(description synonym note develops_from part_of gff_file score protein); foreach my $n (@needed_cvterms) { ! cache_cvterm($n, $fp_cv ? $fp_cv->id : 0); } cache_cvterm('region',$so->id); #make sure to get the SO region term *************** *** 1296,1301 **** #we need an ontology source check here. GO has an obsolete term for 'protein', but we want the #one from SO. ! ! ( $cvterm{$name} ) = Chado::Cvterm->search( { name => $name, cv_id=> $soid } ) --- 1311,1316 ---- #we need an ontology source check here. GO has an obsolete term for 'protein', but we want the #one from SO. ! if ($soid && !$cvterm{$name} ) { ! ( $cvterm{$name} ) = Chado::Cvterm->search( { name => $name, cv_id=> $soid } ) *************** *** 1303,1306 **** --- 1318,1322 ---- name => ucfirst($name), cv_id=> $soid } ); + } if (!$cvterm{$name} and !$soid) { *************** *** 1313,1317 **** and $cvterm{$name}->isa('Class::DBI::Iterator'); ! if ( !$cvterm{$name} && !$soid ) { unless ($null_db_id) { --- 1329,1333 ---- and $cvterm{$name}->isa('Class::DBI::Iterator'); ! if ( !$cvterm{$name} && $soid != $so->id ) { unless ($null_db_id) { *************** *** 1327,1331 **** { db_id => $null_db_id, ! accession => $cv->name.":".$name, } ); --- 1343,1347 ---- { db_id => $null_db_id, ! accession => $auto_cv->name.":".$name, } ); *************** *** 1334,1338 **** { name => $name, ! cv_id => $cv->id, definition => 'autocreated by gmod_load_gff3.pl', dbxref_id => $dbxref->id, --- 1350,1354 ---- { name => $name, ! cv_id => $auto_cv->id, definition => 'autocreated by gmod_load_gff3.pl', dbxref_id => $dbxref->id, |