From: <pi...@us...> - 2003-07-18 22:00:54
|
Update of /cvsroot/gmod/schema/XMLTools/XORT/Loader In directory sc8-pr-cvs1:/tmp/cvs-serv3319/Loader Modified Files: XMLParser.pm XMLValidator.pm XMLValidatorNoDB.pm Log Message: Index: XMLParser.pm =================================================================== RCS file: /cvsroot/gmod/schema/XMLTools/XORT/Loader/XMLParser.pm,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** XMLParser.pm 10 Jun 2003 15:01:06 -0000 1.5 --- XMLParser.pm 18 Jul 2003 22:00:51 -0000 1.6 *************** *** 8,11 **** --- 8,12 ---- use XORT::Util::DbUtil::DB; use strict; + use XORT::Loader::XMLAccession; # This one is modified on 1/29/2003 for the dtd: chado_1.0.dtd *************** *** 39,42 **** --- 40,44 ---- my $element_name; my $table_name; + my $db; my $dbh_obj; my %hash_table_col; *************** *** 80,83 **** --- 82,87 ---- my $ATTRIBUTE_REF='ref'; + my $DEBUG=0; + # for some elements, it will be ignored, i.e view, and _app_data, # algorithms to filter out ignore elements: initiately P_pseudo set to -1, for tables_pseudo, increase by 1 at beginning of start_element, decrease by 1 at end of end_element *************** *** 104,107 **** --- 108,113 ---- $self->{'db'}=shift; $self->{'file'}=shift; + $db=$self->{db}; + $DEBUG=shift; #load the properties file my $pro=XORT::Util::GeneralUtil::Properties->new('ddl'); *************** *** 112,119 **** foreach my $value(@array_pseudo){ $hash_tables_pseudo{$value}=1; ! print "\npseudo:$value"; } print "\n start to parse xml file ....."; --- 118,141 ---- foreach my $value(@array_pseudo){ $hash_tables_pseudo{$value}=1; ! print "\npseudo:$value" if ($DEBUG==1); } + # under all thos hash and arrary, otherwise, it will intervense for batch executing + undef $level; + undef %hash_table_col; + undef %hash_id; + undef @AoH_data; + undef @AoH_data_new; + undef @AoH_db_id; + undef @AoH_local_id; + undef @AoH_op; + undef @AoH_ref; + undef %hash_level_id; + undef %hash_level_name; + undef %hash_level_op; + undef %hash_level_ref; + undef %hash_level_sub_detect; + print "\n start to parse xml file ....."; *************** *** 133,141 **** } my $file=$self->{file}; ! my $db=$self->{db}; my $dbh_pro=XORT::Util::GeneralUtil::Properties->new($db); my %dbh_hash=$dbh_pro->get_dbh_hash(); $dbh_obj=XORT::Util::DbUtil::DB->_new(\%dbh_hash) ; ! $dbh_obj->open(); # $dbh_obj->set_autocommit(); --- 155,163 ---- } my $file=$self->{file}; ! $db=$self->{db}; my $dbh_pro=XORT::Util::GeneralUtil::Properties->new($db); my %dbh_hash=$dbh_pro->get_dbh_hash(); $dbh_obj=XORT::Util::DbUtil::DB->_new(\%dbh_hash) ; ! $dbh_obj->open(); # $dbh_obj->set_autocommit(); *************** *** 188,196 **** } elsif (!(-e $log_file) && ($recovery_status eq '1' || $recovery_status ==1)) { ! print "\n are you sure you have run this before ?\nif first time parsing, please set the is_recovery=>0\n"; exit(1); } else { ! print "\nIf you parse this xml file from the beginning, you can safely delete this file:\n"; system("delete $log_file"); --- 210,218 ---- } elsif (!(-e $log_file) && ($recovery_status eq '1' || $recovery_status ==1)) { ! print "\n are you sure you have run this before ?\nif first time parsing, please set the is_recovery=>0\n" if ($DEBUG==1); exit(1); } else { ! print "\nIf you parse this xml file from the beginning, you can safely delete this file:\n" if ($DEBUG==1); system("delete $log_file"); *************** *** 207,211 **** $hash_level_sub_detect{$level}=1; $element_name=$element->{'Name'}; ! print "\nstart_element:$element_name"; # here to check whether it is ELEMENT_pseudo --- 229,233 ---- $hash_level_sub_detect{$level}=1; $element_name=$element->{'Name'}; ! print "\nstart_element:$element_name" if ($DEBUG==1); # here to check whether it is ELEMENT_pseudo *************** *** 234,241 **** $hash_level_id{$level}=$local_id; $AoH_local_id[$level]{$element_name}=$local_id; } else { ! delete $hash_level_id{$level}; ! delete $AoH_local_id[$level]{$element_name}; } if ($op && $op ne ''){ --- 256,267 ---- $hash_level_id{$level}=$local_id; $AoH_local_id[$level]{$element_name}=$local_id; + if ($local_id eq 'dbxref_49378'){ + # &create_log(\%hash_trans, \%hash_id, $log_file); + # exit(1); + } } else { ! delete $hash_level_id{$level}; ! delete $AoH_local_id[$level]{$element_name}; } if ($op && $op ne ''){ *************** *** 261,265 **** $hash_level_ref{$level}=$ref; $AoH_ref[$level]{$element_name}=$ref; ! print "\nref for this element:$element_name is :$ref"; } else { --- 287,291 ---- $hash_level_ref{$level}=$ref; $AoH_ref[$level]{$element_name}=$ref; ! print "\nref for this element:$element_name is :$ref" if ($DEBUG==1); } else { *************** *** 291,295 **** $table_name=$element_name; if ( defined $hash_ddl{$hash_level_name{$level-1}}){ ! print "\nstart to output the module table:$hash_level_name{$level-1}, level:$level before parse sub table:$table_name"; my $hash_data_ref; --- 317,321 ---- $table_name=$element_name; if ( defined $hash_ddl{$hash_level_name{$level-1}}){ ! print "\nstart to output the module table:$hash_level_name{$level-1}, level:$level before parse sub table:$table_name" if ($DEBUG==1); my $hash_data_ref; *************** *** 309,312 **** --- 335,341 ---- $hash_data_ref=&_get_ref_data($element_name, $temp_db_id ); } + else { + print "\nunable to retrieve record for this ref:$AoH_ref[$level-1]{$hash_level_name{$level-1}}"; + } } } *************** *** 320,324 **** } if ($#temp >-1 ){ ! #print "\nthere is data for main module table:$hash_level_name{$level-1}"; my $hash_ref=&_data_check($hash_data_ref, $hash_level_name{$level-1}, $level, \%hash_level_id, \%hash_level_name ); --- 349,353 ---- } if ($#temp >-1 ){ ! #print "\nthere is data for main module table:$hash_level_name{$level-1}" if ($DEBUG==1); my $hash_ref=&_data_check($hash_data_ref, $hash_level_name{$level-1}, $level, \%hash_level_id, \%hash_level_name ); *************** *** 336,340 **** } else { ! print "\nyou try to update a record which not exist in db yet"; &create_log(\%hash_trans, \%hash_id, $log_file); exit(1); --- 365,369 ---- } else { ! print "\nyou try to update a record which not exist in db yet" ; &create_log(\%hash_trans, \%hash_id, $log_file); exit(1); *************** *** 357,361 **** elsif ($hash_level_op{$level-1} eq 'insert'){ $db_id=$dbh_obj->db_insert(-data_hash=>$hash_ref, -table=>$hash_level_name{$level-1},-hash_local_id=>\%hash_id, -hash_trans=>\%hash_trans, -log_file=>$log_file); ! print "\ndb_id:$db_id:"; #save the pair of local_id/db_id if (defined $db_id && defined $AoH_local_id[$level-1]{$hash_level_name{$level-1}}){ --- 386,390 ---- elsif ($hash_level_op{$level-1} eq 'insert'){ $db_id=$dbh_obj->db_insert(-data_hash=>$hash_ref, -table=>$hash_level_name{$level-1},-hash_local_id=>\%hash_id, -hash_trans=>\%hash_trans, -log_file=>$log_file); ! print "\ndb_id:$db_id:" if ($DEBUG==1); #save the pair of local_id/db_id if (defined $db_id && defined $AoH_local_id[$level-1]{$hash_level_name{$level-1}}){ *************** *** 399,403 **** foreach my $i(0..$#array_col){ $hash_table_col{$array_col[$i]}=1; ! # print "\ncol:$array_col[$i]"; } --- 428,432 ---- foreach my $i(0..$#array_col){ $hash_table_col{$array_col[$i]}=1; ! # print "\ncol:$array_col[$i]" if ($DEBUG==1); } *************** *** 413,421 **** elsif ( $element_name ne $root_element ) { ! print "\ntable:$hash_level_name{$level-1}:\tcolumn:$element_name"; my $col_ref=&_get_table_columns($hash_level_name{$level-1}); #not column element name if (!(exists $col_ref->{$element_name})){ ! print "\n invalid element...... element:$element_name"; print "\ntable:$hash_level_name{$level-1}:\tcolumn:$element_name"; &create_log(\%hash_trans, \%hash_id, $log_file); --- 442,450 ---- elsif ( $element_name ne $root_element ) { ! print "\ntable:$hash_level_name{$level-1}:\tcolumn:$element_name" if ($DEBUG==1); my $col_ref=&_get_table_columns($hash_level_name{$level-1}); #not column element name if (!(exists $col_ref->{$element_name})){ ! print "\n invalid element...... element:$element_name" ; print "\ntable:$hash_level_name{$level-1}:\tcolumn:$element_name"; &create_log(\%hash_trans, \%hash_id, $log_file); *************** *** 499,508 **** } else { ! print "\nTry to update a column which the op for table is not update....."; &create_log(\%hash_trans, \%hash_id , $log_file ); exit(1); } } ! #print "\n\nin characters key:$key\tvalue:$AoH_data[$level]{$key}:\tlevel:$level"; --- 528,537 ---- } else { ! print "\nTry to update a column which the op for table is not update....." ; &create_log(\%hash_trans, \%hash_id , $log_file ); exit(1); } } ! #print "\n\nin characters key:$key\tvalue:$AoH_data[$level]{$key}:\tlevel:$level" if ($DEBUG==1); *************** *** 527,535 **** my $hash_ref; ! print "\nend_element_name:$element_name"; # come to end of document if ($element_name eq $root_element){ ! print "\n\nbingo ....you success !...."; ! exit(1); } --- 556,565 ---- my $hash_ref; ! print "\nend_element_name:$element_name" if ($DEBUG==1); # come to end of document if ($element_name eq $root_element){ ! print "\n\nbingo ....you success !...." ; ! #$dbh_obj->close(); ! return; } *************** *** 599,604 **** $AoH_db_id[$level]{$element_name}=$db_id; } ! print "\nend_element:$element_name is table element, and sub element is col of this table"; ! print "\nlocal_id:$AoH_local_id[$level]{$element_name}:\tdb_id:$db_id:"; } } # end of if defined hash_data_ref, --- 629,634 ---- $AoH_db_id[$level]{$element_name}=$db_id; } ! print "\nend_element:$element_name is table element, and sub element is col of this table" if ($DEBUG==1); ! print "\nlocal_id:$AoH_local_id[$level]{$element_name}:\tdb_id:$db_id:" if ($DEBUG==1); } } # end of if defined hash_data_ref, *************** *** 606,610 **** elsif (defined $AoH_ref[$level]{$hash_level_name{$level}} && !(defined $hash_data_ref)){ my $hash_id_key=$element_name.":".$AoH_ref[$level]{$hash_level_name{$level}}; ! if (defined $hash_id{$hash_id_key}){ $hash_data_ref=&_get_ref_data($element_name, $hash_id{$hash_id_key}); --- 636,640 ---- elsif (defined $AoH_ref[$level]{$hash_level_name{$level}} && !(defined $hash_data_ref)){ my $hash_id_key=$element_name.":".$AoH_ref[$level]{$hash_level_name{$level}}; ! print "\nin case using ref attribuate to ref object, ref:$AoH_ref[$level]{$hash_level_name{$level}}" if ($DEBUG==1); if (defined $hash_id{$hash_id_key}){ $hash_data_ref=&_get_ref_data($element_name, $hash_id{$hash_id_key}); *************** *** 615,618 **** --- 645,653 ---- $hash_data_ref=&_get_ref_data($element_name, $temp_db_id ); } + else { + print "\nunable to retrieve the record based on the ref:$AoH_ref[$level]{$hash_level_name{$level}}"; + &create_log(\%hash_trans, \%hash_id, $log_file); + exit(1); + } } *************** *** 663,668 **** $AoH_db_id[$level]{$element_name}=$db_id; } ! print "\nend_element is $element_name table element, and sub element is col of this table"; ! print "\nlocal_id:$AoH_local_id[$level]{$element_name}:\tdb_id:$db_id:"; } # end of if (%hash_data_temp) } # end of using ref attribute to refer object --- 698,703 ---- $AoH_db_id[$level]{$element_name}=$db_id; } ! print "\nend_element is $element_name table element, and sub element is col of this table" if ($DEBUG==1); ! print "\nlocal_id:$AoH_local_id[$level]{$element_name}:\tdb_id:$db_id:" if ($DEBUG==1); } # end of if (%hash_data_temp) } # end of using ref attribute to refer object *************** *** 691,695 **** $AoH_data[$level-1]{$key}=$AoH_db_id[$level]{$element_name}; } ! print "\nsubstitute it with db_id:$AoH_db_id[$level]{$element_name}:level:$level-1:key:$key:"; } } --- 726,730 ---- $AoH_data[$level-1]{$key}=$AoH_db_id[$level]{$element_name}; } ! print "\nsubstitute it with db_id:$AoH_db_id[$level]{$element_name}:level:$level-1:key:$key:" if ($DEBUG==1); } } *************** *** 699,703 **** my $key=$hash_level_name{$level-1}.".".$element_name; my $primary_table=$hash_ddl{$temp_foreign}; ! print "\n$element_name is column_element"; #if is foreign key, and next level element is the primary table, it has done in last step, ie. <type_id><cvterm>...</cvterm></type_id> if ($hash_ddl{$temp_foreign} eq $hash_level_name{$level+1} && defined $hash_ddl{$temp_foreign} ne '' && (defined $hash_level_sub_detect{$level+1})){ --- 734,738 ---- my $key=$hash_level_name{$level-1}.".".$element_name; my $primary_table=$hash_ddl{$temp_foreign}; ! print "\n$element_name is column_element" if ($DEBUG==1); #if is foreign key, and next level element is the primary table, it has done in last step, ie. <type_id><cvterm>...</cvterm></type_id> if ($hash_ddl{$temp_foreign} eq $hash_level_name{$level+1} && defined $hash_ddl{$temp_foreign} ne '' && (defined $hash_level_sub_detect{$level+1})){ *************** *** 729,740 **** } elsif(defined $hash_accession_entry{$primary_table}) { my $id=&_get_accession($AoH_data[$level]{$key}, $primary_table, $level); ! if ($id){ $AoH_data[$level]{$key}=$id; $hash_id{$hash_id_key}=$id; } else { ! print "\n$element_name: can't retrieve the id based on the accession:$AoH_data[$level]{$key}"; ! print "\nor correct format for accesion, but op for table:$hash_level_name{$level-1} is 'delete', and record for this accesion is not in db yet"; &create_log(\%hash_trans, \%hash_id, $log_file); exit(1); --- 764,776 ---- } elsif(defined $hash_accession_entry{$primary_table}) { + print "\nhas value:$AoH_data[$level]{$key},not in hash_id" if ($DEBUG==1); my $id=&_get_accession($AoH_data[$level]{$key}, $primary_table, $level); ! if (defined $id){ $AoH_data[$level]{$key}=$id; $hash_id{$hash_id_key}=$id; } else { ! print "\n$element_name: can't retrieve the id based on the accession:$AoH_data[$level]{$key}" if ($DEBUG==1); ! print "\nor correct format for accesion, but op for table:$hash_level_name{$level-1} is 'delete', and record for this accesion is not in db yet" if ($DEBUG==1); &create_log(\%hash_trans, \%hash_id, $log_file); exit(1); *************** *** 742,750 **** } else { ! print "\n$element_name:$AoH_data[$level]{$key}: is not accession, or local_id:$AoH_data[$level]{$key} is not defined yet"; &create_log(\%hash_trans, \%hash_id , $log_file ); exit(1); } ! print "\nend_element:$element_name is col, table_op:not update"; } #table:update, col:update --- 778,786 ---- } else { ! print "\n$element_name:$AoH_data[$level]{$key}: is not accession, or local_id:$AoH_data[$level]{$key} is not defined yet" ; &create_log(\%hash_trans, \%hash_id , $log_file ); exit(1); } ! print "\nend_element:$element_name is col, table_op:not update" if ($DEBUG==1); } #table:update, col:update *************** *** 772,776 **** exit(1); } ! print "\nend_element: self:col, table_op:update, col_op:update"; } #table: update, col: not upate --- 808,812 ---- exit(1); } ! print "\nend_element: self:col, table_op:update, col_op:update" if ($DEBUG==1); } #table: update, col: not upate *************** *** 797,804 **** exit(1); } ! print "\nend_element: self:col, table_op:update, col_op:not update"; } ! print "\nprimary table:$hash_ddl{$temp_foreign}:sub element:$hash_level_name{$level+1}"; ! print "\n\n$element_name is foreign key, no sub element, has data, db_id:$AoH_data[$level]{$key}"; } # foreign key, no sub element, but NO data, error ....... --- 833,840 ---- exit(1); } ! print "\nend_element: self:col, table_op:update, col_op:not update" if ($DEBUG==1); } ! print "\nprimary table:$hash_ddl{$temp_foreign}:sub element:$hash_level_name{$level+1}" if ($DEBUG==1); ! print "\n\n$element_name is foreign key, no sub element, has data, db_id:$AoH_data[$level]{$key}" if ($DEBUG==1); } # foreign key, no sub element, but NO data, error ....... *************** *** 830,834 **** $dbh_obj->close(); print "\n\nbingo ....you success !...."; ! exit(1); } --- 866,871 ---- $dbh_obj->close(); print "\n\nbingo ....you success !...."; ! # exit(1); ! return; } *************** *** 848,856 **** my $content=$element."."; foreach my $value (keys %$hash_ref){ ! print "\nextract_hash before:key:$value:value:$hash_ref->{$value}:"; if (index($value, $content) ==0 ){ my $start=length $content; my $key=substr($value, $start); ! print "\nextract_hash:content:$content:value:$value:key:$key:$hash_ref->{$value}:"; # if ($hash_ref->{$value} =~/\w/){ $result{$key}=$hash_ref->{$value}; --- 885,893 ---- my $content=$element."."; foreach my $value (keys %$hash_ref){ ! #print "\nextract_hash before:key:$value:value:$hash_ref->{$value}:" if ($DEBUG==1); if (index($value, $content) ==0 ){ my $start=length $content; my $key=substr($value, $start); ! #print "\nextract_hash:content:$content:value:$value:key:$key:$hash_ref->{$value}:" if ($DEBUG==1); # if ($hash_ref->{$value} =~/\w/){ $result{$key}=$hash_ref->{$value}; *************** *** 863,869 **** ! #foreach my $key (keys %$hash_ref){ ! # print "\nleft key:$key:\tvalue:$hash_ref{$key}:"; ! #} if (%result){ return \%result; --- 900,906 ---- ! # foreach my $key (keys %$hash_ref){ ! # print "\nleft key:$key:\tvalue:$hash_ref{$key}:" if ($DEBUG==1); ! # } if (%result){ return \%result; *************** *** 907,911 **** foreach my $key (keys %$hash_ref){ ! print "\nin data_check col:$key\tvalue:$hash_ref->{$key}:"; } --- 944,948 ---- foreach my $key (keys %$hash_ref){ ! print "\nin data_check col:$key\tvalue:$hash_ref->{$key}:" if ($DEBUG==1); } *************** *** 920,924 **** if ($temp[$i] ne $table_id && !(defined $hash_ref->{$temp[$i]}) && (defined $hash_foreign_key{$temp[$i]} )){ my $temp_key=$table.":".$temp[$i]."_ref_table"; ! print "\ndata_check temp_key:$temp_key:value:$hash_ddl{$temp_key}"; my $retrieved_value=&_context_retrieve($level, $hash_ddl{$temp_key}, $hash_level_name_ref); if ($retrieved_value){ --- 957,961 ---- if ($temp[$i] ne $table_id && !(defined $hash_ref->{$temp[$i]}) && (defined $hash_foreign_key{$temp[$i]} )){ my $temp_key=$table.":".$temp[$i]."_ref_table"; ! print "\ndata_check temp_key:$temp_key:value:$hash_ddl{$temp_key}" if ($DEBUG==1); my $retrieved_value=&_context_retrieve($level, $hash_ddl{$temp_key}, $hash_level_name_ref); if ($retrieved_value){ *************** *** 931,935 **** exit(1); } ! #if not null, but not unique key, then depend on the op: ok for lookup/delete, ok for force is already exist in DB, NOT ok for insert else { my $op=$hash_level_op{$level-1}; --- 968,972 ---- exit(1); } ! #if not null, but not unique key, then depend on the op: ok for lookup/delete, ok for force if already exist in DB, NOT ok for insert else { my $op=$hash_level_op{$level-1}; *************** *** 1004,1017 **** my $hash_level_name_ref=shift; my $result; ! # print "\ncontext_retrieve:level:$level:primary_table:$primary_table"; for ( my $i=$level-1; $i>=0; $i--){ ! # print "\ncontext check hash_level_name:$hash_level_name_ref->{$i}"; if ($primary_table eq $hash_level_name_ref->{$i}){ ! print "\ncontext_retrieve:level:$level:primary_table:$primary_table:value:$AoH_db_id[$i]{$primary_table}"; $result= $AoH_db_id[$i]{$primary_table}; last; } } ! print "\nresult is:$result"; return $result; } --- 1041,1054 ---- my $hash_level_name_ref=shift; my $result; ! print "\ncontext_retrieve:level:$level:primary_table:$primary_table" if ($DEBUG==1); for ( my $i=$level-1; $i>=0; $i--){ ! print "\ncontext check hash_level_name:$hash_level_name_ref->{$i}" if ($DEBUG==1); if ($primary_table eq $hash_level_name_ref->{$i}){ ! print "\ncontext_retrieve:level:$level:primary_table:$primary_table:value:$AoH_db_id[$i]{$primary_table}" if ($DEBUG==1); $result= $AoH_db_id[$i]{$primary_table}; last; } } ! print "\nresult is:$result" if ($DEBUG==1); return $result; } *************** *** 1028,1034 **** foreach my $i(0..$#array_col){ if ($array_col[$i] ne ''){ ! $hash_table_column_ref->{$array_col[$i]}=1; ! } ! # print "\ncol:$array_col[$i]"; } return $hash_table_column_ref; --- 1065,1071 ---- foreach my $i(0..$#array_col){ if ($array_col[$i] ne ''){ ! $hash_table_column_ref->{$array_col[$i]}=1; ! } ! # print "\ncol:$array_col[$i]" if ($DEBUG==1); } return $hash_table_column_ref; *************** *** 1047,1132 **** my $op=$hash_level_op{$level}; my ($dbname, $acc, $version, $db_id, $stm_select, $stm_insert); ! print "\nget information for table:$table based on accession:$accession"; ! ! if ($accession =~ /([a-zA-Z]+)\:([a-zA-Z0-9]+)(\.\d)*/){ ! my @temp=split(/\:/, $accession); ! $dbname=$temp[0]; ! if ($temp[1] =~/\./){ ! my @temp1=split(/\./, $temp[1]); ! $acc=$temp1[0]; ! $version=$temp1[1]; ! } ! else { ! $acc=$temp[1]; ! $version=''; ! } ! ! my $organism_id; ! #create a pseudo organism record GAME xml loading ! $organism_id=$dbh_obj->get_one_value("select organism_id from organism where genus='Drosophila' and species='melanogaster'"); ! if (! $organism_id) { ! $dbh_obj->execute_sql("insert into organism (genus, species) values('Drosophila', 'melanogaster')"); ! $organism_id=$dbh_obj->get_one_value("select organism_id from organism where genus='Drosophila' and species='melanogaster'"); ! } ! ! my $type_id; ! # create pseudo cvterm record for GAME xml loading ! $type_id=$dbh_obj->get_one_value("select cvterm_id from cvterm, cv where name='curator note' and cvname='pub type' and cv.cv_id=cvterm.cv_id"); ! if (! $type_id) { ! my $cv_id; ! $cv_id=$dbh_obj->get_one_value("select cv_id from cv where cvname='pub type'"); ! if (!$cv_id) { ! $dbh_obj->execute_sql("insert into cv(cvname) values('pub type')"); ! $cv_id=$dbh_obj->get_one_value("select cv_id from cv where cvname='pub type'"); ! } ! $dbh_obj->execute_sql(sprintf("insert into cvterm(name, cv_id) values('curator note', $cv_id) ")); ! $type_id=$dbh_obj->get_one_value("select cvterm_id from cvterm, cv where name='curator note' and cvname='pub type' and cv.cv_id=cvterm.cv_id"); ! } ! ! ! # here to figure out eg. feature_id, table will be feature ! if ($table =~/\_id/){ ! my @temp2=split(/\_id/, $table); ! $table=$temp2[0]; ! } ! ! #my $table_id=$table."_id"; ! my $table_id_string=$table."_primary_key"; ! my $table_id=$hash_ddl{$table_id_string}; ! my $dbxref_id; ! my $stm_select_dbxref=sprintf("select dbxref_id from dbxref where dbname='%s' and accession='%s' and version='%s'", $dbname, $acc, $version); ! my $stm_insert_dbxref=sprintf("insert into dbxref (dbname, accession, version) values('%s', '%s', '%s')", $dbname, $acc, $version); ! $dbxref_id=$dbh_obj->get_one_value($stm_select_dbxref); ! if (!$dbxref_id && $op ne $OP_DELETE){ ! $dbh_obj->execute_sql($stm_insert_dbxref); ! $dbxref_id=$dbh_obj->get_one_value($stm_select_dbxref); ! } ! else { ! print "\nop is :$OP_DELETE, so no need to insert"; ! } ! ! if ($table eq 'dbxref'){ ! $db_id=$dbxref_id; ! } ! elsif ($table eq 'feature' ){ ! my $stm_select_feature=sprintf("select $table_id from $table where uniquename='%s' and organism_id=%s", $accession, $organism_id); ! my $stm_insert_feature=sprintf("insert into feature (organism_id, uniquename, type_id) values(%s, '%s', $type_id)", $organism_id, $accession); ! $db_id=$dbh_obj->get_one_value($stm_select_feature); ! if (!$db_id && $op ne $OP_DELETE){ ! $dbh_obj->execute_sql($stm_insert_feature); ! $db_id=$dbh_obj->get_one_value($stm_select_feature); ! } ! else { ! print "\nop is :$OP_DELETE, so no need to insert"; ! } ! } } else { ! print "\nsorry, the accession:$accession is not correct format as: db:acc[.version]"; ! &create_log(\%hash_trans, \%hash_id, $log_file); ! exit(1); } ! return $db_id; } --- 1084,1104 ---- my $op=$hash_level_op{$level}; my ($dbname, $acc, $version, $db_id, $stm_select, $stm_insert); + print "\nstart the _get_accession in XMLParse.pm...."; ! my $config_acc_file=$ENV{CodeBase}."/XORT/Config/config_accession.xml"; ! if (-e $config_acc_file) { ! $dbh_obj->close(); ! my $acc_parser=XORT::Loader::XMLAccession->new($db, $config_acc_file, $DEBUG); ! my $acc_id=$acc_parser->parse_accession($table, $accession, $op); ! print "\nget global_id:$acc_id: for this accession:$accession"; ! $dbh_obj->open(); ! print "\nend the _get_accession...."; ! return $acc_id; } else { ! print "\nunable to find configureation file:$config_acc_file"; ! return; } ! } *************** *** 1139,1157 **** my $result; - # print "\ncontext_retrieve:level:$level:primary_table:$primary_table"; for ( my $i=$level; $i>=0; $i--){ ! # print "\ncontext check hash_level_name:$hash_level_name_ref->{$i}"; ! # print "\nhash_level_name:$hash_level_name{$i-1}"; if ( $hash_level_name{$i} eq 'feature' ){ my $hash_ref=$AoH_local_id[$i+1]; foreach my $key (keys %$hash_ref){ ! print "\nkey:$key\tvalue:$hash_ref->{$key}"; } $result= $AoH_local_id[$i+1]{'organism_id'}; ! print "\n\norganism_id is:$result ........"; last; } } ! print "\n\norganism_id is:$result ........"; return $result; --- 1111,1127 ---- my $result; for ( my $i=$level; $i>=0; $i--){ ! print "\nhash_level_name:$hash_level_name{$i-1}" if ($DEBUG==1); if ( $hash_level_name{$i} eq 'feature' ){ my $hash_ref=$AoH_local_id[$i+1]; foreach my $key (keys %$hash_ref){ ! print "\nkey:$key\tvalue:$hash_ref->{$key}" if ($DEBUG==1); } $result= $AoH_local_id[$i+1]{'organism_id'}; ! print "\n\norganism_id is:$result ........" if ($DEBUG==1); last; } } ! print "\n\norganism_id is:$result ........" if ($DEBUG==1); return $result; *************** *** 1182,1191 **** my $stm_select=sprintf("select $data_list from $table where $table_id=$id"); ! print "\nget_ref_data stm:$stm_select"; my $array_ref=$dbh_obj->get_all_arrayref($stm_select); if (defined $array_ref){ for my $i (0..$#{$array_ref->[0]}){ $hash_ref->{$array_table_cols[$i]}=$array_ref->[0][$i]; ! print "\nfrom ref:$table:$array_table_cols[$i]:$array_ref->[0][$i]"; } return $hash_ref; --- 1152,1161 ---- my $stm_select=sprintf("select $data_list from $table where $table_id=$id"); ! print "\nget_ref_data stm:$stm_select" if ($DEBUG==1); my $array_ref=$dbh_obj->get_all_arrayref($stm_select); if (defined $array_ref){ for my $i (0..$#{$array_ref->[0]}){ $hash_ref->{$array_table_cols[$i]}=$array_ref->[0][$i]; ! print "\nfrom ref:$table:$array_table_cols[$i]:$array_ref->[0][$i]" if ($DEBUG==1); } return $hash_ref; Index: XMLValidator.pm =================================================================== RCS file: /cvsroot/gmod/schema/XMLTools/XORT/Loader/XMLValidator.pm,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** XMLValidator.pm 10 Jun 2003 15:01:06 -0000 1.3 --- XMLValidator.pm 18 Jul 2003 22:00:51 -0000 1.4 *************** *** 7,10 **** --- 7,11 ---- use XML::Parser::PerlSAX; use XORT::Util::DbUtil::DB; + use XORT::Loader::XMLAccession; use strict; *************** *** 25,28 **** --- 26,30 ---- my $dbh_obj; my %hash_table_col; + my $db; # this hash will have the pair of local_id/db_id, eg. cvterm_99 from xml file, id from cvterm_id 88, the key format: table_name:local_id *************** *** 59,62 **** --- 61,65 ---- my $SQL_NODE='_sql'; + my $DEBUG=0; # all the operator *************** *** 65,69 **** my $OP_INSERT='insert'; my $OP_DELETE='delete'; ! my $OP_lookup='lookup'; # all attribute --- 68,72 ---- my $OP_INSERT='insert'; my $OP_DELETE='delete'; ! my $OP_LOOKUP='lookup'; # all attribute *************** *** 108,114 **** --- 111,137 ---- $self->{'db'}=shift; $self->{'file'}=shift; + $DEBUG=shift; + $db=$self->{db}; my $pro=XORT::Util::GeneralUtil::Properties->new('ddl'); %hash_ddl=$pro->get_properties_hash(); print "\n start to validate xml file with DB connection.....:db:$self->{db} \tfile:$self->{file}"; + + # under all thos hash and arrary, otherwise, it will intervense for batch executing + + undef $level; + undef %hash_table_col; + undef %hash_id; + undef @AoH_data; + undef @AoH_data_new; + undef @AoH_db_id; + undef @AoH_local_id; + undef @AoH_op; + undef @AoH_ref; + undef %hash_level_id; + undef %hash_level_name; + undef %hash_level_op; + undef %hash_level_ref; + undef %hash_level_sub_detect; + bless $self, $type; return $self; *************** *** 131,135 **** my $file=$self->{file}; ! my $db=$self->{db}; my $dbh_pro=XORT::Util::GeneralUtil::Properties->new($db); my %dbh_hash=$dbh_pro->get_dbh_hash(); --- 154,158 ---- my $file=$self->{file}; ! my $dbh_pro=XORT::Util::GeneralUtil::Properties->new($db); my %dbh_hash=$dbh_pro->get_dbh_hash(); *************** *** 611,615 **** if (defined $hash_ddl{$parent_element}){ my $hash_ref_cols=&_get_table_columns($parent_element); ! if (defined $hash_ref_cols->{$element_name} && ($data =~/\w/) && $data ne "\t"){ my $key=$hash_level_name{$level-1}.".".$element_name; # treat differently for update and other operation --- 634,638 ---- if (defined $hash_ddl{$parent_element}){ my $hash_ref_cols=&_get_table_columns($parent_element); ! if (defined $hash_ref_cols->{$element_name} && ($data =~/\w/ || $data eq '-') && $data ne "\t"){ my $key=$hash_level_name{$level-1}.".".$element_name; # treat differently for update and other operation *************** *** 1314,1318 **** my $table=shift; my $level=shift; ! my $hash_level_id=shift; my $hash_level_name_ref=shift; my %result; --- 1337,1341 ---- my $table=shift; my $level=shift; ! my $hash_level_id_ref=shift; my $hash_level_name_ref=shift; my %result; *************** *** 1322,1326 **** for (@array_foreign_key){ $hash_foreign_key{$_}++; - } --- 1345,1348 ---- *************** *** 1332,1338 **** } my $table_non_null=$table."_non_null_cols"; my @temp=split(/\s+/, $hash_ddl{$table_non_null}); ! my $table_id=$table."_id"; for my $i(0..$#temp){ my $foreign_key=$table.":".$temp[$i]; --- 1354,1373 ---- } + my %hash_unique_key; + my $table_unique_key=$table."_unique"; + my @unique_key=split(/\s+/, $hash_ddl{$table_unique_key}); + for (@unique_key){ + $hash_unique_key{$_}++; + } + + foreach my $key (keys %$hash_ref){ + print "\nin data_check col:$key\tvalue:$hash_ref->{$key}:" if ($DEBUG==1); + } + my $table_non_null=$table."_non_null_cols"; my @temp=split(/\s+/, $hash_ddl{$table_non_null}); ! my $table_id_string=$table."_primary_key"; ! my $table_id=$hash_ddl{$table_id_string}; ! #my $table_id=$table."_id"; for my $i(0..$#temp){ my $foreign_key=$table.":".$temp[$i]; *************** *** 1340,1344 **** if ($temp[$i] ne $table_id && !(defined $hash_ref->{$temp[$i]}) && (defined $hash_foreign_key{$temp[$i]} )){ my $temp_key=$table.":".$temp[$i]."_ref_table"; ! print "\ndata_check temp_key:$temp_key:value:$hash_ddl{$temp_key}"; my $retrieved_value=&_context_retrieve($level, $hash_ddl{$temp_key}, $hash_level_name_ref); if ($retrieved_value){ --- 1375,1379 ---- if ($temp[$i] ne $table_id && !(defined $hash_ref->{$temp[$i]}) && (defined $hash_foreign_key{$temp[$i]} )){ my $temp_key=$table.":".$temp[$i]."_ref_table"; ! print "\ndata_check temp_key:$temp_key:value:$hash_ddl{$temp_key}" if ($DEBUG==1); my $retrieved_value=&_context_retrieve($level, $hash_ddl{$temp_key}, $hash_level_name_ref); if ($retrieved_value){ *************** *** 1346,1357 **** } elsif (!(defined $hash_non_null_default{$temp[$i]})) { ! print LOG0 "\n\ncan not find the value for required element:$temp[$i] of table:$table from context ....."; ! &create_log(\%hash_trans, $hash_ref, $table); ! } ! } elsif ($temp[$i] ne $table_id && !(defined $hash_ref->{$temp[$i]}) && !(defined $hash_foreign_key{$temp[$i]}) && !(defined $hash_non_null_default{$temp[$i]})) { ! print LOG0 "\n\nyou missed the required element:$temp[$i] for table:$table, also it is not foreign key"; ! &create_log(\%hash_trans, $hash_ref, $table); } } --- 1381,1444 ---- } elsif (!(defined $hash_non_null_default{$temp[$i]})) { ! if (exists $hash_unique_key{$temp[$i]}){ ! print "\n\ncan not find the value for required element(unique key):$temp[$i] of table:$table from context ....."; ! &create_log(\%hash_trans, \%hash_id, $log_file); ! #exit(1); ! } ! #if not null, but not unique key, then depend on the op: ok for lookup/delete, ok for force if already exist in DB, NOT ok for insert ! else { ! my $op=$hash_level_op{$level-1}; ! if ($op eq $OP_INSERT){ ! print "\n\ncan not find the value for required element(foreign key, not unique, op:$OP_INSERT):$temp[$i] of table:$table from context ....."; ! &create_log(\%hash_trans, \%hash_id, $log_file); ! #exit(1); ! } ! elsif ($op eq $OP_FORCE){ ! my %hash_temp; ! foreach my $key(keys %$hash_ref){ ! $hash_temp{$key}=$hash_ref->{$key}; ! } ! ! ! my $db_id=$dbh_obj->db_lookup(-data_hash=>\%hash_temp, -table=>$table,-hash_local_id=>\%hash_id, -hash_trans=>\%hash_trans, -log_file=>$log_file); ! if (!($db_id)){ ! print "\n\n$temp[$i]: is foreign_key, unique_key, unable to retrieve from context, op is $OP_FORCE, and this record is not in DB yet"; ! &create_log(\%hash_trans, \%hash_id, $log_file); ! #exit(1); ! ! } ! } ! } ! ! } ! } # end of is foreign_key, try to retrieve from context elsif ($temp[$i] ne $table_id && !(defined $hash_ref->{$temp[$i]}) && !(defined $hash_foreign_key{$temp[$i]}) && !(defined $hash_non_null_default{$temp[$i]})) { ! if (exists $hash_unique_key{$temp[$i]}){ ! print "\n\nyou missed the required element:$temp[$i] for table:$table, also it is not foreign key"; ! &create_log(\%hash_trans, \%hash_id, $log_file); ! exit(1); ! } ! else { ! my $op=$hash_level_op{$level-1}; ! if ($op eq $OP_INSERT){ ! print "\n\ncan not find the value for required element(not foreign key, not unique, op:$OP_INSERT):$temp[$i] of table:$table from context ....."; ! &create_log(\%hash_trans, \%hash_id, $log_file); ! #exit(1); ! } ! #if not null, but not unique key, then depend on the op: ok for lookup/delete, ok for force is already exist in DB, NOT ok for insert ! elsif ($op eq $OP_FORCE){ ! my %hash_temp; ! foreach my $key(keys %$hash_ref){ ! $hash_temp{$key}=$hash_ref->{$key}; ! } ! my $db_id=$dbh_obj->db_lookup(-data_hash=>\%hash_temp, -table=>$table,-hash_local_id=>\%hash_id, -hash_trans=>\%hash_trans, -log_file=>$log_file); ! if (!($db_id)){ ! print "\n\n$temp[$i]: not foreign_key, unique_key, op is $OP_FORCE, and this record is not in DB yet"; ! &create_log(\%hash_trans, \%hash_id, $log_file); ! #exit(1); + } + } + } } } *************** *** 1364,1367 **** --- 1451,1455 ---- + # This util method will retrieve the missed value based on the context check: nearest outer of correct type # *************** *** 1413,1486 **** my $level=shift; my ($dbname, $acc, $version, $db_id, $stm_select, $stm_insert); ! ! if ($accession =~ /([a-zA-Z]+)\:([a-zA-Z0-9]+)(\.\d)*/){ ! my @temp=split(/\:/, $accession); ! $dbname=$temp[0]; ! if ($temp[1] =~/\./){ ! my @temp1=split(/\./, $temp[1]); ! $acc=$temp1[0]; ! $version=$temp1[1]; ! } ! else { ! $acc=$temp[1]; ! $version=''; ! } ! ! my $organism_id; ! #create a pseudo organism record GAME xml loading ! $organism_id=$dbh_obj->get_one_value("select organism_id from organism where genus='Drosophila' and species='melanogaster'"); ! if (! $organism_id) { ! $dbh_obj->execute_sql("insert into organism (genus, species) values('Drosophila', 'melanogaster')"); ! $organism_id=$dbh_obj->get_one_value("select organism_id from organism where genus='Drosophila' and species='melanogaster'"); ! } ! ! my $type_id; ! # create pseudo cvterm record for GAME xml loading ! $type_id=$dbh_obj->get_one_value("select cvterm_id from cvterm, cv where name='curator note' and cvname='pub type' and cv.cv_id=cvterm.cv_id"); ! if (! $type_id) { ! my $cv_id; ! $cv_id=$dbh_obj->get_one_value("select cv_id from cv where cvname='pub type'"); ! if (!$cv_id) { ! $dbh_obj->execute_sql("insert into cv(cvname) values('pub type')"); ! $cv_id=$dbh_obj->get_one_value("select cv_id from cv where cvname='pub type'"); ! } ! $dbh_obj->execute_sql(sprintf("insert into cvterm(name, cv_id) values('curator note', $cv_id) ")); ! $type_id=$dbh_obj->get_one_value("select cvterm_id from cvterm, cv where name='curator note' and cvname='pub type' and cv.cv_id=cvterm.cv_id"); ! } ! ! ! # here to figure out eg. feature_id, table will be feature ! if ($table =~/\_id/){ ! my @temp2=split(/\_id/, $table); ! $table=$temp2[0]; ! } ! ! my $table_id=$table."_id"; ! if ($table eq 'dbxref'){ ! $stm_select=sprintf("select $table_id from $table where dbname='%s' and accession='%s' and version='%s'", $dbname, $acc, $version); ! $stm_insert=sprintf("insert into $table (dbname, accession, version) values('%s', '%s', '%s')", $dbname, $acc, $version); ! } ! elsif ($table eq 'feature' ){ ! $stm_select=sprintf("select $table_id from $table where uniquename='%s' and organism_id=%s", $accession, $organism_id); ! $stm_insert=sprintf("insert into feature (organism_id, uniquename, type_id) values(%s, '%s', 3)", $organism_id, $accession); ! } ! ! print "\nget_accession:$stm_select\n$stm_insert"; ! $db_id=$dbh_obj->get_one_value($stm_select); ! # if (!$db_id){ ! # $dbh_obj->execute_sql($stm_insert); ! # $db_id=$dbh_obj->get_one_value($stm_select); ! # } ! ! # (/[a-zA-Z]+\:([a-Z0-9]+)) } else { ! print "\nsorry, the accession:$accession is not correct format as: db:acc[.version]"; ! &create_log(\%hash_trans, \%hash_id, $table); ! #exit(1); } - return $db_id; } --- 1501,1523 ---- my $level=shift; + #for validation, only LOOKUP, no other operation allow. + my $op=$OP_LOOKUP; my ($dbname, $acc, $version, $db_id, $stm_select, $stm_insert); + print "\nstart the _get_accession in XMLParse.pm...."; ! my $config_acc_file=$ENV{CodeBase}."/XORT/Config/config_accession.xml"; ! if (-e $config_acc_file) { ! $dbh_obj->close(); ! my $acc_parser=XORT::Loader::XMLAccession->new($db, $config_acc_file, $DEBUG); ! my $acc_id=$acc_parser->parse_accession($table, $accession, $op); ! print "\nget global_id:$acc_id: for this accession:$accession"; ! $dbh_obj->open(); ! print "\nend the _get_accession...."; ! return $acc_id; } else { ! print "\nunable to find configureation file:$config_acc_file"; ! return; } } *************** *** 1612,1616 **** ! # print LOG0 "\nsorry, for some reason, this process stop before finish the following main transaction(child of root):$hash_trans->{table}"; foreach my $key (keys %$hash_trans){ if ($key ne 'table'){ --- 1649,1653 ---- ! print LOG0 "\nsorry, for some reason, this process stop before finish the following main transaction(child of root):$hash_trans->{table}"; foreach my $key (keys %$hash_trans){ if ($key ne 'table'){ Index: XMLValidatorNoDB.pm =================================================================== RCS file: /cvsroot/gmod/schema/XMLTools/XORT/Loader/XMLValidatorNoDB.pm,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** XMLValidatorNoDB.pm 6 Jun 2003 19:35:00 -0000 1.2 --- XMLValidatorNoDB.pm 18 Jul 2003 22:00:51 -0000 1.3 *************** *** 68,71 **** --- 68,73 ---- my $ATTRIBUTE_REF='ref'; + my $DEBUG=0; + # for some elements, it will be ignored, i.e view, and _app_data, # algorithms to filter out ignore elements: initiately P_pseudo set to -1, for tables_pseudo, increase by 1 at beginning of start_element, decrease by 1 at end of end_element *************** *** 102,107 **** --- 104,127 ---- my $self={}; $self->{'file'}=shift; + $DEBUG=shift; my $pro=XORT::Util::GeneralUtil::Properties->new('ddl'); %hash_ddl=$pro->get_properties_hash(); + + # under all thos hash and arrary, otherwise, it will intervense for batch executing + undef $level; + undef %hash_table_col; + undef %hash_id; + undef @AoH_data; + undef @AoH_data_new; + undef @AoH_db_id; + undef @AoH_local_id; + undef @AoH_op; + undef @AoH_ref; + undef %hash_level_id; + undef %hash_level_name; + undef %hash_level_op; + undef %hash_level_ref; + undef %hash_level_sub_detect; + print "\n start to validate xml file without DB connection....."; bless $self, $type; |