Update of /cvsroot/gmod/schema/GMODTools/conf/bulkfiles In directory sc8-pr-cvs2.sourceforge.net:/tmp/cvs-serv29196/conf/bulkfiles Modified Files: bulkfiles_template.xml featuresets.xml organisms.xml sgdbulk.xml site_defaults.xml Added Files: chadogenepagesql.xml Log Message: no_csomesplit change for genomes with many scaffolds; validate chado variables; config updates --- NEW FILE: chadogenepagesql.xml --- <opt name="chadogenepagesql" date="20060723" > <title>Chado GenePage SQL</title> <ENV_default seq_ontology="Sequence Ontology Feature Annotation" golden_path="'chromosome', 'chromosome_arm', 'ultra_scaffold'" featureprops="'Note','cyto_range','gbunit'" species="Unknown_species" unknown_chr="U" analysis_where_clause="" est_where_clause="" /> <about> This is GMOD Chado SQL for gene-page output tables; i.e., all the fields relevant to a gene feature needed to create useful gene page web and xml reports. May need several variants (brief, full, tuned to each project's use of chado tables to store data relevant to individual genes). Also want other main non-gene feature output SQL (e.g. transposons). See GMODTools/conf/bulkfiles/chadofeatsql.xml Use with GMOD genepages. Should be part of chadofeatsql.xml or not? </about> <feature_sql id="v_genepage1" type="view"> -- add dbxrefprop output of all dbxref (type_id, value, rank?) -- add all *prop outputs ? (have featureprop) -- analysisprop.sql feature_cvtermprop.sql featureprop.sql -- cvtermprop.sql feature_relationshipprop.sql featureprop_pub.sql -- dbxrefprop.sql feature_relationshipprop_pub.sql organismprop.sql <!-- select v.* from feature f join v_genepage1 as v using(feature_id) where f.organism_id = 10 and f.type_id = 778 order by f.feature_id,v.field // how do we get overlapped analysisfeats included with main feat? 
select f.name, feature_overlaps(f.feature_id) from feature f where f.feature_id = 146;
SWE1 | (146,,10,SWE1,SWE1,,,,778,f,f,"2007-03-22 00:07:50.978518","2007-03-22 00:07:50.978518")
SWE1 | (147,,10,SWE1.t01,SWE1.t01,,,,308,f,f,"2007-03-22 00:07:50.978518","2007-03-22 00:07:50.978518")
SWE1 | (149,,10,exon-auto149,auto149,,,,221,f,f,"2007-03-22 00:07:50.978518","2007-03-22 00:07:50.978518")
-->
<sql><![CDATA[
-- v_genepage1: one (feature_id, field, value) row per gene-page attribute.
-- Each UNION ALL branch contributes one kind of field:
--   Name, uniquename, seqlen     from feature
--   type                         from cvterm via feature.type_id
--   organism                     from organism.abbreviation
--   Synonym / Synonym_2nd        from feature_synonym (is_current selects the label)
--   Dbxref                       primary dbxref of the feature, as db:accession
--   Dbxref 2 / Dbxref obsolete   from feature_dbxref (is_current selects the label)
--   <cv name>                    from feature_cvterm, value = first 40 chars of term name; accession
--   <cvterm name>                from featureprop
--   location                     from featureloc, as src:fmin+1..fmax with a strand marker
--   an:<analysis>                from analysisfeature, first non-null score column
CREATE OR REPLACE VIEW v_genepage1 (feature_id, field, value) AS
  SELECT feature_id AS feature_id, 'Name' as field, name as value
  FROM feature
UNION ALL
  SELECT feature_id AS feature_id, 'uniquename' as field, uniquename as value
  FROM feature
UNION ALL
  -- cast the seqlen column itself; text('seqlen') only repeated the literal label
  SELECT feature_id AS feature_id, 'seqlen' as field, cast(seqlen as text) as value
  FROM feature
UNION ALL
  SELECT f.feature_id AS feature_id, 'type' as field, c.name as value
  FROM feature f, cvterm c
  WHERE f.type_id = c.cvterm_id
UNION ALL
  SELECT f.feature_id AS feature_id, 'organism' as field, o.abbreviation as value
  FROM feature f, organism o
  WHERE f.organism_id = o.organism_id
UNION ALL
  SELECT fs.feature_id AS feature_id,
    CASE WHEN fs.is_current IS FALSE THEN 'Synonym_2nd' ELSE 'Synonym' END AS field,
    s.name as value
  FROM feature_synonym fs, synonym s
  WHERE fs.synonym_id = s.synonym_id
UNION ALL
  SELECT f.feature_id AS feature_id, 'Dbxref' as field,
    gd.name||':'||gx.accession as value
  FROM feature f, db gd, dbxref gx
  WHERE f.dbxref_id = gx.dbxref_id and gx.db_id = gd.db_id
UNION ALL
  SELECT fs.feature_id AS feature_id,
    CASE WHEN fs.is_current IS FALSE THEN 'Dbxref obsolete' ELSE 'Dbxref 2' END AS field,
    (d.name || ':' || s.accession)::text AS value
  FROM feature_dbxref fs, dbxref s, db d
  WHERE fs.dbxref_id = s.dbxref_id and s.db_id = d.db_id
UNION ALL
  SELECT fc.feature_id AS feature_id, c.name AS field,
    substr(cv.name,1,40) || '; '|| dx.accession AS value
  FROM feature_cvterm fc, cvterm cv, cv c, dbxref dx
  WHERE fc.cvterm_id = cv.cvterm_id and cv.cv_id = c.cv_id and cv.dbxref_id = dx.dbxref_id
UNION ALL
  SELECT fp.feature_id AS feature_id, cv.name AS field, fp.value AS value
  FROM featureprop fp, cvterm cv
  WHERE fp.type_id = cv.cvterm_id
UNION ALL
  -- fmin is shifted by +1 for display; strand is rendered as ' ', ' [-]' or ' [+]'
  SELECT fl.feature_id AS feature_id, 'location' as field,
    chr.uniquename ||':'|| cast( fl.fmin+1 as text) ||'..'|| cast( fl.fmax as text)
    || CASE
         WHEN fl.strand IS NULL THEN ' '
         WHEN fl.strand < 0 THEN ' [-]'
         ELSE ' [+]'
       END AS value
  FROM featureloc fl, feature chr
  WHERE fl.srcfeature_id = chr.feature_id
  -- here if we select on main features (genes) need added join thru location OVERLAP *
UNION ALL
  -- label: analysis name, else program.sourcename, else program;
  -- value: first non-null of rawscore, normscore, significance, else identity
  SELECT af.feature_id AS feature_id,
    'an:' || CASE
      WHEN a.name IS NOT NULL THEN a.name
      WHEN a.sourcename IS NOT NULL THEN (a.program || '.' || a.sourcename)::text
      ELSE a.program
    END AS field,
    CASE
      WHEN af.rawscore IS NOT NULL THEN cast(af.rawscore as text)
      WHEN af.normscore IS NOT NULL THEN cast(af.normscore as text)
      WHEN af.significance IS NOT NULL THEN cast(af.significance as text)
      ELSE cast(af.identity as text)
    END AS value
  FROM analysisfeature af, analysis a
  WHERE af.analysis_id = a.analysis_id
;
GRANT SELECT ON v_genepage1 TO PUBLIC;
]]></sql>
</feature_sql>
<feature_sql id="v_analysis_cogs" type="view">
<sql><![CDATA[
-- special case analysis views
-- replace these special analysis cases with general analysisfeature dump !??
-- v_analysis_cogs: (feature_id, field, value) rows from five UNION ALL branches:
--   1. 'COG'               -> 'euCOG:'||normscore for analysisfeature rows of program 'orthomcl'
--   2. 'euCOG:'||normscore -> db:uniquename of each other feature with the same normscore
--                             in the same analysis
--   3. 'COG'               -> db:accession from feature_dbxref
--   4. db:accession        -> db:uniquename of each other feature sharing that dbxref
--   5. dbxref / dbxref_2nd -> db:accession from feature_dbxref
-- NOTE(review): line breaks were reconstructed. In branches 3-5 the gd.name filter is
-- assumed to be live code following the commented-out "fd.feature_id = t.id" -- confirm.
CREATE OR REPLACE VIEW v_analysis_cogs (feature_id, field, value) AS
  SELECT af.feature_id, 'COG' as field, ('euCOG:'||af.normscore) as value
  FROM analysisfeature af, analysis a
  WHERE af.analysis_id = a.analysis_id and a.program = 'orthomcl'
UNION ALL
  SELECT af.feature_id, ('euCOG:'||af.normscore) as field,
    ((select name from db where db_id =
       (select db_id from dbxref where dbxref_id = fo.dbxref_id))
     ||':'||fo.uniquename) as value
  FROM analysisfeature af, analysisfeature afo, feature fo, analysis a
  WHERE af.analysis_id = a.analysis_id and a.program = 'orthomcl'
    and afo.normscore = af.normscore and afo.analysis_id = a.analysis_id
    and fo.feature_id = afo.feature_id
    and fo.feature_id <> af.feature_id
UNION ALL
  SELECT fd.feature_id, 'COG' AS field, (gd.name||':'||gx.accession) as value
  FROM feature_dbxref fd, db gd, dbxref gx
  WHERE fd.dbxref_id = gx.dbxref_id and gx.db_id = gd.db_id
    -- fd.feature_id = t.id
    and gd.name = 'IPC'
UNION ALL
  SELECT fd.feature_id, (gd.name||':'||gx.accession) AS field,
    ((select name from db where db_id =
       (select db_id from dbxref where dbxref_id = fo.dbxref_id))
     ||':'||fo.uniquename) as value
  FROM feature_dbxref fd, feature_dbxref fdo, feature fo, db gd, dbxref gx
  WHERE fd.dbxref_id = gx.dbxref_id and gx.db_id = gd.db_id
    -- fd.feature_id = t.id
    and gd.name = 'IPC'
    and fdo.dbxref_id = gx.dbxref_id
    and fdo.feature_id = fo.feature_id
    and fdo.feature_id <> fd.feature_id
UNION ALL
  SELECT fd.feature_id,
    CASE WHEN fd.is_current IS FALSE THEN 'dbxref_2nd' ELSE 'dbxref' END AS field,
    (gd.name||':'||gx.accession) as value
  FROM feature_dbxref fd, db gd, dbxref gx
  WHERE fd.dbxref_id = gx.dbxref_id and gx.db_id = gd.db_id
    -- fd.feature_id = t.id
    and gd.name <> 'IPC'
;
GRANT SELECT ON v_analysis_cogs TO PUBLIC;
]]></sql>
</feature_sql>
<feature_sql id="otherviews" type="view">
<sql><![CDATA[
-- attr view for regular features which may have parent features (exons)
-- gffattr_gmodel: (feature_id, type, attribute) rows from
--   feature_dbxref  (dbxref / dbxref_2nd, as db:accession),
--   featureprop     (only cvterm names listed in the ${featureprops} config variable),
--   feature_synonym (synonym / synonym_2nd, non-internal synonyms only),
--   feature_relationship of type 'putative_ortholog_of' (object uniquename, 'FlyBase:' prefix),
--   feature_relationship (parent_oid = object_id, with ':'||rank appended when rank is set).
CREATE OR REPLACE VIEW gffattr_gmodel ( feature_id, type, attribute ) AS
  SELECT feature_id,
    CASE WHEN fs.is_current IS FALSE THEN 'dbxref_2nd' ELSE 'dbxref' END AS type,
    d.name || ':' || s.accession AS attribute
  FROM dbxref s, feature_dbxref fs, db d
  WHERE fs.dbxref_id = s.dbxref_id and s.db_id = d.db_id
UNION ALL
  SELECT feature_id, cv.name AS type, fp.value AS attribute
  FROM featureprop fp, cvterm cv
  WHERE fp.type_id = cv.cvterm_id and (cv.name in ( ${featureprops} ))
  -- keep this restriction - other props not useful here: comments, sp_comment, owner ...
  -- mar05: add 'putative_ortholog_of' cv for dpse/..; this is in feature_relationship
UNION ALL
  SELECT feature_id,
    CASE WHEN fs.is_current IS FALSE THEN 'synonym_2nd' ELSE 'synonym' END AS type,
    s.synonym_sgml AS attribute
  FROM feature_synonym fs, synonym s
  WHERE fs.synonym_id = s.synonym_id and fs.is_internal IS FALSE
UNION ALL
  -- add parent feat ids for exons, etc.
  -- putative_ortholog_of=dmel/gene:CG31648
  SELECT pk.subject_id AS feature_id, cv.name AS type,
    'FlyBase:' || fo.uniquename AS attribute
  FROM feature_relationship pk, feature fo, cvterm cv
  WHERE cv.name = 'putative_ortholog_of'
    and pk.type_id = cv.cvterm_id and pk.object_id = fo.feature_id
UNION ALL
  -- add parent feat ids for exons, etc.
  SELECT pk.subject_id AS feature_id, 'parent_oid' AS type,
    CASE
      WHEN pk.rank IS NULL THEN text(pk.object_id)
      ELSE pk.object_id || ':' || pk.rank
    END
  FROM feature_relationship pk
;
GRANT SELECT ON gffattr_gmodel TO PUBLIC;

-- seqlen/featloc-length view
-- CREATE OR REPLACE VIEW feature_length ( feature_id, flength ) AS
-- SELECT feature_id,
--   CASE WHEN seqlen IS NULL THEN (fl.fmax - fl.fmin) ELSE fs.seqlen END AS flength,
-- FROM featureloc fl
-- WHERE fl.feature_id = feature_id
-- ;

-- attr view for match features
-- gffattr_match: the dbxref, featureprop and synonym branches of gffattr_gmodel,
-- without the feature_relationship branches.
CREATE OR REPLACE VIEW gffattr_match ( feature_id, type, attribute ) AS
  SELECT feature_id,
    CASE WHEN fs.is_current IS FALSE THEN 'dbxref_2nd' ELSE 'dbxref' END AS type,
    d.name || ':' || s.accession AS attribute
  FROM dbxref s, feature_dbxref fs, db d
  WHERE fs.dbxref_id = s.dbxref_id and s.db_id = d.db_id
UNION ALL
  SELECT feature_id, cv.name AS type, fp.value AS attribute
  FROM featureprop fp, cvterm cv
  WHERE fp.type_id = cv.cvterm_id and (cv.name in ( ${featureprops} ))
UNION ALL
  SELECT feature_id,
    CASE WHEN fs.is_current IS FALSE THEN 'synonym_2nd' ELSE 'synonym' END AS type,
    s.synonym_sgml AS attribute
  FROM feature_synonym fs, synonym s
  WHERE fs.synonym_id = s.synonym_id and fs.is_internal IS FALSE
;
GRANT SELECT ON gffattr_match TO PUBLIC;

-- attrib view for cross-species feats (syntenic_region, orthology)
CREATE OR REPLACE VIEW gffattr_synteny ( feature_id, type, attribute ) AS
  -- parent feat ids for source supercontigs, etc.
SELECT pk.subject_id, text('parent_oid') as type, CASE WHEN pk.rank IS NULL THEN text(pk.object_id) ELSE pk.object_id || ':' || pk.rank END FROM feature_relationship pk ; GRANT SELECT ON gffattr_synteny TO PUBLIC; -- use this one instead of above CREATE OR REPLACE VIEW gffattr_synt2 ( feature_id, type, attribute ) AS SELECT feature_id, text('to_species') AS type, text(targ.organism_id) AS attribute FROM feature targ -- see above orthofix.pl: add case when this select is missing, use ortho featloc -- to find equal gene feature and putative_ortholog_of UNION ALL SELECT feature_id, text('to_name') AS type, CASE WHEN targ.uniquename = targ.name THEN targ.name ELSE targ.name || ',' || targ.uniquename END AS attribute FROM feature targ WHERE NOT( targ.type_id IN ( select cvterm_id from cvterm where name in ( ${golden_path} ) ) ) ; GRANT SELECT ON gffattr_synt2 TO PUBLIC; -- for analysis features -- problem where w/ some analysis features - promotor, transposon -- ? need to restrict armcv to cv_id = SO id - e.g. 
find chromosome in 4 cv's CREATE OR REPLACE VIEW gffatts_anfloc ( feature_id, arm, fmin, fmax, strand, organism_id ) AS SELECT armloc.feature_id, armft.uniquename as arm, armloc.fmin, armloc.fmax, armloc.strand, armft.organism_id FROM feature armft, featureloc armloc, cvterm armcv, cv socv WHERE armft.type_id = armcv.cvterm_id and armcv.name in ( ${golden_path} ) and armcv.cv_id = socv.cv_id and socv.name = '${seq_ontology}' and armft.feature_id = armloc.srcfeature_id ; GRANT SELECT ON gffatts_anfloc TO PUBLIC; -- for analysis features CREATE OR REPLACE VIEW gffatts_evid ( feature_id, type, attribute ) AS SELECT pk.subject_id, text('parent_oid'), text(pk.object_id) FROM feature_relationship pk ; GRANT SELECT ON gffatts_evid TO PUBLIC; ]]></sql> </feature_sql> </opt> Index: sgdbulk.xml =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/sgdbulk.xml,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** sgdbulk.xml 12 Jan 2006 06:03:49 -0000 1.2 --- sgdbulk.xml 15 Oct 2007 16:19:28 -0000 1.3 *************** *** 1,9 **** <opt name="sgdbulk" ! relid="5" ! date="20051129" make_current="1" > <about id="sgdbulk"><![CDATA[ See bulkfiles_template.xml for further documentation. --- 1,11 ---- <opt name="sgdbulk" ! relid="7" make_current="1" > + <!-- 07oct: test bulkfiles v1.1 with no_csomesplit --> + <no_csomesplit>1</no_csomesplit> + <about id="sgdbulk"><![CDATA[ See bulkfiles_template.xml for further documentation. *************** *** 19,23 **** Database: ${title} Species: ${species} ! Release: ${rel}, dated ${date} as ${relfull} ]]></doc> --- 21,27 ---- Database: ${title} Species: ${species} ! Release: ${release_id}, dated ${release_date} as ${relfull} ! ! See release notes at <a href="${release_url}"> ${release_url}</a> ]]></doc> *************** *** 41,55 **** |& mail -s gmodtools-debug gil...@in... ]]></doc> ! <release id="5" rel="sgdr5" dbname="sgdlite" date="20050823" ! 
relfull="sgdlite_2005_08_23" ! release_url="/genome/Saccharomyces_cerevisiae/sgdlite-release5.html" ! /> ! <release id="1" rel="sgdr1" dbname="sgdlite_20040519" date="20040519" ! relfull="sgdlite_20040519" ! release_url="/genome/Saccharomyces_cerevisiae/sgdlite-release1.html" ! /> <org>scer</org> --- 45,60 ---- |& mail -s gmodtools-debug gil...@in... + Find worked examples from Bulkfiles SQL use now at + http://www.gmod.org/Sample_Chado_SQL + ]]></doc> ! <release id="7" dbname="sgdlite" date="20070531"/> ! <release id="5" dbname="sgdlite_2005" date="20050823"/> ! <release id="1" dbname="sgdlite_2004" date="20040519"/> ! ! <relfull>${org}_${release_date}</relfull> ! <release_url>/genome/${species}/release${release_id}.html</release_url> <org>scer</org> *************** *** 59,62 **** --- 64,77 ---- <featureprops>'Note','orf_classification'</featureprops> <GFF_source>SGD</GFF_source> + + <!-- default seq_ontology now is sequence --> + <seq_ontology>Sequence Ontology Feature Annotation</seq_ontology> + + <about id="species_variables"> + The golden_path, seq_ontology and species variables are critical. + See bulkfiles_template.xml for further documentation. + </about> + + <valid>0</valid> <!-- set 1 to skip db checks of above variables --> <fileset_override *************** *** 84,91 **** </featdump> ! <!-- feature sets to make fasta bulk files --> <featset>chromosome</featset> <featset>gene</featset> - <!-- <featset>CDS</featset> --> <featset>CDS_translation</featset> <featset>ncRNA</featset> --- 99,105 ---- </featdump> ! <!-- feature sets to make fasta bulk files ; see sgdfeatconf --> <featset>chromosome</featset> <featset>gene</featset> <featset>CDS_translation</featset> <featset>ncRNA</featset> *************** *** 93,97 **** <featset>gene_extended2000</featset> <featset>intergenic</featset> ! <!-- <featset>translation</featset> // none in sgdlite; make from CDS ? 
--> <!-- feature sets to make blast indices --> --- 107,142 ---- <featset>gene_extended2000</featset> <featset>intergenic</featset> ! ! <featmap id="CDS_translation" ! types="CDS gene" ! typelabel="protein protein" ! dotranslate="1" ! fromdb="0" ! /> ! <featmap id="gene" ! types="gene" ! /> ! <featmap id="chromosome" ! types="${golden_path}" ! fromdb="1" ! onlydb="1" ! /> ! <featmap id="transposon" ! types="transposable_element" ! typelabel="transposable_element" ! /> ! <featmap name="ncRNA" ! types="ncRNA snRNA snoRNA rRNA tRNA" ! /> ! <featmap id="gene_extended2000" ! types="gene" ! typelabel="gene_ex2000" ! subrange="-2000..2000" ! /> ! <featmap id="intergenic" ! types="gene" ! typelabel="intergenic" ! method="between" ! /> <!-- feature sets to make blast indices --> Index: bulkfiles_template.xml =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/bulkfiles_template.xml,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** bulkfiles_template.xml 28 Dec 2005 02:22:07 -0000 1.1 --- bulkfiles_template.xml 15 Oct 2007 16:19:28 -0000 1.2 *************** *** 2,9 **** name="bulkfiles" relid="1" ! date="20060102" ! make_current="0" > <about id="bulkfiles_template"><![CDATA[ This is an example main configuration file for GMODTools bulkfiles --- 2,11 ---- name="bulkfiles" relid="1" ! date="20071014" ! make_current="1" > + <title>Chado DB Data Release</title> + <about id="bulkfiles_template"><![CDATA[ This is an example main configuration file for GMODTools bulkfiles *************** *** 57,89 **** ! <title>Chado DB Data Release</title> ! ! <doc id="Release.txt"><![CDATA[ ! Bulk output for Chado genome databases ! Database: ${title} ! Species: ${species} ! Release: ${rel}, dated ${date} as ${relfull} ! ! See release notes at <a href="${release_url}"> ${release_url}</a> ! ]]></doc> ! ! <about id="doc"> ! doc tags are generally printed to files ! 
id= name, file name unless path given ! path= path to output file. ! A few common tags can be used as inserted ${variables} ! </about> ! ! <release id="1" ! rel="rel1" ! dbname="chado_spp_2" ! date="20040519" ! relfull="myspecies_release1" ! release_url="/genome/${species}/myspecies_release1.html" ! /> ! <release id="2" rel="rel2" dbname="chado_spp_1" date="20051020" ! relfull="myspecies_release2" ! release_url="/genome/${species}/myspecies_release2.html" ! /> <about id="release"><![CDATA[ release tags (an ARRAY) will specify release info: --- 59,67 ---- ! <release id="1" dbname="chado1" date="20040519" /> ! <release id="2" dbname="chado2" date="20051020" /> ! <relfull>${org}_${release_date}</relfull> <!-- or ${org}_release${release_id} --> ! <release_url>/genome/${species}/release${release_id}.html</release_url> ! <about id="release"><![CDATA[ release tags (an ARRAY) will specify release info: *************** *** 97,111 **** <!-- see organisms.xml; add your species there --> <org>scer</org> <species>Saccharomyces_cerevisiae</species> <!-- see chadofeatsql ENV_default --> ! <golden_path>'chromosome'</golden_path> <featureprops>'Note','orf_classification'</featureprops> - <seq_ontology>Sequence Ontology Feature Annotation</seq_ontology> <about id="species-genome"> ! These values of org, species,golden_path,featureprops are some ! common species-genome specific options. org = short species id. golden_path = what highest level of genome feature is (a SO term), --- 75,107 ---- + <valid>0</valid> + <about id="valid">Set valid=1 to skip seq_ontology, other variable validation</about> + + <seq_ontology>sequence</seq_ontology> + + <about id="seq_ontology"> + seq_ontology is the name in CV table for the + sequence feature cvterm set, including gene, exon, + chromosome. It varies depending on choices used to + install sequence CV terms in your Chado database. + 'seq_ontology' and 'golden_path' are critical parameters + that must match cvterm values used for feature entries. 
+ See chadofeatsql.xml for more details. + Common alternate values are 'sequence', + 'Sequence Ontology Feature Annotation', 'SO', 'SOFA'. + </about> + <!-- see organisms.xml; add your species there --> <org>scer</org> <species>Saccharomyces_cerevisiae</species> + <!-- see chadofeatsql ENV_default --> ! <golden_path>'chromosome','supercontig'</golden_path> ! <featureprops>'Note','orf_classification'</featureprops> <about id="species-genome"> ! These values of org, species,golden_path,featureprops ! are example species-genome specific options. org = short species id. golden_path = what highest level of genome feature is (a SO term), *************** *** 114,117 **** --- 110,116 ---- </about> + <!-- 0710: this removes per-chromosome/scaffold/. files, not desired but for + well assembled genomes --> + <no_csomesplit>1</no_csomesplit> <featdump Index: featuresets.xml =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/featuresets.xml,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** featuresets.xml 12 Jan 2006 06:03:49 -0000 1.5 --- featuresets.xml 15 Oct 2007 16:19:28 -0000 1.6 *************** *** 32,37 **** </about> ! <!-- feature sets to make fasta bulk files ! --> <featset>gene</featset> <!-- <featset>mRNA</featset> // duplicates transcript --> --- 32,36 ---- </about> ! <!-- feature sets to make fasta bulk files --> <featset>gene</featset> <!-- <featset>mRNA</featset> // duplicates transcript --> *************** *** 39,42 **** --- 38,42 ---- <featset>CDS</featset> <!-- dna of protein coding sequence --> <featset>translation</featset> <!-- protein aminos --> + <!-- <featset>CDS_translation</featset> --> <!-- alternate, create protein from dna --> <!-- <featset>tRNA</featset> --> <!-- <featset>miscRNA</featset> --> *************** *** 45,78 **** <featset>pseudogene</featset> <featset>gene_extended2000</featset> ! <featset>five_prime_UTR</featset> ! 
<featset>three_prime_UTR</featset> ! <featset>intron</featset> <featset>intergenic</featset> - <featset>syntenic_region</featset> ! <featset>scaffold</featset> <!-- see below confusion --> ! <featset>chromosome</featset> ! ! <!-- <featset>EST</featset> // if wanted; reagent seqs not on chromosomes --> ! <featmap name="gene" get_id="1" /> ! <featmap name="five_prime_UTR" add_id="gene" /> ! <featmap name="three_prime_UTR" add_id="gene" /> ! <featmap name="intron" add_id="gene" /> ! <featmap name="mRNA" add_id="gene" /> ! <featmap name="CDS" add_id="gene" /> ! <featmap ! name="translation" types="CDS" typelabel="protein" fromdb="1" /> ! <featmap ! id="CDS_translation" types="CDS" typelabel="protein" dotranslate="1" ! fromdb="0" /> <!-- feb05: problems here with mRNA vs ncRNA snRNA snoRNA rRNA tRNA --- 45,77 ---- <featset>pseudogene</featset> <featset>gene_extended2000</featset> ! <!-- <featset>five_prime_UTR</featset> --> ! <!-- <featset>three_prime_UTR</featset> --> ! <!-- <featset>intron</featset> --> <featset>intergenic</featset> <featset>syntenic_region</featset> ! <!-- <featset>scaffold</featset> --> <!-- see below confusion --> ! <featset>chromosome</featset> <!-- stands for all ${golden_path} --> ! <!-- <featset>EST</featset> --> <!-- if wanted; reagent seqs not on chromosomes --> ! <featmap id="gene" get_id="1" /> ! <featmap id="five_prime_UTR" add_id="gene" /> ! <featmap id="three_prime_UTR" add_id="gene" /> ! <featmap id="intron" add_id="gene" /> ! <featmap id="mRNA" add_id="gene" /> ! <featmap id="CDS" add_id="gene" /> ! <featmap id="translation" types="CDS" typelabel="protein" fromdb="1" /> ! <featmap id="CDS_translation" types="CDS" typelabel="protein" dotranslate="1" ! fromdb="1" ! /> ! <featmap id="gene" ! types="gene" /> <!-- feb05: problems here with mRNA vs ncRNA snRNA snoRNA rRNA tRNA *************** *** 80,117 **** but for gff, other public files, need to use real RNA type. --> ! <featmap ! 
name="transcript" types="mRNA" typelabel="transcript" fromdb="1" /> ! <featmap ! name="transposon" types="transposable_element" typelabel="transposable_element" /> ! <featmap ! name="miscRNA" types="ncRNA snRNA snoRNA rRNA miRNA" /> <!-- this variant includes all non-prot-coding rnas --> ! <featmap ! name="ncRNA" types="ncRNA snRNA snoRNA rRNA tRNA miRNA" /> ! <featmap ! name="gene_extended2000" types="gene" typelabel="gene_ex2000" subrange="-2000..2000" /> ! <featmap ! name="intergenic" types="gene" typelabel="intergenic" method="between" /> ! <featmap ! name="EST" types="EST" fromdb="1" --- 79,109 ---- but for gff, other public files, need to use real RNA type. --> ! <featmap name="transcript" types="mRNA" typelabel="transcript" fromdb="1" /> ! <featmap name="transposon" types="transposable_element" typelabel="transposable_element" /> ! <featmap id="miscRNA" types="ncRNA snRNA snoRNA rRNA miRNA" /> <!-- this variant includes all non-prot-coding rnas --> ! <featmap id="ncRNA" types="ncRNA snRNA snoRNA rRNA tRNA miRNA" /> ! <featmap id="gene_extended2000" types="gene" typelabel="gene_ex2000" subrange="-2000..2000" /> ! <featmap id="intergenic" types="gene" typelabel="intergenic" method="between" /> ! <featmap id="EST" types="EST" fromdb="1" *************** *** 119,126 **** /> ! <!-- many names used for this: golden_path_region may be alternate, or supercontig --> ! <featmap ! name="scaffold" alt_name0 = "golden_path_region" alt_name1 = "golden_path_fragment" --- 111,126 ---- /> + <!-- 200710: no_csomesplit config --> + <featmap id="chromosome" + types="${golden_path}" + fromdb="1" + onlydb="1" + /> ! <!-- many names used for this: ! golden_path_region may be alternate, or supercontig ! // use above chromosome, type=${golden_path} variable ! --> ! 
<featmap id="scaffold" alt_name0 = "golden_path_region" alt_name1 = "golden_path_fragment" Index: organisms.xml =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/organisms.xml,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** organisms.xml 28 Dec 2005 02:22:07 -0000 1.2 --- organisms.xml 15 Oct 2007 16:19:28 -0000 1.3 *************** *** 5,8 **** --- 5,13 ---- > + <!-- perl regex for making file-name abbreviation from genus_species --> + <species_short_pattern>^(\w)[^_]*_(\w{1,3})</species_short_pattern> + <!-- alternate .. --> + <species_short_pattern6>^(\w{1,3})[^_]*_(\w{1,3})</species_short_pattern6> + <about> Table of organism species, abbreviation-id values. *************** *** 13,19 **** </about> - <organism id="dpse" species="Drosophila_pseudoobscura" /> - <organism id="dmel" species="Drosophila_melanogaster" /> - <organism id="agam" species="Anopheles_gambiae" /> <organism id="amel" species="Apis_mellifera" /> --- 18,21 ---- *************** *** 27,32 **** --- 29,36 ---- <organism id="dgri" species="Drosophila_grimshawi" /> <organism id="dmag" species="Daphnia_magna" /> + <organism id="dmel" species="Drosophila_melanogaster" /> <organism id="dmoj" species="Drosophila_mojavensis" /> <organism id="dper" species="Drosophila_persimilis" /> + <organism id="dpse" species="Drosophila_pseudoobscura" /> <organism id="dpul" species="Daphnia_pulex" /> <organism id="drer" species="Danio_rerio" /> Index: site_defaults.xml =================================================================== RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/site_defaults.xml,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** site_defaults.xml 28 Dec 2005 02:22:07 -0000 1.1 --- site_defaults.xml 15 Oct 2007 16:19:28 -0000 1.2 *************** *** 47,55 **** </about> <include>organisms</include> <about id="organisms" > ! list of species and their abbreviations. 
Include your ! species . </about> --- 47,78 ---- </about> + <doc id="Release.txt"><![CDATA[ + Bulk output for Chado genome databases + Database: ${title} + Species: ${species} + Release: ${release_id}, dated ${release_date} as ${relfull} + + See release notes at <a href="${release_url}"> ${release_url}</a> + ]]></doc> + + <about id="doc"> + doc tags are generally printed to files + id= name, file name unless path given + path= path to output file. + A few common tags can be used as inserted ${variables} + </about> + + <relfull>${org}_${release_date}</relfull> <!-- or ${org}_release${release_id} --> + <release_url>/genome/${species}/release${release_id}.html</release_url> + + <newuser>1</newuser> + <about id="newuser"> + This flag turns on extra help and checks. Set 0 if you are tired + of seeing them, or the time they take. + </about> <include>organisms</include> <about id="organisms" > ! List of species and their abbreviations. Include your species. </about> |