[Gmod-schema-cmts] schema/GMODTools/conf/bulkfiles chadogenepagesql.xml, NONE, 1.1 bulkfiles_templa

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Update of /cvsroot/gmod/schema/GMODTools/conf/bulkfiles
In directory sc8-pr-cvs2.sourceforge.net:/tmp/cvs-serv29196/conf/bulkfiles

Modified Files:
	bulkfiles_template.xml featuresets.xml organisms.xml 
	sgdbulk.xml site_defaults.xml 
Added Files:
	chadogenepagesql.xml 
Log Message:
no_csomesplit change for genomes with many scaffolds; validate chado variables; config updates

--- NEW FILE: chadogenepagesql.xml ---
<opt
  name="chadogenepagesql" 
  date="20060723"
  >

  <title>Chado GenePage SQL</title>
  
  <ENV_default
    seq_ontology="Sequence Ontology Feature Annotation"
    golden_path="'chromosome', 'chromosome_arm', 'ultra_scaffold'"
    featureprops="'Note','cyto_range','gbunit'"
    species="Unknown_species"
    unknown_chr="U"
    analysis_where_clause=""
    est_where_clause=""
  />


  <about>
    This is GMOD Chado SQL for gene-page output tables; i.e.,
    all the fields relevant to a gene feature needed to create useful
    gene page web and xml reports.  May need several variants
    (brief, full, tuned to each project's use of chado tables to
    store data relevant to individual genes).
    
    Also want other main non-gene feature output SQL (e.g. transposons)
    
    See  GMODTools/conf/bulkfiles/chadofeatsql.xml
    Use with GMOD genepages. Should be part of chadofeatsql.xml or not?  
    
  </about>
  
  
  <feature_sql id="v_genepage1" type="view">

-- add dbxrefprop output of all dbxref  (type_id, value, rank?)
-- add all *prop outputs ? (have featureprop)
-- analysisprop.sql                   feature_cvtermprop.sql             featureprop.sql
-- cvtermprop.sql                     feature_relationshipprop.sql       featureprop_pub.sql
-- dbxrefprop.sql                     feature_relationshipprop_pub.sql   organismprop.sql
<!--
select v.*
from feature f join v_genepage1 as v using(feature_id)
where f.organism_id = 10 and f.type_id = 778 
order by f.feature_id,v.field

// how do we get overlapped analysisfeats included with main feat?
select f.name, feature_overlaps(f.feature_id)
from feature f
where  f.feature_id = 146;
 SWE1 | (146,,10,SWE1,SWE1,,,,778,f,f,"2007-03-22 00:07:50.978518","2007-03-22 00:07:50.978518")
 SWE1 | (147,,10,SWE1.t01,SWE1.t01,,,,308,f,f,"2007-03-22 00:07:50.978518","2007-03-22 00:07:50.978518")
 SWE1 | (149,,10,exon-auto149,auto149,,,,221,f,f,"2007-03-22 00:07:50.978518","2007-03-22 00:07:50.978518")

-->

  <sql><![CDATA[
-- v_genepage1: gene-page attributes as (feature_id, field, value) rows.
-- Each UNION ALL branch below contributes one kind of attribute.  The view is
-- not restricted to any feature type or organism, so filter on feature_id or
-- join to feature when querying it.
CREATE OR REPLACE VIEW v_genepage1
  (feature_id, field, value)
AS
    -- core columns of the feature row
    SELECT f.feature_id, 'Name' AS field, f.name AS value
    FROM   feature f
  UNION ALL
    SELECT f.feature_id, 'uniquename' AS field, f.uniquename AS value
    FROM   feature f
  UNION ALL
    -- report the seqlen column (as text), not the literal string 'seqlen'
    SELECT f.feature_id, 'seqlen' AS field, CAST(f.seqlen AS text) AS value
    FROM   feature f
  UNION ALL
    -- feature type, by cvterm name
    SELECT f.feature_id, 'type' AS field, c.name AS value
    FROM   feature f
           INNER JOIN cvterm c ON c.cvterm_id = f.type_id
  UNION ALL
    SELECT f.feature_id, 'organism' AS field, o.abbreviation AS value
    FROM   feature f
           INNER JOIN organism o ON o.organism_id = f.organism_id

  UNION ALL
    -- synonyms: non-current ones are labelled Synonym_2nd
    SELECT fs.feature_id,
           CASE WHEN fs.is_current IS FALSE THEN 'Synonym_2nd' ELSE 'Synonym' END AS field,
           s.name AS value
    FROM   feature_synonym fs
           INNER JOIN synonym s ON s.synonym_id = fs.synonym_id

  UNION ALL
    -- primary dbxref of the feature, written as db:accession
    SELECT f.feature_id, 'Dbxref' AS field, gd.name || ':' || gx.accession AS value
    FROM   feature f
           INNER JOIN dbxref gx ON gx.dbxref_id = f.dbxref_id
           INNER JOIN db gd ON gd.db_id = gx.db_id

  UNION ALL
    -- secondary dbxrefs from feature_dbxref: non-current ones are labelled obsolete
    SELECT fd.feature_id,
           CASE WHEN fd.is_current IS FALSE THEN 'Dbxref obsolete' ELSE 'Dbxref 2' END AS field,
           (d.name || ':' || x.accession)::text AS value
    FROM   feature_dbxref fd
           INNER JOIN dbxref x ON x.dbxref_id = fd.dbxref_id
           INNER JOIN db d ON d.db_id = x.db_id

  UNION ALL
    -- cvterm annotations: field is the cv name, value is the term name
    -- (first 40 characters) followed by the accession of the term dbxref
    SELECT fc.feature_id,
           vocab.name AS field,
           substr(term.name, 1, 40) || '; ' || dx.accession AS value
    FROM   feature_cvterm fc
           INNER JOIN cvterm term ON term.cvterm_id = fc.cvterm_id
           INNER JOIN cv vocab ON vocab.cv_id = term.cv_id
           INNER JOIN dbxref dx ON dx.dbxref_id = term.dbxref_id

  UNION ALL
    -- feature properties: field is the property type name
    SELECT fp.feature_id, ptype.name AS field, fp.value AS value
    FROM   featureprop fp
           INNER JOIN cvterm ptype ON ptype.cvterm_id = fp.type_id

  UNION ALL
    -- location as source:start..end with a strand marker
    -- fmin is shifted by one for display, fmax is printed as stored
    SELECT fl.feature_id, 'location' AS field,
           chr.uniquename || ':' || CAST(fl.fmin + 1 AS text) || '..' || CAST(fl.fmax AS text)
           || CASE
                WHEN fl.strand IS NULL THEN ' '
                WHEN fl.strand < 0 THEN ' [-]'
                ELSE ' [+]'
              END AS value
    FROM   featureloc fl
           INNER JOIN feature chr ON chr.feature_id = fl.srcfeature_id

  -- here if we select on main features (genes) need added join thru location OVERLAP *
  UNION ALL
    -- analysis results attached directly to the feature.
    -- field: 'an:' plus the first available of analysis name, program.sourcename, program.
    -- value: the first non-null of rawscore, normscore, significance, identity.
    SELECT af.feature_id,
           'an:' || COALESCE(a.name, a.program || '.' || a.sourcename, a.program) AS field,
           COALESCE(CAST(af.rawscore AS text),
                    CAST(af.normscore AS text),
                    CAST(af.significance AS text),
                    CAST(af.identity AS text)) AS value
    FROM   analysisfeature af
           INNER JOIN analysis a ON a.analysis_id = af.analysis_id

;
GRANT SELECT ON v_genepage1 TO PUBLIC;

  
  ]]></sql>
  </feature_sql>


  <feature_sql id="v_analysis_cogs" type="view">
  <sql><![CDATA[
  -- special case analysis views
  -- replace these special analysis cases with general analysisfeature dump !??
-- v_analysis_cogs: ortholog-group and dbxref attributes per feature,
-- returned as (feature_id, field, value) rows.
CREATE OR REPLACE VIEW v_analysis_cogs
    (feature_id, field, value)
AS
  -- features scored by an 'orthomcl' analysis: value is 'euCOG:' plus the normscore
  SELECT hit.feature_id,
         'COG' AS field,
         ('euCOG:' || hit.normscore) AS value
  FROM   analysisfeature hit
         INNER JOIN analysis an ON an.analysis_id = hit.analysis_id
  WHERE  an.program = 'orthomcl'

UNION ALL
  -- other features with the same normscore in the same orthomcl analysis,
  -- each reported as db-name:uniquename under the 'euCOG:' label
  SELECT hit.feature_id,
         ('euCOG:' || hit.normscore) AS field,
         ((SELECT d.name
           FROM   dbxref x
                  INNER JOIN db d ON d.db_id = x.db_id
           WHERE  x.dbxref_id = mate.dbxref_id)
          || ':' || mate.uniquename) AS value
  FROM   analysisfeature hit
         INNER JOIN analysis an ON an.analysis_id = hit.analysis_id
         INNER JOIN analysisfeature peer
                 ON peer.analysis_id = an.analysis_id
                AND peer.normscore = hit.normscore
         INNER JOIN feature mate ON mate.feature_id = peer.feature_id
  WHERE  an.program = 'orthomcl'
    AND  mate.feature_id <> hit.feature_id

UNION ALL
  -- dbxrefs in the 'IPC' db, reported as COG values
  SELECT fd.feature_id,
         'COG' AS field,
         (gd.name || ':' || gx.accession) AS value
  FROM   feature_dbxref fd
         INNER JOIN dbxref gx ON gx.dbxref_id = fd.dbxref_id
         INNER JOIN db gd ON gd.db_id = gx.db_id
  WHERE  gd.name = 'IPC'

UNION ALL
  -- other features that share the same 'IPC' dbxref,
  -- each reported as db-name:uniquename under the IPC accession label
  SELECT fd.feature_id,
         (gd.name || ':' || gx.accession) AS field,
         ((SELECT d.name
           FROM   dbxref x
                  INNER JOIN db d ON d.db_id = x.db_id
           WHERE  x.dbxref_id = mate.dbxref_id)
          || ':' || mate.uniquename) AS value
  FROM   feature_dbxref fd
         INNER JOIN dbxref gx ON gx.dbxref_id = fd.dbxref_id
         INNER JOIN db gd ON gd.db_id = gx.db_id
         INNER JOIN feature_dbxref peer ON peer.dbxref_id = gx.dbxref_id
         INNER JOIN feature mate ON mate.feature_id = peer.feature_id
  WHERE  gd.name = 'IPC'
    AND  peer.feature_id <> fd.feature_id

UNION ALL
  -- all remaining (non-IPC) dbxrefs: non-current ones are labelled dbxref_2nd
  SELECT fd.feature_id,
         CASE WHEN fd.is_current IS FALSE THEN 'dbxref_2nd' ELSE 'dbxref' END AS field,
         (gd.name || ':' || gx.accession) AS value
  FROM   feature_dbxref fd
         INNER JOIN dbxref gx ON gx.dbxref_id = fd.dbxref_id
         INNER JOIN db gd ON gd.db_id = gx.db_id
  WHERE  gd.name <> 'IPC'

;
GRANT SELECT ON v_analysis_cogs TO PUBLIC;


  ]]></sql>
  </feature_sql>


  <feature_sql id="otherviews" type="view">
  <sql><![CDATA[
-- gffattr_gmodel: attribute rows (feature_id, type, attribute) for regular
-- features, including ones that have parent features (for example exons).
CREATE OR REPLACE VIEW gffattr_gmodel (
    feature_id, type, attribute
) AS
  -- dbxrefs as db:accession, non-current ones are labelled dbxref_2nd
  SELECT fd.feature_id,
         CASE WHEN fd.is_current IS FALSE THEN 'dbxref_2nd' ELSE 'dbxref' END AS type,
         d.name || ':' || x.accession AS attribute
  FROM   feature_dbxref fd
         INNER JOIN dbxref x ON x.dbxref_id = fd.dbxref_id
         INNER JOIN db d ON d.db_id = x.db_id

UNION ALL
  -- feature properties, limited to the configured featureprops list.
  -- Keep this restriction: other props (comments, sp_comment, owner ...) are
  -- not useful here.  putative_ortholog_of (mar05, for dpse etc.) is stored in
  -- feature_relationship and is handled by a separate branch of this view.
  SELECT fp.feature_id,
         ptype.name AS type,
         fp.value AS attribute
  FROM   featureprop fp
         INNER JOIN cvterm ptype ON ptype.cvterm_id = fp.type_id
  WHERE  (ptype.name IN ( ${featureprops} ))

UNION ALL
  -- non-internal synonyms, non-current ones are labelled synonym_2nd
  SELECT fs.feature_id,
         CASE WHEN fs.is_current IS FALSE THEN 'synonym_2nd' ELSE 'synonym' END AS type,
         syn.synonym_sgml AS attribute
  FROM   feature_synonym fs
         INNER JOIN synonym syn ON syn.synonym_id = fs.synonym_id
  WHERE  fs.is_internal IS FALSE

UNION ALL
  -- ortholog links, e.g. putative_ortholog_of=dmel/gene:CG31648
  -- the object feature is reported with a fixed 'FlyBase:' prefix
  SELECT rel.subject_id AS feature_id,
         rtype.name AS type,
         'FlyBase:' || obj.uniquename AS attribute
  FROM   feature_relationship rel
         INNER JOIN cvterm rtype ON rtype.cvterm_id = rel.type_id
         INNER JOIN feature obj ON obj.feature_id = rel.object_id
  WHERE  rtype.name = 'putative_ortholog_of'

UNION ALL
  -- parent feature ids for exons etc., written as object_id or object_id:rank
  SELECT rel.subject_id AS feature_id,
         'parent_oid' AS type,
         CASE
           WHEN rel.rank IS NULL THEN CAST(rel.object_id AS text)
           ELSE CAST(rel.object_id AS text) || ':' || CAST(rel.rank AS text)
         END
  FROM   feature_relationship rel
;
GRANT SELECT ON gffattr_gmodel TO PUBLIC;


-- seqlen/featloc-length view
-- CREATE OR REPLACE VIEW feature_length ( feature_id, flength ) AS
--   SELECT feature_id,  
--     CASE WHEN seqlen IS NULL THEN (fl.fmax - fl.fmin) ELSE  fs.seqlen END AS flength, 
--   FROM featureloc fl
--   WHERE fl.feature_id = feature_id 
-- ;

-- gffattr_match: attribute rows (feature_id, type, attribute) for match features.
CREATE OR REPLACE VIEW gffattr_match (
    feature_id,  type,  attribute
) AS
  -- dbxrefs as db:accession, non-current ones are labelled dbxref_2nd
  SELECT fd.feature_id,
         CASE WHEN fd.is_current IS FALSE THEN 'dbxref_2nd' ELSE 'dbxref' END AS type,
         d.name || ':' || x.accession AS attribute
  FROM   feature_dbxref fd
         INNER JOIN dbxref x ON x.dbxref_id = fd.dbxref_id
         INNER JOIN db d ON d.db_id = x.db_id

UNION ALL
  -- feature properties, limited to the configured featureprops list
  SELECT fp.feature_id,
         ptype.name AS type,
         fp.value AS attribute
  FROM   featureprop fp
         INNER JOIN cvterm ptype ON ptype.cvterm_id = fp.type_id
  WHERE  (ptype.name IN ( ${featureprops} ))

UNION ALL
  -- non-internal synonyms, non-current ones are labelled synonym_2nd
  SELECT fs.feature_id,
         CASE WHEN fs.is_current IS FALSE THEN 'synonym_2nd' ELSE 'synonym' END AS type,
         syn.synonym_sgml AS attribute
  FROM   feature_synonym fs
         INNER JOIN synonym syn ON syn.synonym_id = fs.synonym_id
  WHERE  fs.is_internal IS FALSE
;
GRANT SELECT ON gffattr_match TO PUBLIC;

-- gffattr_synteny: attribute rows for cross-species features
-- (syntenic_region, orthology).
CREATE OR REPLACE VIEW gffattr_synteny (
    feature_id, type,  attribute
) AS
  -- parent feature ids (source supercontigs etc.) taken from
  -- feature_relationship, written as object_id or object_id:rank
  SELECT rel.subject_id,
         CAST('parent_oid' AS text) AS type,
         CASE
           WHEN rel.rank IS NULL THEN CAST(rel.object_id AS text)
           ELSE CAST(rel.object_id AS text) || ':' || CAST(rel.rank AS text)
         END
  FROM   feature_relationship rel
;
GRANT SELECT ON gffattr_synteny TO PUBLIC;

-- gffattr_synt2: attribute rows describing a synteny target feature.
-- Use this view instead of gffattr_synteny.
CREATE OR REPLACE VIEW gffattr_synt2 (
    feature_id,  type,  attribute
) AS
  -- organism of the target feature, as the organism_id printed as text
  SELECT targ.feature_id,
         CAST('to_species' AS text) AS type,
         CAST(targ.organism_id AS text) AS attribute
  FROM   feature targ

-- see above orthofix.pl: add case when this select is missing, use ortho featloc
-- to find equal gene feature and putative_ortholog_of
UNION ALL
  -- name of the target feature (name,uniquename when the two differ),
  -- skipping features whose type is one of the golden_path terms
  SELECT targ.feature_id,
         CAST('to_name' AS text) AS type,
         CASE
           WHEN targ.uniquename = targ.name THEN targ.name
           ELSE targ.name || ',' || targ.uniquename
         END AS attribute
  FROM   feature targ
  WHERE  targ.type_id NOT IN (
           SELECT gp.cvterm_id
           FROM   cvterm gp
           WHERE  gp.name IN ( ${golden_path} )
         )
;
GRANT SELECT ON gffattr_synt2 TO PUBLIC;


-- gffatts_anfloc: locations of features on golden_path source features,
-- intended for analysis features.
-- NOTE (original author): there is a problem with some analysis features
-- (promoter, transposon).
-- The term lookup is tied to the configured sequence ontology cv because a
-- name such as chromosome can be found in several cvs.
CREATE OR REPLACE VIEW gffatts_anfloc (
    feature_id, arm, fmin, fmax, strand, organism_id
  ) AS
  SELECT loc.feature_id,
         src.uniquename AS arm,
         loc.fmin,
         loc.fmax,
         loc.strand,
         src.organism_id
  FROM   featureloc loc
         INNER JOIN feature src ON src.feature_id = loc.srcfeature_id
         INNER JOIN cvterm srctype ON srctype.cvterm_id = src.type_id
         INNER JOIN cv socv ON socv.cv_id = srctype.cv_id
  WHERE  srctype.name IN ( ${golden_path} )
    AND  socv.name = '${seq_ontology}'
;
GRANT SELECT ON gffatts_anfloc TO PUBLIC;

-- gffatts_evid: parent links for analysis features, one parent_oid row per
-- feature_relationship entry (subject is the feature, object is the parent).
CREATE OR REPLACE VIEW gffatts_evid (
    feature_id, type, attribute
) AS
  SELECT rel.subject_id,
         CAST('parent_oid' AS text),
         CAST(rel.object_id AS text)
  FROM   feature_relationship rel
;
GRANT SELECT ON gffatts_evid TO PUBLIC;

  ]]></sql>
  </feature_sql>
  
</opt>
Index: sgdbulk.xml
===================================================================
RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/sgdbulk.xml,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** sgdbulk.xml	12 Jan 2006 06:03:49 -0000	1.2
--- sgdbulk.xml	15 Oct 2007 16:19:28 -0000	1.3
***************
*** 1,9 ****
  <opt
    name="sgdbulk"
!   relid="5"
!   date="20051129"
    make_current="1"
    >
    
    <about id="sgdbulk"><![CDATA[
     See bulkfiles_template.xml for further documentation. 
--- 1,11 ----
  <opt
    name="sgdbulk"
!   relid="7"
    make_current="1"
    >
    
+   <!-- 07oct: test bulkfiles v1.1  with no_csomesplit -->
+   <no_csomesplit>1</no_csomesplit>
+   
    <about id="sgdbulk"><![CDATA[
     See bulkfiles_template.xml for further documentation. 
***************
*** 19,23 ****
    Database: ${title} 
    Species:  ${species}
!   Release:  ${rel}, dated ${date} as ${relfull}
    ]]></doc>
    
--- 21,27 ----
    Database: ${title} 
    Species:  ${species}
!   Release:  ${release_id}, dated ${release_date} as ${relfull}
!   
!   See release notes at <a href="${release_url}"> ${release_url}</a>
    ]]></doc>
    
***************
*** 41,55 ****
      |& mail -s gmodtools-debug gil...@in... 
  
    ]]></doc>
    
    
!   <release id="5" rel="sgdr5"  dbname="sgdlite" date="20050823"
!     relfull="sgdlite_2005_08_23"
!     release_url="/genome/Saccharomyces_cerevisiae/sgdlite-release5.html"
!     />
!   <release id="1" rel="sgdr1"  dbname="sgdlite_20040519" date="20040519"
!     relfull="sgdlite_20040519"
!     release_url="/genome/Saccharomyces_cerevisiae/sgdlite-release1.html"
!     />
  
    <org>scer</org>
--- 45,60 ----
      |& mail -s gmodtools-debug gil...@in... 
  
+   Find worked examples from Bulkfiles SQL use now at
+     http://www.gmod.org/Sample_Chado_SQL
+ 
    ]]></doc>
    
    
!   <release id="7" dbname="sgdlite" date="20070531"/>
!   <release id="5" dbname="sgdlite_2005" date="20050823"/>
!   <release id="1" dbname="sgdlite_2004" date="20040519"/>
! 
!   <relfull>${org}_${release_date}</relfull> 
!   <release_url>/genome/${species}/release${release_id}.html</release_url>
  
    <org>scer</org>
***************
*** 59,62 ****
--- 64,77 ----
    <featureprops>'Note','orf_classification'</featureprops>
    <GFF_source>SGD</GFF_source>
+ 
+   <!-- default seq_ontology now is sequence -->
+   <seq_ontology>Sequence Ontology Feature Annotation</seq_ontology>
+ 
+   <about id="species_variables">
+   The  golden_path, seq_ontology and species variables are critical.
+   See bulkfiles_template.xml for further documentation.
+   </about>
+   
+   <valid>0</valid> <!-- set 1 to skip db checks of above variables -->
    
    <fileset_override
***************
*** 84,91 ****
    </featdump>
    
!   <!-- feature sets to make fasta bulk files -->
    <featset>chromosome</featset>
    <featset>gene</featset>
-   <!-- <featset>CDS</featset> -->
    <featset>CDS_translation</featset>
    <featset>ncRNA</featset>
--- 99,105 ----
    </featdump>
    
!   <!-- feature sets to make fasta bulk files ; see sgdfeatconf -->
    <featset>chromosome</featset>
    <featset>gene</featset>
    <featset>CDS_translation</featset>
    <featset>ncRNA</featset>
***************
*** 93,97 ****
    <featset>gene_extended2000</featset>
    <featset>intergenic</featset>
! <!--   <featset>translation</featset> // none in sgdlite; make from CDS ? -->
  
    <!-- feature sets to make blast indices -->
--- 107,142 ----
    <featset>gene_extended2000</featset>
    <featset>intergenic</featset>
! 
!   <featmap id="CDS_translation"
!     types="CDS gene"
!     typelabel="protein protein"
!     dotranslate="1"
!     fromdb="0"
!     />
!   <featmap id="gene"
!     types="gene"
!     />
!   <featmap id="chromosome"
!     types="${golden_path}"
!     fromdb="1"
!     onlydb="1"
!     />
!   <featmap id="transposon"
!     types="transposable_element"
!     typelabel="transposable_element"
!     />
!   <featmap name="ncRNA"
!     types="ncRNA snRNA snoRNA rRNA tRNA"
!     />
!   <featmap id="gene_extended2000"
!     types="gene"
!     typelabel="gene_ex2000"
!     subrange="-2000..2000"
!     />
!   <featmap id="intergenic"
!     types="gene"
!     typelabel="intergenic"
!     method="between"
!     />
  
    <!-- feature sets to make blast indices -->

Index: bulkfiles_template.xml
===================================================================
RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/bulkfiles_template.xml,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** bulkfiles_template.xml	28 Dec 2005 02:22:07 -0000	1.1
--- bulkfiles_template.xml	15 Oct 2007 16:19:28 -0000	1.2
***************
*** 2,9 ****
    name="bulkfiles"
    relid="1"
!   date="20060102"
!   make_current="0"
    >
    
    <about id="bulkfiles_template"><![CDATA[
      This is an example main configuration file for GMODTools bulkfiles
--- 2,11 ----
    name="bulkfiles"
    relid="1"
!   date="20071014"
!   make_current="1"
    >
    
+   <title>Chado DB Data Release</title>
+   
    <about id="bulkfiles_template"><![CDATA[
      This is an example main configuration file for GMODTools bulkfiles
***************
*** 57,89 ****
    
  
!   <title>Chado DB Data Release</title>
!   
!   <doc id="Release.txt"><![CDATA[
!   Bulk output for Chado genome databases
!   Database: ${title}
!   Species:  ${species}
!   Release:  ${rel}, dated ${date} as ${relfull}
!   
!   See release notes at <a href="${release_url}"> ${release_url}</a>
!   ]]></doc>
!   
!   <about id="doc">
!     doc tags are generally printed to files
!     id= name, file name unless path given
!     path= path to output file.
!     A few common tags can be used as inserted ${variables}
!   </about>
!   
!   <release id="1" 
!     rel="rel1"  
!     dbname="chado_spp_2" 
!     date="20040519"
!     relfull="myspecies_release1"
!     release_url="/genome/${species}/myspecies_release1.html"
!     />
!   <release id="2" rel="rel2"  dbname="chado_spp_1" date="20051020"
!     relfull="myspecies_release2"
!     release_url="/genome/${species}/myspecies_release2.html"
!     />
    <about id="release"><![CDATA[
      release tags (an ARRAY) will specify release info:
--- 59,67 ----
    
  
!   <release id="1" dbname="chado1"  date="20040519" />
!   <release id="2" dbname="chado2"  date="20051020" />
!   <relfull>${org}_${release_date}</relfull> <!-- or ${org}_release${release_id} -->
!   <release_url>/genome/${species}/release${release_id}.html</release_url>
! 
    <about id="release"><![CDATA[
      release tags (an ARRAY) will specify release info:
***************
*** 97,111 ****
    
  
    <!-- see organisms.xml; add your species there -->
    <org>scer</org>
    <species>Saccharomyces_cerevisiae</species>
    <!-- see chadofeatsql ENV_default -->
!   <golden_path>'chromosome'</golden_path>
    <featureprops>'Note','orf_classification'</featureprops>
-   <seq_ontology>Sequence Ontology Feature Annotation</seq_ontology>
    
    <about id="species-genome">
!     These values of org, species,golden_path,featureprops are some
!     common species-genome specific options.
      org = short species id.
      golden_path = what highest level of genome feature is (a SO term), 
--- 75,107 ----
    
  
+   <valid>0</valid> 
+   <about id="valid">Set valid=1 to skip seq_ontology, other variable validation</about>
+   
+   <seq_ontology>sequence</seq_ontology>
+   
+   <about id="seq_ontology">
+     seq_ontology is the name in CV table for the
+     sequence feature cvterm set, including gene, exon,
+     chromosome.  It varies depending on choices used to
+     install sequence CV terms in your Chado database.
+     'seq_ontology' and 'golden_path' are critical parameters
+     that must match cvterm values used for feature entries. 
+     See chadofeatsql.xml for more details.
+     Common alternate values are 'sequence', 
+     'Sequence Ontology Feature Annotation', 'SO', 'SOFA'.
+   </about>
+ 
    <!-- see organisms.xml; add your species there -->
    <org>scer</org>
    <species>Saccharomyces_cerevisiae</species>
+   
    <!-- see chadofeatsql ENV_default -->
!   <golden_path>'chromosome','supercontig'</golden_path>
! 
    <featureprops>'Note','orf_classification'</featureprops>
    
    <about id="species-genome">
!     These values of org, species,golden_path,featureprops 
!     are example species-genome specific options.
      org = short species id.
      golden_path = what highest level of genome feature is (a SO term), 
***************
*** 114,117 ****
--- 110,116 ----
    </about>
  
+   <!-- 0710: this removes per-chromosome/scaffold/. files, not desired but for
+         well assembled genomes -->
+   <no_csomesplit>1</no_csomesplit>
  
    <featdump

Index: featuresets.xml
===================================================================
RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/featuresets.xml,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** featuresets.xml	12 Jan 2006 06:03:49 -0000	1.5
--- featuresets.xml	15 Oct 2007 16:19:28 -0000	1.6
***************
*** 32,37 ****
    </about>
  
!   <!-- feature sets to make fasta bulk files 
!   -->
    <featset>gene</featset>
  <!--   <featset>mRNA</featset> // duplicates transcript -->
--- 32,36 ----
    </about>
  
!   <!-- feature sets to make fasta bulk files -->
    <featset>gene</featset>
  <!--   <featset>mRNA</featset> // duplicates transcript -->
***************
*** 39,42 ****
--- 38,42 ----
    <featset>CDS</featset>  <!-- dna of protein coding sequence -->
    <featset>translation</featset> <!-- protein aminos -->
+ <!--  <featset>CDS_translation</featset> --> <!-- alternate, create protein from dna -->
  <!--   <featset>tRNA</featset>  -->
  <!--   <featset>miscRNA</featset> -->
***************
*** 45,78 ****
    <featset>pseudogene</featset>
    <featset>gene_extended2000</featset>
!   <featset>five_prime_UTR</featset>
!   <featset>three_prime_UTR</featset>
!   <featset>intron</featset>
    <featset>intergenic</featset>
-   
    <featset>syntenic_region</featset> 
!   <featset>scaffold</featset> <!-- see below confusion -->
!   <featset>chromosome</featset>
! 
! <!--   <featset>EST</featset> // if wanted; reagent seqs not on chromosomes -->
    
!   <featmap name="gene" get_id="1" />
!   <featmap name="five_prime_UTR" add_id="gene" />
!   <featmap name="three_prime_UTR" add_id="gene" />
!   <featmap name="intron" add_id="gene" />
!   <featmap name="mRNA" add_id="gene" />
!   <featmap name="CDS" add_id="gene" />
  
!   <featmap
!     name="translation"
      types="CDS"
      typelabel="protein"
      fromdb="1"
      />
!   <featmap
!     id="CDS_translation"
      types="CDS"
      typelabel="protein"
      dotranslate="1"
!     fromdb="0"
      />
    <!-- feb05: problems here with mRNA vs ncRNA snRNA snoRNA rRNA tRNA 
--- 45,77 ----
    <featset>pseudogene</featset>
    <featset>gene_extended2000</featset>
! <!--   <featset>five_prime_UTR</featset> -->
! <!--   <featset>three_prime_UTR</featset> -->
! <!--   <featset>intron</featset> -->
    <featset>intergenic</featset>
    <featset>syntenic_region</featset> 
! <!--   <featset>scaffold</featset>  --> <!-- see below confusion -->
!   <featset>chromosome</featset>  <!-- stands for all ${golden_path} -->
! <!--   <featset>EST</featset> --> <!-- if wanted; reagent seqs not on chromosomes -->
    
!   <featmap id="gene" get_id="1" />
!   <featmap id="five_prime_UTR" add_id="gene" />
!   <featmap id="three_prime_UTR" add_id="gene" />
!   <featmap id="intron" add_id="gene" />
!   <featmap id="mRNA" add_id="gene" />
!   <featmap id="CDS" add_id="gene" />
  
!   <featmap id="translation"
      types="CDS"
      typelabel="protein"
      fromdb="1"
      />
!   <featmap id="CDS_translation"
      types="CDS"
      typelabel="protein"
      dotranslate="1"
!     fromdb="1"
!     />
!   <featmap id="gene"
!     types="gene"
      />
    <!-- feb05: problems here with mRNA vs ncRNA snRNA snoRNA rRNA tRNA 
***************
*** 80,117 ****
      but for gff, other public files, need to use real RNA type.
      -->
!   <featmap
!     name="transcript"
      types="mRNA"
      typelabel="transcript"
      fromdb="1"
      />
!   <featmap
!     name="transposon"
      types="transposable_element"
      typelabel="transposable_element"
      />
!   <featmap
!     name="miscRNA"
      types="ncRNA snRNA snoRNA rRNA miRNA" 
      />
      <!-- this variant includes all non-prot-coding rnas -->
!   <featmap
!     name="ncRNA"
      types="ncRNA snRNA snoRNA rRNA tRNA miRNA" 
      />
!   <featmap
!     name="gene_extended2000"
      types="gene"
      typelabel="gene_ex2000"
      subrange="-2000..2000"
      />
!   <featmap
!     name="intergenic"
      types="gene"
      typelabel="intergenic"
      method="between"
      />
!   <featmap
!     name="EST"
      types="EST"
      fromdb="1"
--- 79,109 ----
      but for gff, other public files, need to use real RNA type.
      -->
!   <featmap  name="transcript"
      types="mRNA"
      typelabel="transcript"
      fromdb="1"
      />
!   <featmap  name="transposon"
      types="transposable_element"
      typelabel="transposable_element"
      />
!   <featmap id="miscRNA"
      types="ncRNA snRNA snoRNA rRNA miRNA" 
      />
      <!-- this variant includes all non-prot-coding rnas -->
!   <featmap id="ncRNA"
      types="ncRNA snRNA snoRNA rRNA tRNA miRNA" 
      />
!   <featmap id="gene_extended2000"
      types="gene"
      typelabel="gene_ex2000"
      subrange="-2000..2000"
      />
!   <featmap id="intergenic"
      types="gene"
      typelabel="intergenic"
      method="between"
      />
!   <featmap id="EST"
      types="EST"
      fromdb="1"
***************
*** 119,126 ****
      />
  
  
!   <!-- many names used for this: golden_path_region may be alternate, or supercontig -->
!   <featmap
!     name="scaffold"   
      alt_name0 = "golden_path_region" 
      alt_name1 = "golden_path_fragment" 
--- 111,126 ----
      />
  
+   <!-- 200710: no_csomesplit config -->
+   <featmap id="chromosome"
+     types="${golden_path}"
+     fromdb="1"
+     onlydb="1"
+     />
  
!   <!-- many names used for this: 
!     golden_path_region may be alternate, or supercontig 
!     // use above chromosome, type=${golden_path} variable
!   -->
!   <featmap id="scaffold"   
      alt_name0 = "golden_path_region" 
      alt_name1 = "golden_path_fragment" 

Index: organisms.xml
===================================================================
RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/organisms.xml,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** organisms.xml	28 Dec 2005 02:22:07 -0000	1.2
--- organisms.xml	15 Oct 2007 16:19:28 -0000	1.3
***************
*** 5,8 ****
--- 5,13 ----
    >
  
+   <!-- perl regex for making file-name abbreviation from genus_species -->
+   <species_short_pattern>^(\w)[^_]*_(\w{1,3})</species_short_pattern>
+   <!-- alternate .. -->
+   <species_short_pattern6>^(\w{1,3})[^_]*_(\w{1,3})</species_short_pattern6>
+ 
    <about>
    Table of organism species, abbreviation-id values.
***************
*** 13,19 ****
    </about>
    
-   <organism id="dpse" species="Drosophila_pseudoobscura"  />
-   <organism id="dmel" species="Drosophila_melanogaster" />
- 
    <organism id="agam" species="Anopheles_gambiae" />
    <organism id="amel" species="Apis_mellifera" />
--- 18,21 ----
***************
*** 27,32 ****
--- 29,36 ----
    <organism id="dgri" species="Drosophila_grimshawi" />
    <organism id="dmag" species="Daphnia_magna" />
+   <organism id="dmel" species="Drosophila_melanogaster" />
    <organism id="dmoj" species="Drosophila_mojavensis" />
    <organism id="dper" species="Drosophila_persimilis" />
+   <organism id="dpse" species="Drosophila_pseudoobscura"  />
    <organism id="dpul" species="Daphnia_pulex" />
    <organism id="drer" species="Danio_rerio" />

Index: site_defaults.xml
===================================================================
RCS file: /cvsroot/gmod/schema/GMODTools/conf/bulkfiles/site_defaults.xml,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** site_defaults.xml	28 Dec 2005 02:22:07 -0000	1.1
--- site_defaults.xml	15 Oct 2007 16:19:28 -0000	1.2
***************
*** 47,55 ****
    </about>
    
    
    <include>organisms</include>
    <about id="organisms" >
!   list of species and their abbreviations. Include your
!   species .
    </about>
    
--- 47,78 ----
    </about>
    
+   <doc id="Release.txt"><![CDATA[
+   Bulk output for Chado genome databases
+   Database: ${title}
+   Species:  ${species}
+   Release:  ${release_id}, dated ${release_date} as ${relfull}
+   
+   See release notes at <a href="${release_url}"> ${release_url}</a>
+   ]]></doc>
+   
+   <about id="doc">
+     doc tags are generally printed to files
+     id= name, file name unless path given
+     path= path to output file.
+     A few common tags can be used as inserted ${variables}
+   </about>
+ 
+   <relfull>${org}_${release_date}</relfull> <!-- or ${org}_release${release_id} -->
+   <release_url>/genome/${species}/release${release_id}.html</release_url>
+ 
+   <newuser>1</newuser>
+   <about id="newuser">
+     This flag turns on extra help and checks.  Set 0 if you are tired
+     of seeing them, or the time they take.
+   </about>
    
    <include>organisms</include>
    <about id="organisms" >
!   List of species and their abbreviations. Include your species.
    </about>

[Gmod-schema-cmts] schema/GMODTools/conf/bulkfiles chadogenepagesql.xml, NONE, 1.1 bulkfiles_templa

[Gmod-schema-cmts] schema/GMODTools/conf/bulkfiles chadogenepagesql.xml, NONE, 1.1 bulkfiles_template.xml, 1.1, 1.2 featuresets.xml, 1.5, 1.6 organisms.xml, 1.2, 1.3 sgdbulk.xml, 1.2, 1.3 site_defaults.xml, 1.1, 1.2