Thread: [pygccxml-commit] SF.net SVN: pygccxml:[1629] sphinx
Brought to you by:
mbaas,
roman_yakovenko
From: <rom...@us...> - 2009-01-28 12:34:39
|
Revision: 1629 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1629&view=rev Author: roman_yakovenko Date: 2009-01-28 11:42:27 +0000 (Wed, 28 Jan 2009) Log Message: ----------- adding directory skeleton for sphinx Added Paths: ----------- sphinx/ sphinx/Makefile sphinx/__static/ sphinx/__templates/ sphinx/conf.py sphinx/index.rest Property changes on: sphinx ___________________________________________________________________ Added: svn:ignore + __build pydsc pygccxml pyplusplus Added: sphinx/Makefile =================================================================== --- sphinx/Makefile (rev 0) +++ sphinx/Makefile 2009-01-28 11:42:27 UTC (rev 1629) @@ -0,0 +1,75 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d __build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean html web pickle htmlhelp latex changes linkcheck + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " changes to make an overview over all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + +clean: + -rm -rf __build/* + +html: + mkdir -p __build/html __build/doctrees + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) __build/html + @echo + @echo "Build finished. The HTML pages are in __build/html." + +pickle: + mkdir -p __build/pickle __build/doctrees + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) __build/pickle + @echo + @echo "Build finished; now you can process the pickle files." 
+ +web: pickle + +json: + mkdir -p __build/json __build/doctrees + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) __build/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + mkdir -p __build/htmlhelp __build/doctrees + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) __build/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in __build/htmlhelp." + +latex: + mkdir -p __build/latex __build/doctrees + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) __build/latex + @echo + @echo "Build finished; the LaTeX files are in __build/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +changes: + mkdir -p __build/changes __build/doctrees + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) __build/changes + @echo + @echo "The overview file is in __build/changes." + +linkcheck: + mkdir -p __build/linkcheck __build/doctrees + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) __build/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in __build/linkcheck/output.txt." Added: sphinx/conf.py =================================================================== --- sphinx/conf.py (rev 0) +++ sphinx/conf.py 2009-01-28 11:42:27 UTC (rev 1629) @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +# +# Language Binding documentation build configuration file, created by +# sphinx-quickstart on Wed Jan 28 10:41:40 2009. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# The contents of this file are pickled, so don't put values in the namespace +# that aren't pickleable (module imports are okay, they're removed automatically). +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. 
+ +import sys, os, shutil + +# If your extensions are in another directory, add it here. If the directory +# is relative to the documentation root, use os.path.abspath to make it +# absolute, like shown here. +#sys.path.append(os.path.abspath('.')) + +project_root = os.path.abspath('..') +doc_project_root = os.path.abspath('.') + +for entry in ( 'pydsc', 'pygccxml', 'pyplusplus' ): + target = os.path.join( doc_project_root, entry ) + source = os.path.join( project_root, entry + '_dev', 'docs' ) + if os.path.exists( target ): + shutil.rmtree( target ) + shutil.copytree( source, target, ignore=shutil.ignore_patterns( r'.svn', '*.pyc', 'apidocs', 'www_configuration.py' ) ) +shutil.copy( os.path.join( project_root, 'index.rest' ), doc_project_root ) + +# General configuration +# --------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['__templates'] + +# The suffix of source filenames. +source_suffix = '.rest' + +# The encoding of source files. +#source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Language Binding' +copyright = u'2009, Roman Yakovenko' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '1.1' +# The full version, including alpha/beta/rc tags. +release = '1.1' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. 
+#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = ['__build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + + +# Options for HTML output +# ----------------------- + +# The style sheet to use for HTML and HTML Help pages. A file of that name +# must exist either in Sphinx' static/ path, or in one of the custom paths +# given in html_static_path. +html_style = 'default.css' + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. 
They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['__static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_use_modindex = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, the reST sources are included in the HTML build as _sources/<name>. +#html_copy_source = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'LanguageBindingdoc' + + +# Options for LaTeX output +# ------------------------ + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, document class [howto/manual]). 
+latex_documents = [ + ('index', 'LanguageBinding.tex', ur'Language Binding Documentation', + ur'Roman Yakovenko', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_use_modindex = True + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'http://docs.python.org/dev': None} Added: sphinx/index.rest =================================================================== --- sphinx/index.rest (rev 0) +++ sphinx/index.rest 2009-01-28 11:42:27 UTC (rev 1629) @@ -0,0 +1,96 @@ +=========================== +C++ Python language binding +=========================== + +.. contents:: Table of contents + +.. toctree:: + :maxdepth: 2 + +---------------- +pygccxml package +---------------- + +* Do you need to parse C++ code? +* Do you need to build code generator? +* Do you need to create UML diagram? + +`pygccxml`_ is the way to go! `Learn more`__. + +.. __: `pygccxml`_ + +------------ +Py++ package +------------ + +"I love deadlines. I love the whooshing noise they make as they go by." + -- Douglas Adams + +Meet your deadlines with powerful code generator engine - `Py++`_. +`Py++`_ package and `Boost.Python`_ library provide a complete solution for +interfacing Python and C++. `Learn more`_. + +*European Space Agency*, *Ogre*, *PyOpenSG* and many others `use`__ Py++. + +.. _`Learn more` : `Py++`_ + +.. __: ./pyplusplus/quotes.html + +--------------- +pyboost package +--------------- + +`Boost`_ provides free peer-reviewed portable C++ source libraries. 
`pyboost`_ +package export the following libraries to Python: + + * `Boost.Date_Time`_ - date time library designed to provide a basis for + performing efficient time calculations + * `Boost.CRC`_ - cyclic redundancy code computation objects + * `Boost.Rational`_ - rational number class + * `Boost.Random`_ - a complete system for random number generation + +Python bindings for `boost.graph`_ library is also available from +http://www.osl.iu.edu/~dgregor/bgl-python . + +------------- +pydsc package +------------- + +Documentation strings contain spelling errors? `Fix them in a minute`_. + +.. _`Fix them in a minute` : `pydsc`_ + +----------------- +pyeasybmp package +----------------- + +`EasyBMP`_ could be easier? Yes of course! Learn more about `EasyBMP Python bindings`_. + +.. _`EasyBMP Python bindings`: ./pyplusplus/examples/easybmp/easybmp.html + + +.. _`Boost.Python`: http://www.boost.org/libs/python/doc/index.html + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + + +.. _`pyboost` : ./pyplusplus/examples/boost/boost.html +.. _`boost.graph` : http://www.boost.org/libs/graph/doc/table_of_contents.html +.. _`boost.date_time` : http://boost.org/doc/html/date_time.html +.. _`boost.crc` : http://boost.org/libs/crc/index.html +.. _`boost.rational` : http://boost.org/libs/rational/index.html +.. _`boost.random` : http://boost.org/libs/random/index.html + +.. _`Boost`: http://boost.org/ +.. _`Python`: http://www.python.org +.. _`pygccxml`: ./pygccxml/pygccxml.html +.. _`Py++`: ./pyplusplus/pyplusplus.html +.. _`pydsc`: ./pydsc/pydsc.html +.. _`EasyBMP`: http://easybmp.sourceforge.net/ + +.. _`many others` : ./x.html This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rom...@us...> - 2009-01-28 18:25:50
|
Revision: 1632 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1632&view=rev Author: roman_yakovenko Date: 2009-01-28 18:25:43 +0000 (Wed, 28 Jan 2009) Log Message: ----------- sphinx Modified Paths: -------------- index.rest sphinx/conf.py Modified: index.rest =================================================================== --- index.rest 2009-01-28 14:42:41 UTC (rev 1631) +++ index.rest 2009-01-28 18:25:43 UTC (rev 1632) @@ -4,6 +4,11 @@ .. contents:: Table of contents +.. toctree:: + :maxdepth: 2 + + pydsc/pydsc + ---------------- pygccxml package ---------------- @@ -68,7 +73,14 @@ .. _`Boost.Python`: http://www.boost.org/libs/python/doc/index.html +Indices and tables +================== +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + + .. _`pyboost` : ./pyplusplus/examples/boost/boost.html .. _`boost.graph` : http://www.boost.org/libs/graph/doc/table_of_contents.html .. _`boost.date_time` : http://boost.org/doc/html/date_time.html Modified: sphinx/conf.py =================================================================== --- sphinx/conf.py 2009-01-28 14:42:41 UTC (rev 1631) +++ sphinx/conf.py 2009-01-28 18:25:43 UTC (rev 1632) @@ -23,15 +23,18 @@ project_root = os.path.abspath('..') doc_project_root = os.path.abspath('.') - -for entry in ( 'pydsc', 'pygccxml', 'pyplusplus' ): - target = os.path.join( doc_project_root, entry ) - source = os.path.join( project_root, entry + '_dev', 'docs' ) +packages = ( 'pydsc', 'pygccxml', 'pyplusplus' ) +for pkg in packages: + target = os.path.join( doc_project_root, pkg ) + sys.path.append( os.path.join( project_root, pkg + '_dev' ) ) + source = os.path.join( project_root, pkg + '_dev', 'docs' ) if os.path.exists( target ): shutil.rmtree( target ) shutil.copytree( source, target, ignore=shutil.ignore_patterns( r'.svn', '*.pyc', 'apidocs', 'www_configuration.py' ) ) shutil.copy( os.path.join( project_root, 'index.rest' ), doc_project_root ) +os.environ['PYDSC'] = 'sphinx' + # General configuration # 
--------------------- This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rom...@us...> - 2009-02-12 23:02:53
|
Revision: 1675 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1675&view=rev Author: roman_yakovenko Date: 2009-02-12 22:00:53 +0000 (Thu, 12 Feb 2009) Log Message: ----------- sphinx Modified Paths: -------------- sphinx/conf.py Added Paths: ----------- sphinx/__templates_www/ sphinx/__templates_www/layout.html sphinx/__templates_www/left_ad_unit.html Added: sphinx/__templates_www/layout.html =================================================================== --- sphinx/__templates_www/layout.html (rev 0) +++ sphinx/__templates_www/layout.html 2009-02-12 22:00:53 UTC (rev 1675) @@ -0,0 +1,20 @@ +{% extends "!layout.html" %} + +{% set sidebarsearch = "left_ad_unit.html" %} + +{%- block sidebarsearch %} +{%- if pagename != "search" %} +<div id="searchbox" style="display: none"> +<h3>{{ _('Quick search') }}</h3> + <form class="search" action="{{ pathto('search') }}" method="get"> + <input type="text" name="q" size="18" /> + <input type="submit" value="{{ _('Go') }}" /> + <input type="hidden" name="check_keywords" value="yes" /> + <input type="hidden" name="area" value="default" /> + </form> +{% include "left_ad_unit.html" %} +</div> +<script type="text/javascript">$('#searchbox').show(0);</script> +{%- endif %} +{%- endblock %} + Added: sphinx/__templates_www/left_ad_unit.html =================================================================== --- sphinx/__templates_www/left_ad_unit.html (rev 0) +++ sphinx/__templates_www/left_ad_unit.html 2009-02-12 22:00:53 UTC (rev 1675) @@ -0,0 +1,11 @@ +<script type="text/javascript"><!-- +google_ad_client = "pub-0886572017808006"; +/* 200x200, created 2/12/09 */ +google_ad_slot = "3526901157"; +google_ad_width = 200; +google_ad_height = 200; +//--> +</script> +<script type="text/javascript" +src="http://pagead2.googlesyndication.com/pagead/show_ads.js"> +</script> Modified: sphinx/conf.py =================================================================== --- sphinx/conf.py 2009-02-12 09:15:05 UTC (rev 1674) +++ 
sphinx/conf.py 2009-02-12 22:00:53 UTC (rev 1675) @@ -16,6 +16,9 @@ import sys, os, shutil, atexit +#Questions +#1. how to read/add/modify command line arguments? + # If your extensions are in another directory, add it here. If the directory # is relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. @@ -79,7 +82,7 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx'] # Add any paths that contain templates here, relative to this directory. -templates_path = ['__templates'] +templates_path = ['__templates_www'] # The suffix of source filenames. source_suffix = '.rest' This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rom...@us...> - 2009-02-13 20:39:41
|
Revision: 1677 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1677&view=rev Author: roman_yakovenko Date: 2009-02-13 20:39:34 +0000 (Fri, 13 Feb 2009) Log Message: ----------- sphinx Modified Paths: -------------- sphinx/__templates_www/layout.html sphinx/conf.py Added Paths: ----------- sphinx/__templates_www/bottom_ad_unit.html Added: sphinx/__templates_www/bottom_ad_unit.html =================================================================== --- sphinx/__templates_www/bottom_ad_unit.html (rev 0) +++ sphinx/__templates_www/bottom_ad_unit.html 2009-02-13 20:39:34 UTC (rev 1677) @@ -0,0 +1,11 @@ +<script type="text/javascript"><!-- +google_ad_client = "pub-0886572017808006"; +/* 300x250, created 2/13/09 */ +google_ad_slot = "8069153601"; +google_ad_width = 300; +google_ad_height = 250; +//--> +</script> +<script type="text/javascript" +src="http://pagead2.googlesyndication.com/pagead/show_ads.js"> +</script> Modified: sphinx/__templates_www/layout.html =================================================================== --- sphinx/__templates_www/layout.html 2009-02-13 09:27:20 UTC (rev 1676) +++ sphinx/__templates_www/layout.html 2009-02-13 20:39:34 UTC (rev 1677) @@ -1,20 +1,33 @@ {% extends "!layout.html" %} -{% set sidebarsearch = "left_ad_unit.html" %} - {%- block sidebarsearch %} -{%- if pagename != "search" %} -<div id="searchbox" style="display: none"> -<h3>{{ _('Quick search') }}</h3> - <form class="search" action="{{ pathto('search') }}" method="get"> - <input type="text" name="q" size="18" /> - <input type="submit" value="{{ _('Go') }}" /> - <input type="hidden" name="check_keywords" value="yes" /> - <input type="hidden" name="area" value="default" /> - </form> -{% include "left_ad_unit.html" %} -</div> -<script type="text/javascript">$('#searchbox').show(0);</script> -{%- endif %} + {%- if pagename != "search" %} + <div id="searchbox" style="display: none"> + <h3>{{ _('Quick search') }}</h3> + <form class="search" action="{{ pathto('search') }}" 
method="get"> + <input type="text" name="q" size="18" /> + <input type="submit" value="{{ _('Go') }}" /> + <input type="hidden" name="check_keywords" value="yes" /> + <input type="hidden" name="area" value="default" /> + </form> + {% include "left_ad_unit.html" %} + </div> + <script type="text/javascript">$('#searchbox').show(0);</script> + {%- endif %} {%- endblock %} +{%- block document %} + <div class="document"> + <div class="documentwrapper"> + {%- if not embedded %} + <div class="bodywrapper"> + {%- endif %} + <div class="body"> + {% block body %} {% endblock %} + {% include "bottom_ad_unit.html" %} + </div> + {%- if not embedded %} + </div> + {%- endif %} + </div> +{%- endblock %} Modified: sphinx/conf.py =================================================================== --- sphinx/conf.py 2009-02-13 09:27:20 UTC (rev 1676) +++ sphinx/conf.py 2009-02-13 20:39:34 UTC (rev 1677) @@ -84,6 +84,7 @@ """<?xml version="1.0" encoding="UTF-8"?> <site base_url="http://www.language-binding.net/" store_into="%(path)s/sitemap.xml.gz" verbose="1"> <directory path="%(path)s" url="http://www.language-binding.net/" default_file="index.html" /> + <filter action="drop" type="regexp" pattern="/\.[^/]*" /> </site> """ % dict( path=os.path.join( doc_project_root, working_dir ) ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rom...@us...> - 2009-02-15 06:21:43
|
Revision: 1678 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1678&view=rev Author: roman_yakovenko Date: 2009-02-15 06:21:39 +0000 (Sun, 15 Feb 2009) Log Message: ----------- sphinx Modified Paths: -------------- index.rest pygccxml_dev/pygccxml/declarations/type_traits.py pygccxml_dev/pygccxml/parser/directory_cache.py pygccxml_dev/pygccxml/parser/source_reader.py pyplusplus_dev/pyplusplus/decl_wrappers/python_traits.py sphinx/__templates_www/layout.html sphinx/conf.py Modified: index.rest =================================================================== --- index.rest 2009-02-13 20:39:34 UTC (rev 1677) +++ index.rest 2009-02-15 06:21:39 UTC (rev 1678) @@ -1,6 +1,6 @@ -=========================== -C++ Python language binding -=========================== +============================== +C\\C++ Python language binding +============================== ---------------- pygccxml package Modified: pygccxml_dev/pygccxml/declarations/type_traits.py =================================================================== --- pygccxml_dev/pygccxml/declarations/type_traits.py 2009-02-13 20:39:34 UTC (rev 1677) +++ pygccxml_dev/pygccxml/declarations/type_traits.py 2009-02-15 06:21:39 UTC (rev 1678) @@ -254,9 +254,9 @@ return nake_type.base def remove_declarated( type_ ): - """removes type-declaration class-binder :class:`declarated_t` from the type_ + """removes type-declaration class-binder :class:`declarated_t` from the `type_` - If type_ is not :class:`declarated_t`, it will be returned as is + If `type_` is not :class:`declarated_t`, it will be returned as is """ type_ = remove_alias( type_ ) if isinstance( type_, cpptypes.declarated_t ): @@ -426,7 +426,7 @@ return decls def has_public_binary_operator( type_, operator_symbol ): - """returns True, if type_ has public binary operator, otherwise False""" + """returns True, if `type_` has public binary operator, otherwise False""" not_artificial = lambda decl: not decl.is_artificial type_ = remove_alias( type_ ) type_ = 
remove_cv( type_ ) Modified: pygccxml_dev/pygccxml/parser/directory_cache.py =================================================================== --- pygccxml_dev/pygccxml/parser/directory_cache.py 2009-02-13 20:39:34 UTC (rev 1677) +++ pygccxml_dev/pygccxml/parser/directory_cache.py 2009-02-15 06:21:39 UTC (rev 1678) @@ -460,7 +460,7 @@ del self.__id_lut[entry.filename] def is_file_modified(self, id_, signature): - """Check if the file referred to by id_ has been modified. + """Check if the file referred to by `id_` has been modified. """ entry = self.__entries.get(id_) if entry==None: @@ -479,7 +479,7 @@ return filesig!=signature def update_id_counter(self): - """Update the id_ counter so that it doesn't grow forever. + """Update the `id_` counter so that it doesn't grow forever. """ if len(self.__entries)==0: self.__next_id = 1 Modified: pygccxml_dev/pygccxml/parser/source_reader.py =================================================================== --- pygccxml_dev/pygccxml/parser/source_reader.py 2009-02-13 20:39:34 UTC (rev 1677) +++ pygccxml_dev/pygccxml/parser/source_reader.py 2009-02-15 06:21:39 UTC (rev 1678) @@ -57,7 +57,7 @@ to two classes: 1. `scanner_t` - this class scans the "XML" file, generated by GCC-XML and - creates `pygccxml`_ declarations and types classes. After the XML file has + creates :mod:`pygccxml` declarations and types classes. After the XML file has been processed declarations and type class instances keeps references to each other using GCC-XML generated id's. 
Modified: pyplusplus_dev/pyplusplus/decl_wrappers/python_traits.py =================================================================== --- pyplusplus_dev/pyplusplus/decl_wrappers/python_traits.py 2009-02-13 20:39:34 UTC (rev 1677) +++ pyplusplus_dev/pyplusplus/decl_wrappers/python_traits.py 2009-02-15 06:21:39 UTC (rev 1678) @@ -8,7 +8,7 @@ from pygccxml import declarations def is_immutable( type_ ): - """returns True, if type_ represents Python immutable type""" + """returns True, if `type_` represents Python immutable type""" return declarations.is_fundamental( type_ ) \ or declarations.is_enum( type_ ) \ or declarations.is_std_string( type_ ) \ Modified: sphinx/__templates_www/layout.html =================================================================== --- sphinx/__templates_www/layout.html 2009-02-13 20:39:34 UTC (rev 1677) +++ sphinx/__templates_www/layout.html 2009-02-15 06:21:39 UTC (rev 1678) @@ -1,33 +1,37 @@ -{% extends "!layout.html" %} - -{%- block sidebarsearch %} - {%- if pagename != "search" %} - <div id="searchbox" style="display: none"> - <h3>{{ _('Quick search') }}</h3> - <form class="search" action="{{ pathto('search') }}" method="get"> - <input type="text" name="q" size="18" /> - <input type="submit" value="{{ _('Go') }}" /> - <input type="hidden" name="check_keywords" value="yes" /> - <input type="hidden" name="area" value="default" /> - </form> - {% include "left_ad_unit.html" %} - </div> - <script type="text/javascript">$('#searchbox').show(0);</script> - {%- endif %} -{%- endblock %} - -{%- block document %} - <div class="document"> - <div class="documentwrapper"> - {%- if not embedded %} - <div class="bodywrapper"> - {%- endif %} - <div class="body"> - {% block body %} {% endblock %} - {% include "bottom_ad_unit.html" %} - </div> - {%- if not embedded %} - </div> - {%- endif %} - </div> -{%- endblock %} +{% extends "!layout.html" %} + +{%- block rootrellink %} +<li><a href="{{ pathto(master_doc) }}">{{"Language Binding Project"}}</a>{{ 
reldelim1 }}</li> +{%- endblock %} + +{%- block sidebarsearch %} + {%- if pagename != "search" %} + <div id="searchbox" style="display: none"> + <h3>{{ _('Quick search') }}</h3> + <form class="search" action="{{ pathto('search') }}" method="get"> + <input type="text" name="q" size="18" /> + <input type="submit" value="{{ _('Go') }}" /> + <input type="hidden" name="check_keywords" value="yes" /> + <input type="hidden" name="area" value="default" /> + </form> + {% include "left_ad_unit.html" %} + </div> + <script type="text/javascript">$('#searchbox').show(0);</script> + {%- endif %} +{%- endblock %} + +{%- block document %} + <div class="document"> + <div class="documentwrapper"> + {%- if not embedded %} + <div class="bodywrapper"> + {%- endif %} + <div class="body"> + {% block body %} {% endblock %} + {% include "bottom_ad_unit.html" %} + </div> + {%- if not embedded %} + </div> + {%- endif %} + </div> +{%- endblock %} Modified: sphinx/conf.py =================================================================== --- sphinx/conf.py 2009-02-13 20:39:34 UTC (rev 1677) +++ sphinx/conf.py 2009-02-15 06:21:39 UTC (rev 1678) @@ -14,10 +14,12 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os, shutil, atexit +import sys, os, shutil, atexit, getopt -#Questions -#1. how to read/add/modify command line arguments? +opts, args = getopt.getopt( sys.argv[1:], 'ab:d:c:CD:A:NEqQP') +print 'opts: ', opts +print 'args: ', args +outdir = args[1] # If your extensions are in another directory, add it here. 
If the directory # is relative to the documentation root, use os.path.abspath to make it @@ -27,11 +29,7 @@ project_root = os.path.abspath('..') doc_project_root = os.path.abspath('.') packages = ( 'pydsc', 'pygccxml', 'pyplusplus' ) -html_documentation_dir_name = 'html_documentation' -if html_documentation_dir_name not in str( sys.argv ): - raise RuntimeError( "\n\n\nThe documentation directory name should be: '%s'\n\n\n" % html_documentation_dir_name ) - has_true_links = 'linux' in sys.platform for pkg in packages: target = os.path.join( doc_project_root, pkg ) @@ -69,17 +67,19 @@ @atexit.register def copy_indexing_suite_v2_files(): source_dir = os.path.join( project_root, 'pyplusplus_dev', 'docs', 'documentation', 'indexing_suite_v2_files' ) - target_dir = os.path.join( doc_project_root, html_documentation_dir_name, 'pyplusplus', 'documentation', 'indexing_suite_v2_files' ) + target_dir = os.path.join( doc_project_root, outdir, 'pyplusplus', 'documentation', 'indexing_suite_v2_files' ) if os.path.exists(target_dir): shutil.rmtree(target_dir) shutil.copytree( source_dir, target_dir, ignore=shutil.ignore_patterns( r'.svn' ) ) @atexit.register def generate_sitemap(): + if 'www' not in outdir: + return try: import sitemap_gen - working_dir = os.path.join( doc_project_root, html_documentation_dir_name ) + working_dir = os.path.join( doc_project_root, outdir ) config = \ """<?xml version="1.0" encoding="UTF-8"?> <site base_url="http://www.language-binding.net/" store_into="%(path)s/sitemap.xml.gz" verbose="1"> @@ -112,7 +112,9 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx'] # Add any paths that contain templates here, relative to this directory. -templates_path = ['__templates_www'] +templates_path = ['__templates'] +if 'www' in outdir: + templates_path = ['__templates_www'] # The suffix of source filenames. 
source_suffix = '.rest' @@ -225,7 +227,7 @@ #html_split_index = False # If true, the reST sources are included in the HTML build as _sources/<name>. -html_copy_source = False +html_copy_source = True # If true, an OpenSearch description file will be output, and all pages will # contain a <link> tag referring to it. The value of this option must be the @@ -275,7 +277,7 @@ #If true, keep warnings as “system message” paragraphs in the built documents. #Regardless of this setting, warnings are always written to the standard error #stream when sphinx-build is run. -keep_warnings = True +keep_warnings = False # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'http://docs.python.org/dev': None} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rom...@us...> - 2009-02-15 07:10:28
|
Revision: 1679 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1679&view=rev Author: roman_yakovenko Date: 2009-02-15 07:10:23 +0000 (Sun, 15 Feb 2009) Log Message: ----------- sphinx Modified Paths: -------------- pygccxml_dev/docs/history/history.rest pyplusplus_dev/docs/history/history.rest sphinx/conf.py Modified: pygccxml_dev/docs/history/history.rest =================================================================== --- pygccxml_dev/docs/history/history.rest 2009-02-15 06:21:39 UTC (rev 1678) +++ pygccxml_dev/docs/history/history.rest 2009-02-15 07:10:23 UTC (rev 1679) @@ -34,6 +34,8 @@ 3. Ability to load `GCC-XML`_ configuration from ``.ini`` like file was added +4. From now on, :doc:`pygccxml <../pygccxml>` will use `Sphinx <http://sphinx.pocoo.org/>`_ + for all documentation. ----------- Version 1.0 Modified: pyplusplus_dev/docs/history/history.rest =================================================================== --- pyplusplus_dev/docs/history/history.rest 2009-02-15 06:21:39 UTC (rev 1678) +++ pyplusplus_dev/docs/history/history.rest 2009-02-15 07:10:23 UTC (rev 1679) @@ -33,6 +33,15 @@ 2. Few bugs were fixed for 64Bit platform. Many thanks to Carsten. +3. `ctypes`_ backend was introduced - :doc:`Py++ <../pyplusplus>` is able to generate + Python code, which uses `ctypes`_ package to call functions in DLLs or shared libraries. + +4. From now on, :doc:`Py++ <../pyplusplus>` will use `Sphinx <http://sphinx.pocoo.org/>`_ + for all documentation. + + +.. _`ctypes` : http://docs.python.org/library/ctypes.html + ----------- Version 1.0 ----------- @@ -185,7 +194,7 @@ of ``std::vector< int, std::allocator< int > >``, in many cases :doc:`Py++ <../pyplusplus>` will generate ``std::vector< int >``. -5. :doc:`create_with_signature <../documentation/functions/overloading>` algorithm was improved. +5. :doc:`create_with_signature <../documentation/functions/overloading>` algorithm was improved. 
:doc:`Py++ <../pyplusplus>` will generate correct code in one more use case. 6. Added ability to exclude declarations from being exposed, if they will cause @@ -232,7 +241,7 @@ .. line-separator -5. :doc:`input_c_buffer <../documentation/functions/transformation/input_c_buffer>` - new functions +5. :doc:`input_c_buffer <../documentation/functions/transformation/input_c_buffer>` - new functions transformation, which allows to pass a Python sequence to function, instead of pair of arguments: pointer to buffer and size. 6. Added ability to control generated "include" directives. Now you can ask :doc:`Py++ <../pyplusplus>` @@ -270,7 +279,7 @@ 3. Added new algorithm, which controls the registration order of the functions. See :doc:`registration order document <../documentation/functions/registration_order>` -4. New "Py++" defined :doc:`return_pointee_value <../documentation/functions/call_policies/return_pointee_value>` +4. New "Py++" defined :doc:`return_pointee_value <../documentation/functions/call_policies/return_pointee_value>` call policy was introduced. 5. Support for opaque types was added. Read more about this feature `here`__. Modified: sphinx/conf.py =================================================================== --- sphinx/conf.py 2009-02-15 06:21:39 UTC (rev 1678) +++ sphinx/conf.py 2009-02-15 07:10:23 UTC (rev 1679) @@ -84,7 +84,8 @@ """<?xml version="1.0" encoding="UTF-8"?> <site base_url="http://www.language-binding.net/" store_into="%(path)s/sitemap.xml.gz" verbose="1"> <directory path="%(path)s" url="http://www.language-binding.net/" default_file="index.html" /> - <filter action="drop" type="regexp" pattern="/\.[^/]*" /> + <filter action="drop" type="regexp" pattern="/\.[^/]*" /> + <filter action="drop" type="regexp" pattern="/_[^/]*" /> </site> """ % dict( path=os.path.join( doc_project_root, working_dir ) ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rom...@us...> - 2009-02-16 11:11:42
|
Revision: 1684 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1684&view=rev Author: roman_yakovenko Date: 2009-02-16 11:11:35 +0000 (Mon, 16 Feb 2009) Log Message: ----------- sphinx Modified Paths: -------------- sphinx/__templates_www/layout.html sphinx/conf.py Modified: sphinx/__templates_www/layout.html =================================================================== --- sphinx/__templates_www/layout.html 2009-02-16 08:30:56 UTC (rev 1683) +++ sphinx/__templates_www/layout.html 2009-02-16 11:11:35 UTC (rev 1684) @@ -23,15 +23,18 @@ {%- block document %} <div class="document"> <div class="documentwrapper"> - {%- if not embedded %} + {%- if not embedded %}{% if not theme_nosidebar|tobool %} <div class="bodywrapper"> - {%- endif %} + {%- endif %}{% endif %} <div class="body"> {% block body %} {% endblock %} {% include "bottom_ad_unit.html" %} </div> - {%- if not embedded %} + {%- if not embedded %}{% if not theme_nosidebar|tobool %} </div> - {%- endif %} + {%- endif %}{% endif %} </div> + {%- block sidebar2 %}{{ sidebar() }}{% endblock %} + <div class="clearer"></div> + </div> {%- endblock %} Modified: sphinx/conf.py =================================================================== --- sphinx/conf.py 2009-02-16 08:30:56 UTC (rev 1683) +++ sphinx/conf.py 2009-02-16 11:11:35 UTC (rev 1684) @@ -14,7 +14,7 @@ # All configuration values have a default; values that are commented out # serve to show the default. 
-import sys, os, shutil, atexit, getopt +import sys, os, shutil, getopt opts, args = getopt.getopt( sys.argv[1:], 'ab:d:c:CD:A:NEqQP') print 'opts: ', opts @@ -64,18 +64,22 @@ except: pass #it is possible that pyenchant is not installed -@atexit.register -def copy_indexing_suite_v2_files(): +def copy_indexing_suite_v2_files(app, exception): + if exception: + print 'Indexing suite V2 copying was skipped - there were errors during the build process' + return source_dir = os.path.join( project_root, 'pyplusplus_dev', 'docs', 'documentation', 'indexing_suite_v2_files' ) target_dir = os.path.join( doc_project_root, outdir, 'pyplusplus', 'documentation', 'indexing_suite_v2_files' ) if os.path.exists(target_dir): shutil.rmtree(target_dir) shutil.copytree( source_dir, target_dir, ignore=shutil.ignore_patterns( r'.svn' ) ) -@atexit.register -def generate_sitemap(): +def generate_sitemap(app, exception): if 'www' not in outdir: return + if exception: + print 'SITEMAP generation was skipped - there were errors during the build process' + return try: import sitemap_gen @@ -105,6 +109,10 @@ print "ERROR(SITEMAP): sitemap file was not generated - ", str(error) +def setup(app): + app.connect('build-finished', copy_indexing_suite_v2_files) + app.connect('build-finished', generate_sitemap) + # General configuration # --------------------- This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rom...@us...> - 2009-05-13 10:24:06
|
Revision: 1726 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1726&view=rev Author: roman_yakovenko Date: 2009-05-13 10:23:46 +0000 (Wed, 13 May 2009) Log Message: ----------- updating docs Modified Paths: -------------- sphinx/conf.py Added Paths: ----------- sphinx/readme.txt Removed Paths: ------------- sphinx/check_links.bat Property Changed: ---------------- sphinx/ Property changes on: sphinx ___________________________________________________________________ Modified: svn:ignore - __build pydsc pygccxml pyplusplus index.rest html_documentation + __build pydsc pygccxml pyplusplus index.rest html_documentation www Deleted: sphinx/check_links.bat =================================================================== --- sphinx/check_links.bat 2009-05-13 10:22:27 UTC (rev 1725) +++ sphinx/check_links.bat 2009-05-13 10:23:46 UTC (rev 1726) @@ -1,2 +0,0 @@ -E:\Python25\Scripts\linkchecker.bat --no-warnings .\__build\index.html - Modified: sphinx/conf.py =================================================================== --- sphinx/conf.py 2009-05-13 10:22:27 UTC (rev 1725) +++ sphinx/conf.py 2009-05-13 10:23:46 UTC (rev 1726) @@ -54,11 +54,11 @@ shutil.copy( os.path.join( project_root, 'index.rest' ), doc_project_root ) try: - import pydsc + #~ import pydsc #report errors related to the project only - pydsc.include_paths( project_root ) - pydsc.ignore_dictionary( 'ignore_dictionary.txt' ) - pydsc.set_text_preprocessor( pydsc.sphinx_preprocessor ) + #~ pydsc.include_paths( project_root ) + #~ pydsc.ignore_dictionary( 'ignore_dictionary.txt' ) + #~ pydsc.set_text_preprocessor( pydsc.sphinx_preprocessor ) import pygccxml import pyplusplus except: @@ -118,8 +118,7 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx'] - +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage'] # Add any paths that contain templates here, relative to this directory. templates_path = ['__templates'] if 'www' in outdir: @@ -204,7 +203,7 @@ # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +html_favicon = 'favicon.ico' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, Added: sphinx/readme.txt =================================================================== --- sphinx/readme.txt (rev 0) +++ sphinx/readme.txt 2009-05-13 10:23:46 UTC (rev 1726) @@ -0,0 +1,7 @@ +language-binding web site: + + sphinx-build -b [changes|linkcheck|doctest] . www + +ads free doumentation: + + sphinx-build . docs This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rom...@us...> - 2010-04-06 04:21:54
|
Revision: 1835 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1835&view=rev Author: roman_yakovenko Date: 2010-04-06 04:21:48 +0000 (Tue, 06 Apr 2010) Log Message: ----------- adding sitemap generator Modified Paths: -------------- sphinx/conf.py Added Paths: ----------- sphinx/sitemap_gen.py Modified: sphinx/conf.py =================================================================== --- sphinx/conf.py 2010-04-05 09:08:06 UTC (rev 1834) +++ sphinx/conf.py 2010-04-06 04:21:48 UTC (rev 1835) @@ -30,6 +30,8 @@ doc_project_root = os.path.abspath('.') packages = ( 'pydsc', 'pygccxml', 'pyplusplus' ) +sys.path.append( doc_project_root ) + has_true_links = 'linux' in sys.platform for pkg in packages: target = os.path.join( doc_project_root, pkg ) Added: sphinx/sitemap_gen.py =================================================================== --- sphinx/sitemap_gen.py (rev 0) +++ sphinx/sitemap_gen.py 2010-04-06 04:21:48 UTC (rev 1835) @@ -0,0 +1,2205 @@ +#!/usr/bin/python +# +# Copyright (c) 2004, 2005 Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of Google nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# +# The sitemap_gen.py script is written in Python 2.2 and released to +# the open source community for continuous improvements under the BSD +# 2.0 new license, which can be found at: +# +# http://www.opensource.org/licenses/bsd-license.php +# + +__usage__ = \ +"""A simple script to automatically produce sitemaps for a webserver, +in the Google Sitemap Protocol (GSP). + +Usage: python sitemap_gen.py --config=config.xml [--help] [--testing] + --config=config.xml, specifies config file location + --help, displays usage message + --testing, specified when user is experimenting +""" + +# Please be careful that all syntax used in this file can be parsed on +# Python 1.5 -- this version check is not evaluated until after the +# entire file has been parsed. +import sys +if sys.hexversion < 0x02020000: + print 'This script requires Python 2.2 or later.' 
+ print 'Currently run with version: %s' % sys.version + sys.exit(1) + +import fnmatch +import glob +import gzip +import md5 +import os +import re +import stat +import time +import types +import urllib +import urlparse +import xml.sax + +# True and False were introduced in Python2.2.2 +try: + testTrue=True + del testTrue +except NameError: + True=1 + False=0 + +# Text encodings +ENC_ASCII = 'ASCII' +ENC_UTF8 = 'UTF-8' +ENC_IDNA = 'IDNA' +ENC_ASCII_LIST = ['ASCII', 'US-ASCII', 'US', 'IBM367', 'CP367', 'ISO646-US' + 'ISO_646.IRV:1991', 'ISO-IR-6', 'ANSI_X3.4-1968', + 'ANSI_X3.4-1986', 'CPASCII' ] +ENC_DEFAULT_LIST = ['ISO-8859-1', 'ISO-8859-2', 'ISO-8859-5'] + +# Maximum number of urls in each sitemap, before next Sitemap is created +MAXURLS_PER_SITEMAP = 50000 + +# Suffix on a Sitemap index file +SITEINDEX_SUFFIX = '_index.xml' + +# Regular expressions tried for extracting URLs from access logs. +ACCESSLOG_CLF_PATTERN = re.compile( + r'.+\s+"([^\s]+)\s+([^\s]+)\s+HTTP/\d+\.\d+"\s+200\s+.*' + ) + +# Match patterns for lastmod attributes +LASTMOD_PATTERNS = map(re.compile, [ + r'^\d\d\d\d$', + r'^\d\d\d\d-\d\d$', + r'^\d\d\d\d-\d\d-\d\d$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\dZ$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d[+-]\d\d:\d\d$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?Z$', + r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?[+-]\d\d:\d\d$', + ]) + +# Match patterns for changefreq attributes +CHANGEFREQ_PATTERNS = [ + 'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never' + ] + +# XML formats +SITEINDEX_HEADER = \ + '<?xml version="1.0" encoding="UTF-8"?>\n' \ + '<sitemapindex\n' \ + ' xmlns="http://www.google.com/schemas/sitemap/0.84"\n' \ + ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ + ' xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84\n' \ + ' http://www.google.com/schemas/sitemap/0.84/' \ + 'siteindex.xsd">\n' +SITEINDEX_FOOTER = '</sitemapindex>\n' +SITEINDEX_ENTRY = \ + ' <sitemap>\n' \ + ' <loc>%(loc)s</loc>\n' \ + ' 
<lastmod>%(lastmod)s</lastmod>\n' \ + ' </sitemap>\n' +SITEMAP_HEADER = \ + '<?xml version="1.0" encoding="UTF-8"?>\n' \ + '<urlset\n' \ + ' xmlns="http://www.google.com/schemas/sitemap/0.84"\n' \ + ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ + ' xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84\n' \ + ' http://www.google.com/schemas/sitemap/0.84/' \ + 'sitemap.xsd">\n' +SITEMAP_FOOTER = '</urlset>\n' +SITEURL_XML_PREFIX = ' <url>\n' +SITEURL_XML_SUFFIX = ' </url>\n' + +# Search engines to notify with the updated sitemaps +# +# This list is very non-obvious in what's going on. Here's the gist: +# Each item in the list is a 6-tuple of items. The first 5 are "almost" +# the same as the input arguments to urlparse.urlunsplit(): +# 0 - schema +# 1 - netloc +# 2 - path +# 3 - query <-- EXCEPTION: specify a query map rather than a string +# 4 - fragment +# Additionally, add item 5: +# 5 - query attribute that should be set to the new Sitemap URL +# Clear as mud, I know. +NOTIFICATION_SITES = [ + ('http', 'www.google.com', 'webmasters/sitemaps/ping', {}, '', 'sitemap') + ] + + +class Error(Exception): + """ + Base exception class. In this module we tend not to use our own exception + types for very much, but they come in very handy on XML parsing with SAX. + """ + pass +#end class Error + + +class SchemaError(Error): + """Failure to process an XML file according to the schema we know.""" + pass +#end class SchemeError + + +class Encoder: + """ + Manages wide-character/narrow-character conversions for just about all + text that flows into or out of the script. + + You should always use this class for string coercion, as opposed to + letting Python handle coercions automatically. Reason: Python + usually assumes ASCII (7-bit) as a default narrow character encoding, + which is not the kind of data we generally deal with. 
+ + General high-level methodologies used in sitemap_gen: + + [PATHS] + File system paths may be wide or narrow, depending on platform. + This works fine, just be aware of it and be very careful to not + mix them. That is, if you have to pass several file path arguments + into a library call, make sure they are all narrow or all wide. + This class has MaybeNarrowPath() which should be called on every + file system path you deal with. + + [URLS] + URL locations are stored in Narrow form, already escaped. This has the + benefit of keeping escaping and encoding as close as possible to the format + we read them in. The downside is we may end up with URLs that have + intermingled encodings -- the root path may be encoded in one way + while the filename is encoded in another. This is obviously wrong, but + it should hopefully be an issue hit by very few users. The workaround + from the user level (assuming they notice) is to specify a default_encoding + parameter in their config file. + + [OTHER] + Other text, such as attributes of the URL class, configuration options, + etc, are generally stored in Unicode for simplicity. + """ + + def __init__(self): + self._user = None # User-specified default encoding + self._learned = [] # Learned default encodings + self._widefiles = False # File system can be wide + + # Can the file system be Unicode? 
+ try: + self._widefiles = os.path.supports_unicode_filenames + except AttributeError: + try: + self._widefiles = sys.getwindowsversion() == os.VER_PLATFORM_WIN32_NT + except AttributeError: + pass + + # Try to guess a working default + try: + encoding = sys.getfilesystemencoding() + if encoding and not (encoding.upper() in ENC_ASCII_LIST): + self._learned = [ encoding ] + except AttributeError: + pass + + if not self._learned: + encoding = sys.getdefaultencoding() + if encoding and not (encoding.upper() in ENC_ASCII_LIST): + self._learned = [ encoding ] + + # If we had no guesses, start with some European defaults + if not self._learned: + self._learned = ENC_DEFAULT_LIST + #end def __init__ + + def SetUserEncoding(self, encoding): + self._user = encoding + #end def SetUserEncoding + + def NarrowText(self, text, encoding): + """ Narrow a piece of arbitrary text """ + if type(text) != types.UnicodeType: + return text + + # Try the passed in preference + if encoding: + try: + result = text.encode(encoding) + if not encoding in self._learned: + self._learned.append(encoding) + return result + except UnicodeError: + pass + except LookupError: + output.Warn('Unknown encoding: %s' % encoding) + + # Try the user preference + if self._user: + try: + return text.encode(self._user) + except UnicodeError: + pass + except LookupError: + temp = self._user + self._user = None + output.Warn('Unknown default_encoding: %s' % temp) + + # Look through learned defaults, knock any failing ones out of the list + while self._learned: + try: + return text.encode(self._learned[0]) + except: + del self._learned[0] + + # When all other defaults are exhausted, use UTF-8 + try: + return text.encode(ENC_UTF8) + except UnicodeError: + pass + + # Something is seriously wrong if we get to here + return text.encode(ENC_ASCII, 'ignore') + #end def NarrowText + + def MaybeNarrowPath(self, text): + """ Paths may be allowed to stay wide """ + if self._widefiles: + return text + return 
self.NarrowText(text, None) + #end def MaybeNarrowPath + + def WidenText(self, text, encoding): + """ Widen a piece of arbitrary text """ + if type(text) != types.StringType: + return text + + # Try the passed in preference + if encoding: + try: + result = unicode(text, encoding) + if not encoding in self._learned: + self._learned.append(encoding) + return result + except UnicodeError: + pass + except LookupError: + output.Warn('Unknown encoding: %s' % encoding) + + # Try the user preference + if self._user: + try: + return unicode(text, self._user) + except UnicodeError: + pass + except LookupError: + temp = self._user + self._user = None + output.Warn('Unknown default_encoding: %s' % temp) + + # Look through learned defaults, knock any failing ones out of the list + while self._learned: + try: + return unicode(text, self._learned[0]) + except: + del self._learned[0] + + # When all other defaults are exhausted, use UTF-8 + try: + return unicode(text, ENC_UTF8) + except UnicodeError: + pass + + # Getting here means it wasn't UTF-8 and we had no working default. + # We really don't have anything "right" we can do anymore. + output.Warn('Unrecognized encoding in text: %s' % text) + if not self._user: + output.Warn('You may need to set a default_encoding in your ' + 'configuration file.') + return text.decode(ENC_ASCII, 'ignore') + #end def WidenText +#end class Encoder +encoder = Encoder() + + +class Output: + """ + Exposes logging functionality, and tracks how many errors + we have thus output. 
+ + Logging levels should be used as thus: + Fatal -- extremely sparingly + Error -- config errors, entire blocks of user 'intention' lost + Warn -- individual URLs lost + Log(,0) -- Un-suppressable text that's not an error + Log(,1) -- touched files, major actions + Log(,2) -- parsing notes, filtered or duplicated URLs + Log(,3) -- each accepted URL + """ + + def __init__(self): + self.num_errors = 0 # Count of errors + self.num_warns = 0 # Count of warnings + + self._errors_shown = {} # Shown errors + self._warns_shown = {} # Shown warnings + self._verbose = 0 # Level of verbosity + #end def __init__ + + def Log(self, text, level): + """ Output a blurb of diagnostic text, if the verbose level allows it """ + if text: + text = encoder.NarrowText(text, None) + if self._verbose >= level: + print text + #end def Log + + def Warn(self, text): + """ Output and count a warning. Suppress duplicate warnings. """ + if text: + text = encoder.NarrowText(text, None) + hash = md5.new(text).digest() + if not self._warns_shown.has_key(hash): + self._warns_shown[hash] = 1 + print '[WARNING] ' + text + else: + self.Log('(suppressed) [WARNING] ' + text, 3) + self.num_warns = self.num_warns + 1 + #end def Warn + + def Error(self, text): + """ Output and count an error. Suppress duplicate errors. """ + if text: + text = encoder.NarrowText(text, None) + hash = md5.new(text).digest() + if not self._errors_shown.has_key(hash): + self._errors_shown[hash] = 1 + print '[ERROR] ' + text + else: + self.Log('(suppressed) [ERROR] ' + text, 3) + self.num_errors = self.num_errors + 1 + #end def Error + + def Fatal(self, text): + """ Output an error and terminate the program. """ + if text: + text = encoder.NarrowText(text, None) + print '[FATAL] ' + text + else: + print 'Fatal error.' + sys.exit(1) + #end def Fatal + + def SetVerbose(self, level): + """ Sets the verbose level. 
""" + try: + if type(level) != types.IntType: + level = int(level) + if (level >= 0) and (level <= 3): + self._verbose = level + return + except ValueError: + pass + self.Error('Verbose level (%s) must be between 0 and 3 inclusive.' % level) + #end def SetVerbose +#end class Output +output = Output() + + +class URL(object): + """ URL is a smart structure grouping together the properties we + care about for a single web reference. """ + __slots__ = 'loc', 'lastmod', 'changefreq', 'priority' + + def __init__(self): + self.loc = None # URL -- in Narrow characters + self.lastmod = None # ISO8601 timestamp of last modify + self.changefreq = None # Text term for update frequency + self.priority = None # Float between 0 and 1 (inc) + #end def __init__ + + def __cmp__(self, other): + if self.loc < other.loc: + return -1 + if self.loc > other.loc: + return 1 + return 0 + #end def __cmp__ + + def TrySetAttribute(self, attribute, value): + """ Attempt to set the attribute to the value, with a pretty try + block around it. 
""" + if attribute == 'loc': + self.loc = self.Canonicalize(value) + else: + try: + setattr(self, attribute, value) + except AttributeError: + output.Warn('Unknown URL attribute: %s' % attribute) + #end def TrySetAttribute + + def IsAbsolute(loc): + """ Decide if the URL is absolute or not """ + if not loc: + return False + narrow = encoder.NarrowText(loc, None) + (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) + if (not scheme) or (not netloc): + return False + return True + #end def IsAbsolute + IsAbsolute = staticmethod(IsAbsolute) + + def Canonicalize(loc): + """ Do encoding and canonicalization on a URL string """ + if not loc: + return loc + + # Let the encoder try to narrow it + narrow = encoder.NarrowText(loc, None) + + # Escape components individually + (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) + unr = '-._~' + sub = '!$&\'()*+,;=' + netloc = urllib.quote(netloc, unr + sub + '%:@/[]') + path = urllib.quote(path, unr + sub + '%:@/') + query = urllib.quote(query, unr + sub + '%:@/?') + frag = urllib.quote(frag, unr + sub + '%:@/?') + + # Try built-in IDNA encoding on the netloc + try: + (ignore, widenetloc, ignore, ignore, ignore) = urlparse.urlsplit(loc) + for c in widenetloc: + if c >= unichr(128): + netloc = widenetloc.encode(ENC_IDNA) + netloc = urllib.quote(netloc, unr + sub + '%:@/[]') + break + except UnicodeError: + # urlsplit must have failed, based on implementation differences in the + # library. There is not much we can do here, except ignore it. + pass + except LookupError: + output.Warn('An International Domain Name (IDN) is being used, but this ' + 'version of Python does not have support for IDNA encoding. 
' + ' (IDNA support was introduced in Python 2.3) The encoding ' + 'we have used instead is wrong and will probably not yield ' + 'valid URLs.') + bad_netloc = False + if '%' in netloc: + bad_netloc = True + + # Put it all back together + narrow = urlparse.urlunsplit((scheme, netloc, path, query, frag)) + + # I let '%' through. Fix any that aren't pre-existing escapes. + HEXDIG = '0123456789abcdefABCDEF' + list = narrow.split('%') + narrow = list[0] + del list[0] + for item in list: + if (len(item) >= 2) and (item[0] in HEXDIG) and (item[1] in HEXDIG): + narrow = narrow + '%' + item + else: + narrow = narrow + '%25' + item + + # Issue a warning if this is a bad URL + if bad_netloc: + output.Warn('Invalid characters in the host or domain portion of a URL: ' + + narrow) + + return narrow + #end def Canonicalize + Canonicalize = staticmethod(Canonicalize) + + def Validate(self, base_url, allow_fragment): + """ Verify the data in this URL is well-formed, and override if not. """ + assert type(base_url) == types.StringType + + # Test (and normalize) the ref + if not self.loc: + output.Warn('Empty URL') + return False + if allow_fragment: + self.loc = urlparse.urljoin(base_url, self.loc) + if not self.loc.startswith(base_url): + output.Warn('Discarded URL for not starting with the base_url: %s' % + self.loc) + self.loc = None + return False + + # Test the lastmod + if self.lastmod: + match = False + self.lastmod = self.lastmod.upper() + for pattern in LASTMOD_PATTERNS: + match = pattern.match(self.lastmod) + if match: + break + if not match: + output.Warn('Lastmod "%s" does not appear to be in ISO8601 format on ' + 'URL: %s' % (self.lastmod, self.loc)) + self.lastmod = None + + # Test the changefreq + if self.changefreq: + match = False + self.changefreq = self.changefreq.lower() + for pattern in CHANGEFREQ_PATTERNS: + if self.changefreq == pattern: + match = True + break + if not match: + output.Warn('Changefreq "%s" is not a valid change frequency on URL ' + ': %s' % 
(self.changefreq, self.loc)) + self.changefreq = None + + # Test the priority + if self.priority: + priority = -1.0 + try: + priority = float(self.priority) + except ValueError: + pass + if (priority < 0.0) or (priority > 1.0): + output.Warn('Priority "%s" is not a number between 0 and 1 inclusive ' + 'on URL: %s' % (self.priority, self.loc)) + self.priority = None + + return True + #end def Validate + + def MakeHash(self): + """ Provides a uniform way of hashing URLs """ + if not self.loc: + return None + if self.loc.endswith('/'): + return md5.new(self.loc[:-1]).digest() + return md5.new(self.loc).digest() + #end def MakeHash + + def Log(self, prefix='URL', level=3): + """ Dump the contents, empty or not, to the log. """ + out = prefix + ':' + + for attribute in self.__slots__: + value = getattr(self, attribute) + if not value: + value = '' + out = out + (' %s=[%s]' % (attribute, value)) + + output.Log('%s' % encoder.NarrowText(out, None), level) + #end def Log + + def WriteXML(self, file): + """ Dump non-empty contents to the output file, in XML format. """ + if not self.loc: + return + out = SITEURL_XML_PREFIX + + for attribute in self.__slots__: + value = getattr(self, attribute) + if value: + if type(value) == types.UnicodeType: + value = encoder.NarrowText(value, None) + elif type(value) != types.StringType: + value = str(value) + value = xml.sax.saxutils.escape(value) + out = out + (' <%s>%s</%s>\n' % (attribute, value, attribute)) + + out = out + SITEURL_XML_SUFFIX + file.write(out) + #end def WriteXML +#end class URL + + +class Filter: + """ + A filter on the stream of URLs we find. A filter is, in essence, + a wildcard applied to the stream. 
You can think of this as an + operator that returns a tri-state when given a URL: + + True -- this URL is to be included in the sitemap + None -- this URL is undecided + False -- this URL is to be dropped from the sitemap + """ + + def __init__(self, attributes): + self._wildcard = None # Pattern for wildcard match + self._regexp = None # Pattern for regexp match + self._pass = False # "Drop" filter vs. "Pass" filter + + if not ValidateAttributes('FILTER', attributes, + ('pattern', 'type', 'action')): + return + + # Check error count on the way in + num_errors = output.num_errors + + # Fetch the attributes + pattern = attributes.get('pattern') + type = attributes.get('type', 'wildcard') + action = attributes.get('action', 'drop') + if type: + type = type.lower() + if action: + action = action.lower() + + # Verify the attributes + if not pattern: + output.Error('On a filter you must specify a "pattern" to match') + elif (not type) or ((type != 'wildcard') and (type != 'regexp')): + output.Error('On a filter you must specify either \'type="wildcard"\' ' + 'or \'type="regexp"\'') + elif (action != 'pass') and (action != 'drop'): + output.Error('If you specify a filter action, it must be either ' + '\'action="pass"\' or \'action="drop"\'') + + # Set the rule + if action == 'drop': + self._pass = False + elif action == 'pass': + self._pass = True + + if type == 'wildcard': + self._wildcard = pattern + elif type == 'regexp': + try: + self._regexp = re.compile(pattern) + except re.error: + output.Error('Bad regular expression: %s' % pattern) + + # Log the final results iff we didn't add any errors + if num_errors == output.num_errors: + output.Log('Filter: %s any URL that matches %s "%s"' % + (action, type, pattern), 2) + #end def __init__ + + def Apply(self, url): + """ Process the URL, as above. 
""" + if (not url) or (not url.loc): + return None + + if self._wildcard: + if fnmatch.fnmatchcase(url.loc, self._wildcard): + return self._pass + return None + + if self._regexp: + if self._regexp.search(url.loc): + return self._pass + return None + + assert False # unreachable + #end def Apply +#end class Filter + + +class InputURL: + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles a single URL, manually specified in the config file. + """ + + def __init__(self, attributes): + self._url = None # The lonely URL + + if not ValidateAttributes('URL', attributes, + ('href', 'lastmod', 'changefreq', 'priority')): + return + + url = URL() + for attr in attributes.keys(): + if attr == 'href': + url.TrySetAttribute('loc', attributes[attr]) + else: + url.TrySetAttribute(attr, attributes[attr]) + + if not url.loc: + output.Error('Url entries must have an href attribute.') + return + + self._url = url + output.Log('Input: From URL "%s"' % self._url.loc, 2) + #end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + if self._url: + consumer(self._url, True) + #end def ProduceURLs +#end class InputURL + + +class InputURLList: + """ + Each Input class knows how to yield a set of URLs from a data source. 
+ + This one handles a text file with a list of URLs + """ + + def __init__(self, attributes): + self._path = None # The file path + self._encoding = None # Encoding of that file + + if not ValidateAttributes('URLLIST', attributes, ('path', 'encoding')): + return + + self._path = attributes.get('path') + self._encoding = attributes.get('encoding', ENC_UTF8) + if self._path: + self._path = encoder.MaybeNarrowPath(self._path) + if os.path.isfile(self._path): + output.Log('Input: From URLLIST "%s"' % self._path, 2) + else: + output.Error('Can not locate file: %s' % self._path) + self._path = None + else: + output.Error('Urllist entries must have a "path" attribute.') + #end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + + # Open the file + (frame, file) = OpenFileForRead(self._path, 'URLLIST') + if not file: + return + + # Iterate lines + linenum = 0 + for line in file.readlines(): + linenum = linenum + 1 + + # Strip comments and empty lines + if self._encoding: + line = encoder.WidenText(line, self._encoding) + line = line.strip() + if (not line) or line[0] == '#': + continue + + # Split the line on space + url = URL() + cols = line.split(' ') + for i in range(0,len(cols)): + cols[i] = cols[i].strip() + url.TrySetAttribute('loc', cols[0]) + + # Extract attributes from the other columns + for i in range(1,len(cols)): + if cols[i]: + try: + (attr_name, attr_val) = cols[i].split('=', 1) + url.TrySetAttribute(attr_name, attr_val) + except ValueError: + output.Warn('Line %d: Unable to parse attribute: %s' % + (linenum, cols[i])) + + # Pass it on + consumer(url, False) + + file.close() + if frame: + frame.close() + #end def ProduceURLs +#end class InputURLList + + +class InputDirectory: + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles a directory that acts as base for walking the filesystem. 
+ """ + + def __init__(self, attributes, base_url): + self._path = None # The directory + self._url = None # The URL equivelant + self._default_file = None + + if not ValidateAttributes('DIRECTORY', attributes, ('path', 'url', + 'default_file')): + return + + # Prep the path -- it MUST end in a sep + path = attributes.get('path') + if not path: + output.Error('Directory entries must have both "path" and "url" ' + 'attributes') + return + path = encoder.MaybeNarrowPath(path) + if not path.endswith(os.sep): + path = path + os.sep + if not os.path.isdir(path): + output.Error('Can not locate directory: %s' % path) + return + + # Prep the URL -- it MUST end in a sep + url = attributes.get('url') + if not url: + output.Error('Directory entries must have both "path" and "url" ' + 'attributes') + return + url = URL.Canonicalize(url) + if not url.endswith('/'): + url = url + '/' + if not url.startswith(base_url): + url = urlparse.urljoin(base_url, url) + if not url.startswith(base_url): + output.Error('The directory URL "%s" is not relative to the ' + 'base_url: %s' % (url, base_url)) + return + + # Prep the default file -- it MUST be just a filename + file = attributes.get('default_file') + if file: + file = encoder.MaybeNarrowPath(file) + if os.sep in file: + output.Error('The default_file "%s" can not include path information.' + % file) + file = None + + self._path = path + self._url = url + self._default_file = file + if file: + output.Log('Input: From DIRECTORY "%s" (%s) with default file "%s"' + % (path, url, file), 2) + else: + output.Log('Input: From DIRECTORY "%s" (%s) with no default file' + % (path, url), 2) + #end def __init__ + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. """ + if not self._path: + return + + root_path = self._path + root_URL = self._url + root_file = self._default_file + + def PerFile(dirpath, name): + """ + Called once per file. 
+ Note that 'name' will occasionally be None -- for a directory itself + """ + # Pull a timestamp + url = URL() + isdir = False + try: + if name: + path = os.path.join(dirpath, name) + else: + path = dirpath + isdir = os.path.isdir(path) + time = None + if isdir and root_file: + file = os.path.join(path, root_file) + try: + time = os.stat(file)[stat.ST_MTIME]; + except OSError: + pass + if not time: + time = os.stat(path)[stat.ST_MTIME]; + url.lastmod = TimestampISO8601(time) + except OSError: + pass + except ValueError: + pass + + # Build a URL + middle = dirpath[len(root_path):] + if os.sep != '/': + middle = middle.replace(os.sep, '/') + if middle: + middle = middle + '/' + if name: + middle = middle + name + if isdir: + middle = middle + '/' + url.TrySetAttribute('loc', root_URL + encoder.WidenText(middle, None)) + + # Suppress default files. (All the way down here so we can log it.) + if name and (root_file == name): + url.Log(prefix='IGNORED (default file)', level=2) + return + + consumer(url, False) + #end def PerFile + + def PerDirectory(ignore, dirpath, namelist): + """ + Called once per directory with a list of all the contained files/dirs. + """ + ignore = ignore # Avoid warnings of an unused parameter + + if not dirpath.startswith(root_path): + output.Warn('Unable to decide what the root path is for directory: ' + '%s' % dirpath) + return + + for name in namelist: + PerFile(dirpath, name) + #end def PerDirectory + + output.Log('Walking DIRECTORY "%s"' % self._path, 1) + PerFile(self._path, None) + os.path.walk(self._path, PerDirectory, None) + #end def ProduceURLs +#end class InputDirectory + + +class InputAccessLog: + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles access logs. It's non-trivial in that we want to + auto-detect log files in the Common Logfile Format (as used by Apache, + for instance) and the Extended Log File Format (as used by IIS, for + instance). 
+ """ + + def __init__(self, attributes): + self._path = None # The file path + self._encoding = None # Encoding of that file + self._is_elf = False # Extended Log File Format? + self._is_clf = False # Common Logfile Format? + self._elf_status = -1 # ELF field: '200' + self._elf_method = -1 # ELF field: 'HEAD' + self._elf_uri = -1 # ELF field: '/foo?bar=1' + self._elf_urifrag1 = -1 # ELF field: '/foo' + self._elf_urifrag2 = -1 # ELF field: 'bar=1' + + if not ValidateAttributes('ACCESSLOG', attributes, ('path', 'encoding')): + return + + self._path = attributes.get('path') + self._encoding = attributes.get('encoding', ENC_UTF8) + if self._path: + self._path = encoder.MaybeNarrowPath(self._path) + if os.path.isfile(self._path): + output.Log('Input: From ACCESSLOG "%s"' % self._path, 2) + else: + output.Error('Can not locate file: %s' % self._path) + self._path = None + else: + output.Error('Accesslog entries must have a "path" attribute.') + #end def __init__ + + def RecognizeELFLine(self, line): + """ Recognize the Fields directive that heads an ELF file """ + if not line.startswith('#Fields:'): + return False + fields = line.split(' ') + del fields[0] + for i in range(0, len(fields)): + field = fields[i].strip() + if field == 'sc-status': + self._elf_status = i + elif field == 'cs-method': + self._elf_method = i + elif field == 'cs-uri': + self._elf_uri = i + elif field == 'cs-uri-stem': + self._elf_urifrag1 = i + elif field == 'cs-uri-query': + self._elf_urifrag2 = i + output.Log('Recognized an Extended Log File Format file.', 2) + return True + #end def RecognizeELFLine + + def GetELFLine(self, line): + """ Fetch the requested URL from an ELF line """ + fields = line.split(' ') + count = len(fields) + + # Verify status was Ok + if self._elf_status >= 0: + if self._elf_status >= count: + return None + if not fields[self._elf_status].strip() == '200': + return None + + # Verify method was HEAD or GET + if self._elf_method >= 0: + if self._elf_method >= count: + 
return None + if not fields[self._elf_method].strip() in ('HEAD', 'GET'): + return None + + # Pull the full URL if we can + if self._elf_uri >= 0: + if self._elf_uri >= count: + return None + url = fields[self._elf_uri].strip() + if url != '-': + return url + + # Put together a fragmentary URL + if self._elf_urifrag1 >= 0: + if self._elf_urifrag1 >= count or self._elf_urifrag2 >= count: + return None + urlfrag1 = fields[self._elf_urifrag1].strip() + urlfrag2 = None + if self._elf_urifrag2 >= 0: + urlfrag2 = fields[self._elf_urifrag2] + if urlfrag1 and (urlfrag1 != '-'): + if urlfrag2 and (urlfrag2 != '-'): + urlfrag1 = urlfrag1 + '?' + urlfrag2 + return urlfrag1 + + return None + #end def GetELFLine + + def RecognizeCLFLine(self, line): + """ Try to tokenize a logfile line according to CLF pattern and see if + it works. """ + match = ACCESSLOG_CLF_PATTERN.match(line) + recognize = match and (match.group(1) in ('HEAD', 'GET')) + if recognize: + output.Log('Recognized a Common Logfile Format file.', 2) + return recognize + #end def RecognizeCLFLine + + def GetCLFLine(self, line): + """ Fetch the requested URL from a CLF line """ + match = ACCESSLOG_CLF_PATTERN.match(line) + if match: + request = match.group(1) + if request in ('HEAD', 'GET'): + return match.group(2) + return None + #end def GetCLFLine + + def ProduceURLs(self, consumer): + """ Produces URLs from our data source, hands them in to the consumer. 
""" + + # Open the file + (frame, file) = OpenFileForRead(self._path, 'ACCESSLOG') + if not file: + return + + # Iterate lines + for line in file.readlines(): + if self._encoding: + line = encoder.WidenText(line, self._encoding) + line = line.strip() + + # If we don't know the format yet, try them both + if (not self._is_clf) and (not self._is_elf): + self._is_elf = self.RecognizeELFLine(line) + self._is_clf = self.RecognizeCLFLine(line) + + # Digest the line + match = None + if self._is_elf: + match = self.GetELFLine(line) + elif self._is_clf: + match = self.GetCLFLine(line) + if not match: + continue + + # Pass it on + url = URL() + url.TrySetAttribute('loc', match) + consumer(url, True) + + file.close() + if frame: + frame.close() + #end def ProduceURLs +#end class InputAccessLog + + +class InputSitemap(xml.sax.handler.ContentHandler): + + """ + Each Input class knows how to yield a set of URLs from a data source. + + This one handles Sitemap files and Sitemap index files. For the sake + of simplicity in design (and simplicity in interfacing with the SAX + package), we do not handle these at the same time, recursively. Instead + we read an index file completely and make a list of Sitemap files, then + go back and process each Sitemap. + """ + + class _ContextBase(object): + + """Base class for context handlers in our SAX processing. A context + handler is a class that is responsible for understanding one level of + depth in the XML schema. The class knows what sub-tags are allowed, + and doing any processing specific for the tag we're in. + + This base class is the API filled in by specific context handlers, + all defined below. 
+ """ + + def __init__(self, subtags): + """Initialize with a sequence of the sub-tags that would be valid in + this context.""" + self._allowed_tags = subtags # Sequence of sub-tags we can have + self._last_tag = None # Most recent seen sub-tag + #end def __init__ + + def AcceptTag(self, tag): + """Returns True iff opening a sub-tag is valid in this context.""" + valid = tag in self._allowed_tags + if valid: + self._last_tag = tag + else: + self._last_tag = None + return valid + #end def AcceptTag + + def AcceptText(self, text): + """Returns True iff a blurb of text is valid in this context.""" + return False + #end def AcceptText + + def Open(self): + """The context is opening. Do initialization.""" + pass + #end def Open + + def Close(self): + """The context is closing. Return our result, if any.""" + pass + #end def Close + + def Return(self, result): + """We're returning to this context after handling a sub-tag. This + method is called with the result data from the sub-tag that just + closed. 
Here in _ContextBase, if we ever see a result it means + the derived child class forgot to override this method.""" + if result: + raise NotImplementedError + #end def Return + #end class _ContextBase + + class _ContextUrlSet(_ContextBase): + + """Context handler for the document node in a Sitemap.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ('url',)) + #end def __init__ + #end class _ContextUrlSet + + class _ContextUrl(_ContextBase): + + """Context handler for a URL node in a Sitemap.""" + + def __init__(self, consumer): + """Initialize this context handler with the callable consumer that + wants our URLs.""" + InputSitemap._ContextBase.__init__(self, URL.__slots__) + self._url = None # The URL object we're building + self._consumer = consumer # Who wants to consume it + #end def __init__ + + def Open(self): + """Initialize the URL.""" + assert not self._url + self._url = URL() + #end def Open + + def Close(self): + """Pass the URL to the consumer and reset it to None.""" + assert self._url + self._consumer(self._url, False) + self._url = None + #end def Close + + def Return(self, result): + """A value context has closed, absorb the data it gave us.""" + assert self._url + if result: + self._url.TrySetAttribute(self._last_tag, result) + #end def Return + #end class _ContextUrl + + class _ContextSitemapIndex(_ContextBase): + + """Context handler for the document node in an index file.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ('sitemap',)) + self._loclist = [] # List of accumulated Sitemap URLs + #end def __init__ + + def Open(self): + """Just a quick verify of state.""" + assert not self._loclist + #end def Open + + def Close(self): + """Return our list of accumulated URLs.""" + if self._loclist: + temp = self._loclist + self._loclist = [] + return temp + #end def Close + + def Return(self, result): + """Getting a new loc URL, add it to the collection.""" + if result: + self._loclist.append(result) + #end def 
Return + #end class _ContextSitemapIndex + + class _ContextSitemap(_ContextBase): + + """Context handler for a Sitemap entry in an index file.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ('loc', 'lastmod')) + self._loc = None # The URL to the Sitemap + #end def __init__ + + def Open(self): + """Just a quick verify of state.""" + assert not self._loc + #end def Open + + def Close(self): + """Return our URL to our parent.""" + if self._loc: + temp = self._loc + self._loc = None + return temp + output.Warn('In the Sitemap index file, a "sitemap" entry had no "loc".') + #end def Close + + def Return(self, result): + """A value has closed. If it was a 'loc', absorb it.""" + if result and (self._last_tag == 'loc'): + self._loc = result + #end def Return + #end class _ContextSitemap + + class _ContextValue(_ContextBase): + + """Context handler for a single value. We return just the value. The + higher level context has to remember what tag led into us.""" + + def __init__(self): + InputSitemap._ContextBase.__init__(self, ()) + self._text = None + #end def __init__ + + def AcceptText(self, text): + """Allow all text, adding it to our buffer.""" + if self._text: + self._text = self._text + text + else: + self._text = text + return True + #end def AcceptText + + def Open(self): + """Initialize our buffer.""" + self._text = None + #end def Open + + def Close(self): + """Return what's in our buffer.""" + text = self._text + self._text = None + if text: + text = text.strip() + return text + #end def Close + #end class _ContextValue + + def __init__(self, attributes): + """Initialize with a dictionary of attributes from our entry in the + config file.""" + xml.sax.handler.ContentHandler.__init__(self) + self._pathlist = None # A list of files + self._current = -1 # Current context in _contexts + self._contexts = None # The stack of contexts we allow + self._contexts_idx = None # ...contexts for index files + self._contexts_stm = None # ...contexts for 
Sitemap files + + if not ValidateAttributes('SITEMAP', attributes, ['path']): + return + + # Init the first file path + path = attributes.get('path') + if path: + path = encoder.MaybeNarrowPath(path) + if os.path.isfile(path): + output.Log('Input: From SITEMAP "%s"' % path, 2) + self._pathlist = [path] + else: + output.Error('Can not locate file "%s"' % path) + else: + output.Error('Sitemap entries must have a "path" attribute.') + #end def __init__ + + def ProduceURLs(self, consumer): + """In general: Produces URLs from our data source, hand them to the + callable consumer. + + In specific: Iterate over our list of paths and delegate the actual + processing to helper methods. This is a complexity no other data source + needs to suffer. We are unique in that we can have files that tell us + to bring in other files. + + Note the decision to allow an index file or not is made in this method. + If we call our parser with (self._contexts == None) the parser will + grab whichever context stack can handle the file. IE: index is allowed. + If instead we set (self._contexts = ...) before parsing, the parser + will only use the stack we specify. IE: index not allowed. 
+ """ + # Set up two stacks of contexts + self._contexts_idx = [InputSitemap._ContextSitemapIndex(), + InputSitemap._ContextSitemap(), + InputSitemap._ContextValue()] + + self._contexts_stm = [InputSitemap._ContextUrlSet(), + InputSitemap._ContextUrl(consumer), + InputSitemap._ContextValue()] + + # Process the first file + assert self._pathlist + path = self._pathlist[0] + self._contexts = None # We allow an index file here + self._ProcessFile(path) + + # Iterate over remaining files + self._contexts = self._contexts_stm # No index files allowed + for path in self._pathlist[1:]: + self._ProcessFile(path) + #end def ProduceURLs + + def _ProcessFile(self, path): + """Do per-file reading/parsing/consuming for the file path passed in.""" + assert path + + # Open our file + (frame, file) = OpenFileForRead(path, 'SITEMAP') + if not file: + return + + # Rev up the SAX engine + try: + self._current = -1 + xml.sax.parse(file, self) + except SchemaError: + output.Error('An error in file "%s" made us abort reading the Sitemap.' + % path) + except IOError: + output.Error('Cannot read from file "%s"' % path) + except xml.sax._exceptions.SAXParseException, e: + output.Error('XML error in the file "%s" (line %d, column %d): %s' % + (path, e._linenum, e._colnum, e.getMessage())) + + # Clean up + file.close() + if frame: + frame.close() + #end def _ProcessFile + + def _MungeLocationListIntoFiles(self, urllist): + """Given a list of URLs, munge them into our self._pathlist property. + We do this by assuming all the files live in the same directory as + the first file in the existing pathlist. That is, we assume a + Sitemap index points to Sitemaps only in the same directory. This + is not true in general, but will be true for any output produced + by this script. 
+ """ + assert self._pathlist + path = self._pathlist[0] + path = os.path.normpath(path) + dir = os.path.dirname(path) + wide = False + if type(path) == types.UnicodeType: + wide = True + + for url in urllist: + url = URL.Canonicalize(url) + output.Log('Index points to Sitemap file at: %s' % url, 2) + (scheme, netloc, path, query, frag) = urlparse.urlsplit(url) + file = os.path.basename(path) + file = urllib.unquote(file) + if wide: + file = encoder.WidenText(file) + if dir: + file = dir + os.sep + file + if file: + self._pathlist.append(file) + output.Log('Will attempt to read Sitemap file: %s' % file, 1) + #end def _MungeLocationListIntoFiles + + def startElement(self, tag, attributes): + """SAX processing, called per node in the config stream. + As long as the new tag is legal in our current context, this + becomes an Open call on one context deeper. + """ + # If this is the document node, we may have to look for a context stack + if (self._current < 0) and not self._contexts: + assert self._contexts_idx and self._contexts_stm + if tag == 'urlset': + self._contexts = self._contexts_stm + elif tag == 'sitemapindex': + self._contexts = self._contexts_idx + output.Log('File is a Sitemap index.', 2) + else: + output.Error('The document appears to be neither a Sitemap nor a ' + 'Sitemap index.') + raise SchemaError + + # Display a kinder error on a common mistake + if (self._current < 0) and (self._contexts == self._contexts_stm) and ( + tag == 'sitemapindex'): + output.Error('A Sitemap index can not refer to another Sitemap index.') + raise SchemaError + + # Verify no unexpected attributes + if attributes: + text = '' + for attr in attributes.keys(): + # The document node will probably have namespaces + if self._current < 0: + if attr.find('xmlns') >= 0: + continue + if attr.find('xsi') >= 0: + continue + if text: + text = text + ', ' + text = text + attr + if text: + output.Warn('Did not expect any attributes on any tag, instead tag ' + '"%s" had attributes: %s' % 
(tag, text)) + + # Switch contexts + if (self._current < 0) or (self._contexts[self._current].AcceptTag(tag)): + self._current = self._current + 1 + assert self._current < len(self._contexts) + self._contexts[self._current].Open() + else: + output.Error('Can not accept tag "%s" where it appears.' % tag) + raise SchemaError + #end def startElement + + def endElement(self, tag): + """SAX processing, called per node in the config stream. + This becomes a call to Close on one context followed by a call + to Return on the previous. + """ + tag = tag # Avoid warning on unused argument + assert self._current >= 0 + retval = self._contexts[self._current].Close() + self._current = self._current - 1 + if self._current >= 0: + self._contexts[self._current].Return(retval) + elif retval and (self._contexts == self._contexts_idx): + self._MungeLocationListIntoFiles(retval) + #end def endElement + + def characters(self, text): + """SAX processing, called when text values are read. Important to + note that one single text value may be split across multiple calls + of this method. + """ + if (self._current < 0) or ( + not self._contexts[self._current].AcceptText(text)): + if text.strip(): + output.Error('Can not accept text "%s" where it appears.' % text) + raise SchemaError + #end def characters +#end class InputSitemap + + +class FilePathGenerator: + """ + This class generates filenames in a series, upon request. + You can request any iteration number at any time, you don't + have to go in order. + + Example of iterations for '/path/foo.xml.gz': + 0 --> /path/foo.xml.gz + 1 --> /path/foo1.xml.gz + 2 --> /path/foo2.xml.gz + _index.xml --> /path/foo_index.xml + """ + + def __init__(self): + self.is_gzip = False # Is this a GZIP file? + + self._path = None # '/path/' + self._prefix = None # 'foo' + self._suffix = None # '.xml.gz' + #end def __init__ + + def Preload(self, path): + """ Splits up a path into forms ready for recombination. 
""" + path = encoder.MaybeNarrowPath(path) + + # Get down to a base name + path = os.path.normpath(path) + base = os.path.basename(path).lower() + if not base: + output.Error('Couldn\'t parse the file path: %s' % path) + return False + lenbase = len(base) + + # Recognize extension + lensuffix = 0 + compare_suffix = ['.xml', '.xml.gz', '.gz'] + for suffix in compare_suffix: + if base.endswith(suffix): + lensuffix = len(suffix) + break + if not lensuffix: + output.Error('The path "%s" doesn\'t end in a supported file ' + 'extension.' % path) + return False + self.is_gzip = suffix.endswith('.gz') + + # Split the original path + lenpath = len(path) + self._path = path[:lenpath-lenbase] + self._prefix = path[lenpath-lenbase:lenpath-lensuffix] + self._suffix = path[lenpath-lensuffix:] + + return True + #end def Preload + + def GeneratePath(self, instance): + """ Generates the iterations, as described above. """ + prefix = self._path + self._prefix + if type(instance) == types.IntType: + if instance: + return '%s%d%s' % (prefix, instance, self._suffix) + return prefix + self._suffix + return prefix + instance + #end def GeneratePath + + def GenerateURL(self, instance, root_url): + """ Generates iterations, but as a URL instead of a path. """ + prefix = root_url + self._prefix + retval = None + if type(instance) == types.IntType: + if instance: + retval = '%s%d%s' % (prefix, instance, self._suffix) + else: + retval = prefix + self._suffix + else: + retval = prefix + instance + return URL.Canonicalize(retval) + #end def GenerateURL + + def GenerateWildURL(self, root_url): + """ Generates a wildcard that should match all our iterations """ + prefix = URL.Canonicalize(root_url + self._prefix) + temp = URL.Canonicalize(prefix + self._suffix) + suffix = temp[len(prefix):] + return prefix + '*' + suffix + #end def GenerateURL +#end class FilePathGenerator + + +class PerURLStatistics: + """ Keep track of some simple per-URL statistics, like file extension. 
""" + + def __init__(self): + self._extensions = {} # Count of extension instances + #end def __init__ + + def Consume(self, url): + """ Log some stats for the URL. At the moment, that means extension. """ + if url and url.loc: + (scheme, netloc, path, query, frag) = urlparse.urlsplit(url.loc) + if not path: + return + + # Recognize directories + if path.endswith('/'): + if self._extensions.has_key('/'): + self._extensions['/'] = self._extensions['/'] + 1 + else: + self._extensions['/'] = 1 + return + + # Strip to a filename + i = path.rfind('/') + if i >= 0: + assert i < len(path) + path = path[i:] + + # Find extension + i = path.rfind('.') + if i > 0: + assert i < len(path) + ext = path[i:].lower() + if self._extensions.has_key(ext): + self._extensions[ext] = self._extensions[ext] + 1 + else: + self._extensions[ext] = 1 + else: + if self._extensions.has_key('(no extension)'): + self._extensions['(no extension)'] = self._extensions[ + '(no extension)'] + 1 + else: + self._extensions['(no extension)'] = 1 + #end def Consume + + def Log(self): + """ Dump out stats to the output. """ + if len(self._extensions): + output.Log('Count of file extensions on URLs:', 1) + set = self._extensions.keys() + set.sort() + for ext in set: + output.Log(' %7d %s' % (self._extensions[ext], ext), 1) + #end def Log + +class Sitemap(xml.sax.handler.ContentHandler): + """ + This is the big workhorse class that processes your inputs and spits + out sitemap files. It is built as a SAX handler for set up purposes. + That is, it processes an XML stream to bring itself up. 
+ """ + + def __init__(self, suppress_notify): + xml.sax.handler.ContentHandler.__init__(self) + self._filters = [] # Filter objects + self._inputs = [] # Input objects + self._urls = {} # Maps URLs to count of dups + self._set = [] # Current set of URLs + self._filegen = None # Path generator for output files + self._wildurl1 = None # Sitemap URLs to filter out + self._wildurl2 = None # Sitemap URLs to filter out + self._sitemaps = 0 # Number of output files + # We init _dup_max to 2 so the default priority is 0.5 instead of 1.0 + self._dup_max = 2 # Max number of duplicate URLs + self._stat = PerURLStatistics() # Some simple stats + self._in_site = False # SAX: are we in a Site node? + self._in_Site_ever = False # SAX: were we ever in a Site? + + self._default_enc = None # Best encoding to try on URLs + self._base_url = None # Prefix to all valid URLs + self._store_into = None # Output filepath + self._suppress = suppress_notify # Suppress notify of servers + #end def __init__ + + def ValidateBasicConfig(self): + """ Verifies (and cleans up) the basic user-configurable options. 
""" + all_good = True + + if self._default_enc: + encoder.SetUserEncoding(self._default_enc) + + # Canonicalize the base_url + if all_good and not self._base_url: + output.Error('A site needs a "base_url" attribute.') + all_good = False + if all_good and not URL.IsAbsolute(self._base_url): + output.Error('The "base_url" must be absolute, not relative: %s' % + self._base_url) + all_good = False + if all_good: + self._base_url = URL.Canonicalize(self._base_url) + if not self._base_url.endswith('/'): + self._base_url = self._base_url + '/' + output.Log('BaseURL is set to: %s' % self._base_url, 2) + + # Load store_into into a generator + if all_good: + if self._store_into: + self._filegen = FilePathGenerator() + if not self._filegen.Preload(self._store_into): + all_good = False + else: + output.Error('A site needs a "store_into" attribute.') + all_good = False + + # Ask the generator for patterns on what its output will look like + if all_good: + self._wildurl1 = self._filegen.GenerateWildURL(self._base_url) + self._wildurl2 = self._filegen.GenerateURL(SITEINDEX_SUFFIX, + self._base_url) + + # Unify various forms of False + if all_good: + if self._suppress: + if (type(self._suppress) == types.StringType) or (type(self._suppress) + == types.UnicodeType): + if (self._suppress == '0') or (self._suppress.lower() == 'false'): + self._suppress = False + + # Done + if not all_good: + output.Log('See "example_config.xml" for more information.', 0) + return all_good + #end def ValidateBasicConfig + + def Generate(self): + """ Run over all the Inputs and ask them to Produce """ + # Run the inputs + for input in self._inputs: + input.ProduceURLs(self.ConsumeURL) + + # Do last flushes + if len(self._set): + self.FlushSet() + if not self._sitemaps: + output.Warn('No URLs were recorded, writing ... [truncated message content] |
From: <rom...@us...> - 2011-03-07 21:02:53
|
Revision: 1855 http://pygccxml.svn.sourceforge.net/pygccxml/?rev=1855&view=rev Author: roman_yakovenko Date: 2011-03-07 21:02:46 +0000 (Mon, 07 Mar 2011) Log Message: ----------- remove reference to "language-binding.net" site Modified Paths: -------------- sphinx/conf.py sphinx/readme.txt Removed Paths: ------------- sphinx/__templates_www/ sphinx/sitemap_gen.py Modified: sphinx/conf.py =================================================================== --- sphinx/conf.py 2011-03-07 20:44:32 UTC (rev 1854) +++ sphinx/conf.py 2011-03-07 21:02:46 UTC (rev 1855) @@ -28,7 +28,7 @@ project_root = os.path.abspath('..') doc_project_root = os.path.abspath('.') -packages = ( 'pydsc', 'pygccxml', 'pyplusplus' ) +packages = ( 'pygccxml', 'pyplusplus' ) #'pydsc' - it is an internal package, used to fix spelling mistakes sys.path.append( doc_project_root ) @@ -47,6 +47,7 @@ os.symlink( source, target ) else: shutil.copytree( source, target, ignore=shutil.ignore_patterns( r'.svn', '*.pyc', 'osdc2006' ) ) + if has_true_links: if os.path.exists(os.path.join( doc_project_root, 'index.rest' )): os.unlink( os.path.join( doc_project_root, 'index.rest' ) ) @@ -76,44 +77,8 @@ shutil.rmtree(target_dir) shutil.copytree( source_dir, target_dir, ignore=shutil.ignore_patterns( r'.svn' ) ) -def generate_sitemap(app, exception): - if 'www' not in outdir: - return - if exception: - print 'SITEMAP generation was skipped - there were errors during the build process' - return - try: - import sitemap_gen - - working_dir = os.path.join( doc_project_root, outdir ) - config = \ - """<?xml version="1.0" encoding="UTF-8"?> - <site base_url="http://www.language-binding.net/" store_into="%(path)s/sitemap.xml.gz" verbose="1"> - <directory path="%(path)s" url="http://www.language-binding.net/" default_file="index.html" /> - <filter action="drop" type="regexp" pattern="/\.[^/]*" /> - <filter action="drop" type="regexp" pattern="/_[^/]*" /> - </site> - """ % dict( path=os.path.join( doc_project_root, 
working_dir ) ) - - f_config_path = os.path.join( working_dir, 'sitemap_config.xml' ) - f_config = file( f_config_path, 'w+' ) - f_config.write( config ) - f_config.close() - - sitemap = sitemap_gen.CreateSitemapFromFile(f_config_path, True) - if not sitemap: - print 'ERROR(SITEMAP): configuration file errors' - else: - sitemap.Generate() - print 'ERRORS(SITEMAP): %d' % sitemap_gen.output.num_errors - print 'WARNINGS(SITEMAP): %d' % sitemap_gen.output.num_warns - except Exception, error: - print "ERROR(SITEMAP): sitemap file was not generated - ", str(error) - - def setup(app): app.connect('build-finished', copy_indexing_suite_v2_files) - app.connect('build-finished', generate_sitemap) # General configuration # --------------------- @@ -123,8 +88,6 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage'] # Add any paths that contain templates here, relative to this directory. templates_path = ['__templates'] -if 'www' in outdir: - templates_path = ['__templates_www'] # The suffix of source filenames. source_suffix = '.rest' @@ -144,9 +107,9 @@ # built documents. # # The short X.Y version. -version = '1.1' +version = 'SVN - Mar 7 2011' # The full version, including alpha/beta/rc tags. -release = '1.1' +release = 'SVN - Mar 7 2011' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. Modified: sphinx/readme.txt =================================================================== --- sphinx/readme.txt 2011-03-07 20:44:32 UTC (rev 1854) +++ sphinx/readme.txt 2011-03-07 21:02:46 UTC (rev 1855) @@ -1,7 +1 @@ -language-binding web site: - - sphinx-build -b [changes|linkcheck|doctest] . www - -ads free doumentation: - - sphinx-build . docs +$ sphinx-build -E . 
docs Deleted: sphinx/sitemap_gen.py =================================================================== --- sphinx/sitemap_gen.py 2011-03-07 20:44:32 UTC (rev 1854) +++ sphinx/sitemap_gen.py 2011-03-07 21:02:46 UTC (rev 1855) @@ -1,2205 +0,0 @@ -#!/usr/bin/python -# -# Copyright (c) 2004, 2005 Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# * Neither the name of Google nor the names of its contributors may -# be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. 
-# -# -# The sitemap_gen.py script is written in Python 2.2 and released to -# the open source community for continuous improvements under the BSD -# 2.0 new license, which can be found at: -# -# http://www.opensource.org/licenses/bsd-license.php -# - -__usage__ = \ -"""A simple script to automatically produce sitemaps for a webserver, -in the Google Sitemap Protocol (GSP). - -Usage: python sitemap_gen.py --config=config.xml [--help] [--testing] - --config=config.xml, specifies config file location - --help, displays usage message - --testing, specified when user is experimenting -""" - -# Please be careful that all syntax used in this file can be parsed on -# Python 1.5 -- this version check is not evaluated until after the -# entire file has been parsed. -import sys -if sys.hexversion < 0x02020000: - print 'This script requires Python 2.2 or later.' - print 'Currently run with version: %s' % sys.version - sys.exit(1) - -import fnmatch -import glob -import gzip -import md5 -import os -import re -import stat -import time -import types -import urllib -import urlparse -import xml.sax - -# True and False were introduced in Python2.2.2 -try: - testTrue=True - del testTrue -except NameError: - True=1 - False=0 - -# Text encodings -ENC_ASCII = 'ASCII' -ENC_UTF8 = 'UTF-8' -ENC_IDNA = 'IDNA' -ENC_ASCII_LIST = ['ASCII', 'US-ASCII', 'US', 'IBM367', 'CP367', 'ISO646-US' - 'ISO_646.IRV:1991', 'ISO-IR-6', 'ANSI_X3.4-1968', - 'ANSI_X3.4-1986', 'CPASCII' ] -ENC_DEFAULT_LIST = ['ISO-8859-1', 'ISO-8859-2', 'ISO-8859-5'] - -# Maximum number of urls in each sitemap, before next Sitemap is created -MAXURLS_PER_SITEMAP = 50000 - -# Suffix on a Sitemap index file -SITEINDEX_SUFFIX = '_index.xml' - -# Regular expressions tried for extracting URLs from access logs. 
-ACCESSLOG_CLF_PATTERN = re.compile( - r'.+\s+"([^\s]+)\s+([^\s]+)\s+HTTP/\d+\.\d+"\s+200\s+.*' - ) - -# Match patterns for lastmod attributes -LASTMOD_PATTERNS = map(re.compile, [ - r'^\d\d\d\d$', - r'^\d\d\d\d-\d\d$', - r'^\d\d\d\d-\d\d-\d\d$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\dZ$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d[+-]\d\d:\d\d$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?Z$', - r'^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?[+-]\d\d:\d\d$', - ]) - -# Match patterns for changefreq attributes -CHANGEFREQ_PATTERNS = [ - 'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never' - ] - -# XML formats -SITEINDEX_HEADER = \ - '<?xml version="1.0" encoding="UTF-8"?>\n' \ - '<sitemapindex\n' \ - ' xmlns="http://www.google.com/schemas/sitemap/0.84"\n' \ - ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ - ' xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84\n' \ - ' http://www.google.com/schemas/sitemap/0.84/' \ - 'siteindex.xsd">\n' -SITEINDEX_FOOTER = '</sitemapindex>\n' -SITEINDEX_ENTRY = \ - ' <sitemap>\n' \ - ' <loc>%(loc)s</loc>\n' \ - ' <lastmod>%(lastmod)s</lastmod>\n' \ - ' </sitemap>\n' -SITEMAP_HEADER = \ - '<?xml version="1.0" encoding="UTF-8"?>\n' \ - '<urlset\n' \ - ' xmlns="http://www.google.com/schemas/sitemap/0.84"\n' \ - ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' \ - ' xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84\n' \ - ' http://www.google.com/schemas/sitemap/0.84/' \ - 'sitemap.xsd">\n' -SITEMAP_FOOTER = '</urlset>\n' -SITEURL_XML_PREFIX = ' <url>\n' -SITEURL_XML_SUFFIX = ' </url>\n' - -# Search engines to notify with the updated sitemaps -# -# This list is very non-obvious in what's going on. Here's the gist: -# Each item in the list is a 6-tuple of items. 
The first 5 are "almost" -# the same as the input arguments to urlparse.urlunsplit(): -# 0 - schema -# 1 - netloc -# 2 - path -# 3 - query <-- EXCEPTION: specify a query map rather than a string -# 4 - fragment -# Additionally, add item 5: -# 5 - query attribute that should be set to the new Sitemap URL -# Clear as mud, I know. -NOTIFICATION_SITES = [ - ('http', 'www.google.com', 'webmasters/sitemaps/ping', {}, '', 'sitemap') - ] - - -class Error(Exception): - """ - Base exception class. In this module we tend not to use our own exception - types for very much, but they come in very handy on XML parsing with SAX. - """ - pass -#end class Error - - -class SchemaError(Error): - """Failure to process an XML file according to the schema we know.""" - pass -#end class SchemeError - - -class Encoder: - """ - Manages wide-character/narrow-character conversions for just about all - text that flows into or out of the script. - - You should always use this class for string coercion, as opposed to - letting Python handle coercions automatically. Reason: Python - usually assumes ASCII (7-bit) as a default narrow character encoding, - which is not the kind of data we generally deal with. - - General high-level methodologies used in sitemap_gen: - - [PATHS] - File system paths may be wide or narrow, depending on platform. - This works fine, just be aware of it and be very careful to not - mix them. That is, if you have to pass several file path arguments - into a library call, make sure they are all narrow or all wide. - This class has MaybeNarrowPath() which should be called on every - file system path you deal with. - - [URLS] - URL locations are stored in Narrow form, already escaped. This has the - benefit of keeping escaping and encoding as close as possible to the format - we read them in. The downside is we may end up with URLs that have - intermingled encodings -- the root path may be encoded in one way - while the filename is encoded in another. 
This is obviously wrong, but - it should hopefully be an issue hit by very few users. The workaround - from the user level (assuming they notice) is to specify a default_encoding - parameter in their config file. - - [OTHER] - Other text, such as attributes of the URL class, configuration options, - etc, are generally stored in Unicode for simplicity. - """ - - def __init__(self): - self._user = None # User-specified default encoding - self._learned = [] # Learned default encodings - self._widefiles = False # File system can be wide - - # Can the file system be Unicode? - try: - self._widefiles = os.path.supports_unicode_filenames - except AttributeError: - try: - self._widefiles = sys.getwindowsversion() == os.VER_PLATFORM_WIN32_NT - except AttributeError: - pass - - # Try to guess a working default - try: - encoding = sys.getfilesystemencoding() - if encoding and not (encoding.upper() in ENC_ASCII_LIST): - self._learned = [ encoding ] - except AttributeError: - pass - - if not self._learned: - encoding = sys.getdefaultencoding() - if encoding and not (encoding.upper() in ENC_ASCII_LIST): - self._learned = [ encoding ] - - # If we had no guesses, start with some European defaults - if not self._learned: - self._learned = ENC_DEFAULT_LIST - #end def __init__ - - def SetUserEncoding(self, encoding): - self._user = encoding - #end def SetUserEncoding - - def NarrowText(self, text, encoding): - """ Narrow a piece of arbitrary text """ - if type(text) != types.UnicodeType: - return text - - # Try the passed in preference - if encoding: - try: - result = text.encode(encoding) - if not encoding in self._learned: - self._learned.append(encoding) - return result - except UnicodeError: - pass - except LookupError: - output.Warn('Unknown encoding: %s' % encoding) - - # Try the user preference - if self._user: - try: - return text.encode(self._user) - except UnicodeError: - pass - except LookupError: - temp = self._user - self._user = None - output.Warn('Unknown 
default_encoding: %s' % temp) - - # Look through learned defaults, knock any failing ones out of the list - while self._learned: - try: - return text.encode(self._learned[0]) - except: - del self._learned[0] - - # When all other defaults are exhausted, use UTF-8 - try: - return text.encode(ENC_UTF8) - except UnicodeError: - pass - - # Something is seriously wrong if we get to here - return text.encode(ENC_ASCII, 'ignore') - #end def NarrowText - - def MaybeNarrowPath(self, text): - """ Paths may be allowed to stay wide """ - if self._widefiles: - return text - return self.NarrowText(text, None) - #end def MaybeNarrowPath - - def WidenText(self, text, encoding): - """ Widen a piece of arbitrary text """ - if type(text) != types.StringType: - return text - - # Try the passed in preference - if encoding: - try: - result = unicode(text, encoding) - if not encoding in self._learned: - self._learned.append(encoding) - return result - except UnicodeError: - pass - except LookupError: - output.Warn('Unknown encoding: %s' % encoding) - - # Try the user preference - if self._user: - try: - return unicode(text, self._user) - except UnicodeError: - pass - except LookupError: - temp = self._user - self._user = None - output.Warn('Unknown default_encoding: %s' % temp) - - # Look through learned defaults, knock any failing ones out of the list - while self._learned: - try: - return unicode(text, self._learned[0]) - except: - del self._learned[0] - - # When all other defaults are exhausted, use UTF-8 - try: - return unicode(text, ENC_UTF8) - except UnicodeError: - pass - - # Getting here means it wasn't UTF-8 and we had no working default. - # We really don't have anything "right" we can do anymore. 
- output.Warn('Unrecognized encoding in text: %s' % text) - if not self._user: - output.Warn('You may need to set a default_encoding in your ' - 'configuration file.') - return text.decode(ENC_ASCII, 'ignore') - #end def WidenText -#end class Encoder -encoder = Encoder() - - -class Output: - """ - Exposes logging functionality, and tracks how many errors - we have thus output. - - Logging levels should be used as thus: - Fatal -- extremely sparingly - Error -- config errors, entire blocks of user 'intention' lost - Warn -- individual URLs lost - Log(,0) -- Un-suppressable text that's not an error - Log(,1) -- touched files, major actions - Log(,2) -- parsing notes, filtered or duplicated URLs - Log(,3) -- each accepted URL - """ - - def __init__(self): - self.num_errors = 0 # Count of errors - self.num_warns = 0 # Count of warnings - - self._errors_shown = {} # Shown errors - self._warns_shown = {} # Shown warnings - self._verbose = 0 # Level of verbosity - #end def __init__ - - def Log(self, text, level): - """ Output a blurb of diagnostic text, if the verbose level allows it """ - if text: - text = encoder.NarrowText(text, None) - if self._verbose >= level: - print text - #end def Log - - def Warn(self, text): - """ Output and count a warning. Suppress duplicate warnings. """ - if text: - text = encoder.NarrowText(text, None) - hash = md5.new(text).digest() - if not self._warns_shown.has_key(hash): - self._warns_shown[hash] = 1 - print '[WARNING] ' + text - else: - self.Log('(suppressed) [WARNING] ' + text, 3) - self.num_warns = self.num_warns + 1 - #end def Warn - - def Error(self, text): - """ Output and count an error. Suppress duplicate errors. 
""" - if text: - text = encoder.NarrowText(text, None) - hash = md5.new(text).digest() - if not self._errors_shown.has_key(hash): - self._errors_shown[hash] = 1 - print '[ERROR] ' + text - else: - self.Log('(suppressed) [ERROR] ' + text, 3) - self.num_errors = self.num_errors + 1 - #end def Error - - def Fatal(self, text): - """ Output an error and terminate the program. """ - if text: - text = encoder.NarrowText(text, None) - print '[FATAL] ' + text - else: - print 'Fatal error.' - sys.exit(1) - #end def Fatal - - def SetVerbose(self, level): - """ Sets the verbose level. """ - try: - if type(level) != types.IntType: - level = int(level) - if (level >= 0) and (level <= 3): - self._verbose = level - return - except ValueError: - pass - self.Error('Verbose level (%s) must be between 0 and 3 inclusive.' % level) - #end def SetVerbose -#end class Output -output = Output() - - -class URL(object): - """ URL is a smart structure grouping together the properties we - care about for a single web reference. """ - __slots__ = 'loc', 'lastmod', 'changefreq', 'priority' - - def __init__(self): - self.loc = None # URL -- in Narrow characters - self.lastmod = None # ISO8601 timestamp of last modify - self.changefreq = None # Text term for update frequency - self.priority = None # Float between 0 and 1 (inc) - #end def __init__ - - def __cmp__(self, other): - if self.loc < other.loc: - return -1 - if self.loc > other.loc: - return 1 - return 0 - #end def __cmp__ - - def TrySetAttribute(self, attribute, value): - """ Attempt to set the attribute to the value, with a pretty try - block around it. 
""" - if attribute == 'loc': - self.loc = self.Canonicalize(value) - else: - try: - setattr(self, attribute, value) - except AttributeError: - output.Warn('Unknown URL attribute: %s' % attribute) - #end def TrySetAttribute - - def IsAbsolute(loc): - """ Decide if the URL is absolute or not """ - if not loc: - return False - narrow = encoder.NarrowText(loc, None) - (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) - if (not scheme) or (not netloc): - return False - return True - #end def IsAbsolute - IsAbsolute = staticmethod(IsAbsolute) - - def Canonicalize(loc): - """ Do encoding and canonicalization on a URL string """ - if not loc: - return loc - - # Let the encoder try to narrow it - narrow = encoder.NarrowText(loc, None) - - # Escape components individually - (scheme, netloc, path, query, frag) = urlparse.urlsplit(narrow) - unr = '-._~' - sub = '!$&\'()*+,;=' - netloc = urllib.quote(netloc, unr + sub + '%:@/[]') - path = urllib.quote(path, unr + sub + '%:@/') - query = urllib.quote(query, unr + sub + '%:@/?') - frag = urllib.quote(frag, unr + sub + '%:@/?') - - # Try built-in IDNA encoding on the netloc - try: - (ignore, widenetloc, ignore, ignore, ignore) = urlparse.urlsplit(loc) - for c in widenetloc: - if c >= unichr(128): - netloc = widenetloc.encode(ENC_IDNA) - netloc = urllib.quote(netloc, unr + sub + '%:@/[]') - break - except UnicodeError: - # urlsplit must have failed, based on implementation differences in the - # library. There is not much we can do here, except ignore it. - pass - except LookupError: - output.Warn('An International Domain Name (IDN) is being used, but this ' - 'version of Python does not have support for IDNA encoding. 
' - ' (IDNA support was introduced in Python 2.3) The encoding ' - 'we have used instead is wrong and will probably not yield ' - 'valid URLs.') - bad_netloc = False - if '%' in netloc: - bad_netloc = True - - # Put it all back together - narrow = urlparse.urlunsplit((scheme, netloc, path, query, frag)) - - # I let '%' through. Fix any that aren't pre-existing escapes. - HEXDIG = '0123456789abcdefABCDEF' - list = narrow.split('%') - narrow = list[0] - del list[0] - for item in list: - if (len(item) >= 2) and (item[0] in HEXDIG) and (item[1] in HEXDIG): - narrow = narrow + '%' + item - else: - narrow = narrow + '%25' + item - - # Issue a warning if this is a bad URL - if bad_netloc: - output.Warn('Invalid characters in the host or domain portion of a URL: ' - + narrow) - - return narrow - #end def Canonicalize - Canonicalize = staticmethod(Canonicalize) - - def Validate(self, base_url, allow_fragment): - """ Verify the data in this URL is well-formed, and override if not. """ - assert type(base_url) == types.StringType - - # Test (and normalize) the ref - if not self.loc: - output.Warn('Empty URL') - return False - if allow_fragment: - self.loc = urlparse.urljoin(base_url, self.loc) - if not self.loc.startswith(base_url): - output.Warn('Discarded URL for not starting with the base_url: %s' % - self.loc) - self.loc = None - return False - - # Test the lastmod - if self.lastmod: - match = False - self.lastmod = self.lastmod.upper() - for pattern in LASTMOD_PATTERNS: - match = pattern.match(self.lastmod) - if match: - break - if not match: - output.Warn('Lastmod "%s" does not appear to be in ISO8601 format on ' - 'URL: %s' % (self.lastmod, self.loc)) - self.lastmod = None - - # Test the changefreq - if self.changefreq: - match = False - self.changefreq = self.changefreq.lower() - for pattern in CHANGEFREQ_PATTERNS: - if self.changefreq == pattern: - match = True - break - if not match: - output.Warn('Changefreq "%s" is not a valid change frequency on URL ' - ': %s' % 
(self.changefreq, self.loc)) - self.changefreq = None - - # Test the priority - if self.priority: - priority = -1.0 - try: - priority = float(self.priority) - except ValueError: - pass - if (priority < 0.0) or (priority > 1.0): - output.Warn('Priority "%s" is not a number between 0 and 1 inclusive ' - 'on URL: %s' % (self.priority, self.loc)) - self.priority = None - - return True - #end def Validate - - def MakeHash(self): - """ Provides a uniform way of hashing URLs """ - if not self.loc: - return None - if self.loc.endswith('/'): - return md5.new(self.loc[:-1]).digest() - return md5.new(self.loc).digest() - #end def MakeHash - - def Log(self, prefix='URL', level=3): - """ Dump the contents, empty or not, to the log. """ - out = prefix + ':' - - for attribute in self.__slots__: - value = getattr(self, attribute) - if not value: - value = '' - out = out + (' %s=[%s]' % (attribute, value)) - - output.Log('%s' % encoder.NarrowText(out, None), level) - #end def Log - - def WriteXML(self, file): - """ Dump non-empty contents to the output file, in XML format. """ - if not self.loc: - return - out = SITEURL_XML_PREFIX - - for attribute in self.__slots__: - value = getattr(self, attribute) - if value: - if type(value) == types.UnicodeType: - value = encoder.NarrowText(value, None) - elif type(value) != types.StringType: - value = str(value) - value = xml.sax.saxutils.escape(value) - out = out + (' <%s>%s</%s>\n' % (attribute, value, attribute)) - - out = out + SITEURL_XML_SUFFIX - file.write(out) - #end def WriteXML -#end class URL - - -class Filter: - """ - A filter on the stream of URLs we find. A filter is, in essence, - a wildcard applied to the stream. 
You can think of this as an - operator that returns a tri-state when given a URL: - - True -- this URL is to be included in the sitemap - None -- this URL is undecided - False -- this URL is to be dropped from the sitemap - """ - - def __init__(self, attributes): - self._wildcard = None # Pattern for wildcard match - self._regexp = None # Pattern for regexp match - self._pass = False # "Drop" filter vs. "Pass" filter - - if not ValidateAttributes('FILTER', attributes, - ('pattern', 'type', 'action')): - return - - # Check error count on the way in - num_errors = output.num_errors - - # Fetch the attributes - pattern = attributes.get('pattern') - type = attributes.get('type', 'wildcard') - action = attributes.get('action', 'drop') - if type: - type = type.lower() - if action: - action = action.lower() - - # Verify the attributes - if not pattern: - output.Error('On a filter you must specify a "pattern" to match') - elif (not type) or ((type != 'wildcard') and (type != 'regexp')): - output.Error('On a filter you must specify either \'type="wildcard"\' ' - 'or \'type="regexp"\'') - elif (action != 'pass') and (action != 'drop'): - output.Error('If you specify a filter action, it must be either ' - '\'action="pass"\' or \'action="drop"\'') - - # Set the rule - if action == 'drop': - self._pass = False - elif action == 'pass': - self._pass = True - - if type == 'wildcard': - self._wildcard = pattern - elif type == 'regexp': - try: - self._regexp = re.compile(pattern) - except re.error: - output.Error('Bad regular expression: %s' % pattern) - - # Log the final results iff we didn't add any errors - if num_errors == output.num_errors: - output.Log('Filter: %s any URL that matches %s "%s"' % - (action, type, pattern), 2) - #end def __init__ - - def Apply(self, url): - """ Process the URL, as above. 
""" - if (not url) or (not url.loc): - return None - - if self._wildcard: - if fnmatch.fnmatchcase(url.loc, self._wildcard): - return self._pass - return None - - if self._regexp: - if self._regexp.search(url.loc): - return self._pass - return None - - assert False # unreachable - #end def Apply -#end class Filter - - -class InputURL: - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles a single URL, manually specified in the config file. - """ - - def __init__(self, attributes): - self._url = None # The lonely URL - - if not ValidateAttributes('URL', attributes, - ('href', 'lastmod', 'changefreq', 'priority')): - return - - url = URL() - for attr in attributes.keys(): - if attr == 'href': - url.TrySetAttribute('loc', attributes[attr]) - else: - url.TrySetAttribute(attr, attributes[attr]) - - if not url.loc: - output.Error('Url entries must have an href attribute.') - return - - self._url = url - output.Log('Input: From URL "%s"' % self._url.loc, 2) - #end def __init__ - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. """ - if self._url: - consumer(self._url, True) - #end def ProduceURLs -#end class InputURL - - -class InputURLList: - """ - Each Input class knows how to yield a set of URLs from a data source. 
- - This one handles a text file with a list of URLs - """ - - def __init__(self, attributes): - self._path = None # The file path - self._encoding = None # Encoding of that file - - if not ValidateAttributes('URLLIST', attributes, ('path', 'encoding')): - return - - self._path = attributes.get('path') - self._encoding = attributes.get('encoding', ENC_UTF8) - if self._path: - self._path = encoder.MaybeNarrowPath(self._path) - if os.path.isfile(self._path): - output.Log('Input: From URLLIST "%s"' % self._path, 2) - else: - output.Error('Can not locate file: %s' % self._path) - self._path = None - else: - output.Error('Urllist entries must have a "path" attribute.') - #end def __init__ - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. """ - - # Open the file - (frame, file) = OpenFileForRead(self._path, 'URLLIST') - if not file: - return - - # Iterate lines - linenum = 0 - for line in file.readlines(): - linenum = linenum + 1 - - # Strip comments and empty lines - if self._encoding: - line = encoder.WidenText(line, self._encoding) - line = line.strip() - if (not line) or line[0] == '#': - continue - - # Split the line on space - url = URL() - cols = line.split(' ') - for i in range(0,len(cols)): - cols[i] = cols[i].strip() - url.TrySetAttribute('loc', cols[0]) - - # Extract attributes from the other columns - for i in range(1,len(cols)): - if cols[i]: - try: - (attr_name, attr_val) = cols[i].split('=', 1) - url.TrySetAttribute(attr_name, attr_val) - except ValueError: - output.Warn('Line %d: Unable to parse attribute: %s' % - (linenum, cols[i])) - - # Pass it on - consumer(url, False) - - file.close() - if frame: - frame.close() - #end def ProduceURLs -#end class InputURLList - - -class InputDirectory: - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles a directory that acts as base for walking the filesystem. 
- """ - - def __init__(self, attributes, base_url): - self._path = None # The directory - self._url = None # The URL equivelant - self._default_file = None - - if not ValidateAttributes('DIRECTORY', attributes, ('path', 'url', - 'default_file')): - return - - # Prep the path -- it MUST end in a sep - path = attributes.get('path') - if not path: - output.Error('Directory entries must have both "path" and "url" ' - 'attributes') - return - path = encoder.MaybeNarrowPath(path) - if not path.endswith(os.sep): - path = path + os.sep - if not os.path.isdir(path): - output.Error('Can not locate directory: %s' % path) - return - - # Prep the URL -- it MUST end in a sep - url = attributes.get('url') - if not url: - output.Error('Directory entries must have both "path" and "url" ' - 'attributes') - return - url = URL.Canonicalize(url) - if not url.endswith('/'): - url = url + '/' - if not url.startswith(base_url): - url = urlparse.urljoin(base_url, url) - if not url.startswith(base_url): - output.Error('The directory URL "%s" is not relative to the ' - 'base_url: %s' % (url, base_url)) - return - - # Prep the default file -- it MUST be just a filename - file = attributes.get('default_file') - if file: - file = encoder.MaybeNarrowPath(file) - if os.sep in file: - output.Error('The default_file "%s" can not include path information.' - % file) - file = None - - self._path = path - self._url = url - self._default_file = file - if file: - output.Log('Input: From DIRECTORY "%s" (%s) with default file "%s"' - % (path, url, file), 2) - else: - output.Log('Input: From DIRECTORY "%s" (%s) with no default file' - % (path, url), 2) - #end def __init__ - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. """ - if not self._path: - return - - root_path = self._path - root_URL = self._url - root_file = self._default_file - - def PerFile(dirpath, name): - """ - Called once per file. 
- Note that 'name' will occasionally be None -- for a directory itself - """ - # Pull a timestamp - url = URL() - isdir = False - try: - if name: - path = os.path.join(dirpath, name) - else: - path = dirpath - isdir = os.path.isdir(path) - time = None - if isdir and root_file: - file = os.path.join(path, root_file) - try: - time = os.stat(file)[stat.ST_MTIME]; - except OSError: - pass - if not time: - time = os.stat(path)[stat.ST_MTIME]; - url.lastmod = TimestampISO8601(time) - except OSError: - pass - except ValueError: - pass - - # Build a URL - middle = dirpath[len(root_path):] - if os.sep != '/': - middle = middle.replace(os.sep, '/') - if middle: - middle = middle + '/' - if name: - middle = middle + name - if isdir: - middle = middle + '/' - url.TrySetAttribute('loc', root_URL + encoder.WidenText(middle, None)) - - # Suppress default files. (All the way down here so we can log it.) - if name and (root_file == name): - url.Log(prefix='IGNORED (default file)', level=2) - return - - consumer(url, False) - #end def PerFile - - def PerDirectory(ignore, dirpath, namelist): - """ - Called once per directory with a list of all the contained files/dirs. - """ - ignore = ignore # Avoid warnings of an unused parameter - - if not dirpath.startswith(root_path): - output.Warn('Unable to decide what the root path is for directory: ' - '%s' % dirpath) - return - - for name in namelist: - PerFile(dirpath, name) - #end def PerDirectory - - output.Log('Walking DIRECTORY "%s"' % self._path, 1) - PerFile(self._path, None) - os.path.walk(self._path, PerDirectory, None) - #end def ProduceURLs -#end class InputDirectory - - -class InputAccessLog: - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles access logs. It's non-trivial in that we want to - auto-detect log files in the Common Logfile Format (as used by Apache, - for instance) and the Extended Log File Format (as used by IIS, for - instance). 
- """ - - def __init__(self, attributes): - self._path = None # The file path - self._encoding = None # Encoding of that file - self._is_elf = False # Extended Log File Format? - self._is_clf = False # Common Logfile Format? - self._elf_status = -1 # ELF field: '200' - self._elf_method = -1 # ELF field: 'HEAD' - self._elf_uri = -1 # ELF field: '/foo?bar=1' - self._elf_urifrag1 = -1 # ELF field: '/foo' - self._elf_urifrag2 = -1 # ELF field: 'bar=1' - - if not ValidateAttributes('ACCESSLOG', attributes, ('path', 'encoding')): - return - - self._path = attributes.get('path') - self._encoding = attributes.get('encoding', ENC_UTF8) - if self._path: - self._path = encoder.MaybeNarrowPath(self._path) - if os.path.isfile(self._path): - output.Log('Input: From ACCESSLOG "%s"' % self._path, 2) - else: - output.Error('Can not locate file: %s' % self._path) - self._path = None - else: - output.Error('Accesslog entries must have a "path" attribute.') - #end def __init__ - - def RecognizeELFLine(self, line): - """ Recognize the Fields directive that heads an ELF file """ - if not line.startswith('#Fields:'): - return False - fields = line.split(' ') - del fields[0] - for i in range(0, len(fields)): - field = fields[i].strip() - if field == 'sc-status': - self._elf_status = i - elif field == 'cs-method': - self._elf_method = i - elif field == 'cs-uri': - self._elf_uri = i - elif field == 'cs-uri-stem': - self._elf_urifrag1 = i - elif field == 'cs-uri-query': - self._elf_urifrag2 = i - output.Log('Recognized an Extended Log File Format file.', 2) - return True - #end def RecognizeELFLine - - def GetELFLine(self, line): - """ Fetch the requested URL from an ELF line """ - fields = line.split(' ') - count = len(fields) - - # Verify status was Ok - if self._elf_status >= 0: - if self._elf_status >= count: - return None - if not fields[self._elf_status].strip() == '200': - return None - - # Verify method was HEAD or GET - if self._elf_method >= 0: - if self._elf_method >= count: - 
return None - if not fields[self._elf_method].strip() in ('HEAD', 'GET'): - return None - - # Pull the full URL if we can - if self._elf_uri >= 0: - if self._elf_uri >= count: - return None - url = fields[self._elf_uri].strip() - if url != '-': - return url - - # Put together a fragmentary URL - if self._elf_urifrag1 >= 0: - if self._elf_urifrag1 >= count or self._elf_urifrag2 >= count: - return None - urlfrag1 = fields[self._elf_urifrag1].strip() - urlfrag2 = None - if self._elf_urifrag2 >= 0: - urlfrag2 = fields[self._elf_urifrag2] - if urlfrag1 and (urlfrag1 != '-'): - if urlfrag2 and (urlfrag2 != '-'): - urlfrag1 = urlfrag1 + '?' + urlfrag2 - return urlfrag1 - - return None - #end def GetELFLine - - def RecognizeCLFLine(self, line): - """ Try to tokenize a logfile line according to CLF pattern and see if - it works. """ - match = ACCESSLOG_CLF_PATTERN.match(line) - recognize = match and (match.group(1) in ('HEAD', 'GET')) - if recognize: - output.Log('Recognized a Common Logfile Format file.', 2) - return recognize - #end def RecognizeCLFLine - - def GetCLFLine(self, line): - """ Fetch the requested URL from a CLF line """ - match = ACCESSLOG_CLF_PATTERN.match(line) - if match: - request = match.group(1) - if request in ('HEAD', 'GET'): - return match.group(2) - return None - #end def GetCLFLine - - def ProduceURLs(self, consumer): - """ Produces URLs from our data source, hands them in to the consumer. 
""" - - # Open the file - (frame, file) = OpenFileForRead(self._path, 'ACCESSLOG') - if not file: - return - - # Iterate lines - for line in file.readlines(): - if self._encoding: - line = encoder.WidenText(line, self._encoding) - line = line.strip() - - # If we don't know the format yet, try them both - if (not self._is_clf) and (not self._is_elf): - self._is_elf = self.RecognizeELFLine(line) - self._is_clf = self.RecognizeCLFLine(line) - - # Digest the line - match = None - if self._is_elf: - match = self.GetELFLine(line) - elif self._is_clf: - match = self.GetCLFLine(line) - if not match: - continue - - # Pass it on - url = URL() - url.TrySetAttribute('loc', match) - consumer(url, True) - - file.close() - if frame: - frame.close() - #end def ProduceURLs -#end class InputAccessLog - - -class InputSitemap(xml.sax.handler.ContentHandler): - - """ - Each Input class knows how to yield a set of URLs from a data source. - - This one handles Sitemap files and Sitemap index files. For the sake - of simplicity in design (and simplicity in interfacing with the SAX - package), we do not handle these at the same time, recursively. Instead - we read an index file completely and make a list of Sitemap files, then - go back and process each Sitemap. - """ - - class _ContextBase(object): - - """Base class for context handlers in our SAX processing. A context - handler is a class that is responsible for understanding one level of - depth in the XML schema. The class knows what sub-tags are allowed, - and doing any processing specific for the tag we're in. - - This base class is the API filled in by specific context handlers, - all defined below. 
- """ - - def __init__(self, subtags): - """Initialize with a sequence of the sub-tags that would be valid in - this context.""" - self._allowed_tags = subtags # Sequence of sub-tags we can have - self._last_tag = None # Most recent seen sub-tag - #end def __init__ - - def AcceptTag(self, tag): - """Returns True iff opening a sub-tag is valid in this context.""" - valid = tag in self._allowed_tags - if valid: - self._last_tag = tag - else: - self._last_tag = None - return valid - #end def AcceptTag - - def AcceptText(self, text): - """Returns True iff a blurb of text is valid in this context.""" - return False - #end def AcceptText - - def Open(self): - """The context is opening. Do initialization.""" - pass - #end def Open - - def Close(self): - """The context is closing. Return our result, if any.""" - pass - #end def Close - - def Return(self, result): - """We're returning to this context after handling a sub-tag. This - method is called with the result data from the sub-tag that just - closed. 
Here in _ContextBase, if we ever see a result it means - the derived child class forgot to override this method.""" - if result: - raise NotImplementedError - #end def Return - #end class _ContextBase - - class _ContextUrlSet(_ContextBase): - - """Context handler for the document node in a Sitemap.""" - - def __init__(self): - InputSitemap._ContextBase.__init__(self, ('url',)) - #end def __init__ - #end class _ContextUrlSet - - class _ContextUrl(_ContextBase): - - """Context handler for a URL node in a Sitemap.""" - - def __init__(self, consumer): - """Initialize this context handler with the callable consumer that - wants our URLs.""" - InputSitemap._ContextBase.__init__(self, URL.__slots__) - self._url = None # The URL object we're building - self._consumer = consumer # Who wants to consume it - #end def __init__ - - def Open(self): - """Initialize the URL.""" - assert not self._url - self._url = URL() - #end def Open - - def Close(self): - """Pass the URL to the consumer and reset it to None.""" - assert self._url - self._consumer(self._url, False) - self._url = None - #end def Close - - def Return(self, result): - """A value context has closed, absorb the data it gave us.""" - assert self._url - if result: - self._url.TrySetAttribute(self._last_tag, result) - #end def Return - #end class _ContextUrl - - class _ContextSitemapIndex(_ContextBase): - - """Context handler for the document node in an index file.""" - - def __init__(self): - InputSitemap._ContextBase.__init__(self, ('sitemap',)) - self._loclist = [] # List of accumulated Sitemap URLs - #end def __init__ - - def Open(self): - """Just a quick verify of state.""" - assert not self._loclist - #end def Open - - def Close(self): - """Return our list of accumulated URLs.""" - if self._loclist: - temp = self._loclist - self._loclist = [] - return temp - #end def Close - - def Return(self, result): - """Getting a new loc URL, add it to the collection.""" - if result: - self._loclist.append(result) - #end def 
Return - #end class _ContextSitemapIndex - - class _ContextSitemap(_ContextBase): - - """Context handler for a Sitemap entry in an index file.""" - - def __init__(self): - InputSitemap._ContextBase.__init__(self, ('loc', 'lastmod')) - self._loc = None # The URL to the Sitemap - #end def __init__ - - def Open(self): - """Just a quick verify of state.""" - assert not self._loc - #end def Open - - def Close(self): - """Return our URL to our parent.""" - if self._loc: - temp = self._loc - self._loc = None - return temp - output.Warn('In the Sitemap index file, a "sitemap" entry had no "loc".') - #end def Close - - def Return(self, result): - """A value has closed. If it was a 'loc', absorb it.""" - if result and (self._last_tag == 'loc'): - self._loc = result - #end def Return - #end class _ContextSitemap - - class _ContextValue(_ContextBase): - - """Context handler for a single value. We return just the value. The - higher level context has to remember what tag led into us.""" - - def __init__(self): - InputSitemap._ContextBase.__init__(self, ()) - self._text = None - #end def __init__ - - def AcceptText(self, text): - """Allow all text, adding it to our buffer.""" - if self._text: - self._text = self._text + text - else: - self._text = text - return True - #end def AcceptText - - def Open(self): - """Initialize our buffer.""" - self._text = None - #end def Open - - def Close(self): - """Return what's in our buffer.""" - text = self._text - self._text = None - if text: - text = text.strip() - return text - #end def Close - #end class _ContextValue - - def __init__(self, attributes): - """Initialize with a dictionary of attributes from our entry in the - config file.""" - xml.sax.handler.ContentHandler.__init__(self) - self._pathlist = None # A list of files - self._current = -1 # Current context in _contexts - self._contexts = None # The stack of contexts we allow - self._contexts_idx = None # ...contexts for index files - self._contexts_stm = None # ...contexts for 
Sitemap files - - if not ValidateAttributes('SITEMAP', attributes, ['path']): - return - - # Init the first file path - path = attributes.get('path') - if path: - path = encoder.MaybeNarrowPath(path) - if os.path.isfile(path): - output.Log('Input: From SITEMAP "%s"' % path, 2) - self._pathlist = [path] - else: - output.Error('Can not locate file "%s"' % path) - else: - output.Error('Sitemap entries must have a "path" attribute.') - #end def __init__ - - def ProduceURLs(self, consumer): - """In general: Produces URLs from our data source, hand them to the - callable consumer. - - In specific: Iterate over our list of paths and delegate the actual - processing to helper methods. This is a complexity no other data source - needs to suffer. We are unique in that we can have files that tell us - to bring in other files. - - Note the decision to allow an index file or not is made in this method. - If we call our parser with (self._contexts == None) the parser will - grab whichever context stack can handle the file. IE: index is allowed. - If instead we set (self._contexts = ...) before parsing, the parser - will only use the stack we specify. IE: index not allowed. 
- """ - # Set up two stacks of contexts - self._contexts_idx = [InputSitemap._ContextSitemapIndex(), - InputSitemap._ContextSitemap(), - InputSitemap._ContextValue()] - - self._contexts_stm = [InputSitemap._ContextUrlSet(), - InputSitemap._ContextUrl(consumer), - InputSitemap._ContextValue()] - - # Process the first file - assert self._pathlist - path = self._pathlist[0] - self._contexts = None # We allow an index file here - self._ProcessFile(path) - - # Iterate over remaining files - self._contexts = self._contexts_stm # No index files allowed - for path in self._pathlist[1:]: - self._ProcessFile(path) - #end def ProduceURLs - - def _ProcessFile(self, path): - """Do per-file reading/parsing/consuming for the file path passed in.""" - assert path - - # Open our file - (frame, file) = OpenFileForRead(path, 'SITEMAP') - if not file: - return - - # Rev up the SAX engine - try: - self._current = -1 - xml.sax.parse(file, self) - except SchemaError: - output.Error('An error in file "%s" made us abort reading the Sitemap.' - % path) - except IOError: - output.Error('Cannot read from file "%s"' % path) - except xml.sax._exceptions.SAXParseException, e: - output.Error('XML error in the file "%s" (line %d, column %d): %s' % - (path, e._linenum, e._colnum, e.getMessage())) - - # Clean up - file.close() - if frame: - frame.close() - #end def _ProcessFile - - def _MungeLocationListIntoFiles(self, urllist): - """Given a list of URLs, munge them into our self._pathlist property. - We do this by assuming all the files live in the same directory as - the first file in the existing pathlist. That is, we assume a - Sitemap index points to Sitemaps only in the same directory. This - is not true in general, but will be true for any output produced - by this script. 
- """ - assert self._pathlist - path = self._pathlist[0] - path = os.path.normpath(path) - dir = os.path.dirname(path) - wide = False - if type(path) == types.UnicodeType: - wide = True - - for url in urllist: - url = URL.Canonicalize(url) - output.Log('Index points to Sitemap file at: %s' % url, 2) - (scheme, netloc, path, query, frag) = urlparse.urlsplit(url) - file = os.path.basename(path) - file = urllib.unquote(file) - if wide: - file = encoder.WidenText(file) - if dir: - file = dir + os.sep + file - if file: - self._pathlist.append(file) - output.Log('Will attempt to read Sitemap file: %s' % file, 1) - #end def _MungeLocationListIntoFiles - - def startElement(self, tag, attributes): - """SAX processing, called per node in the config stream. - As long as the new tag is legal in our current context, this - becomes an Open call on one context deeper. - """ - # If this is the document node, we may have to look for a context stack - if (self._current < 0) and not self._contexts: - assert self._contexts_idx and self._contexts_stm - if tag == 'urlset': - self._contexts = self._contexts_stm - elif tag == 'sitemapindex': - self._contexts = self._contexts_idx - output.Log('File is a Sitemap index.', 2) - else: - output.Error('The document appears to be neither a Sitemap nor a ' - 'Sitemap index.') - raise SchemaError - - # Display a kinder error on a common mistake - if (self._current < 0) and (self._contexts == self._contexts_stm) and ( - tag == 'sitemapindex'): - output.Error('A Sitemap index can not refer to another Sitemap index.') - raise SchemaError - - # Verify no unexpected attributes - if attributes: - text = '' - for attr in attributes.keys(): - # The document node will probably have namespaces - if self._current < 0: - if attr.find('xmlns') >= 0: - continue - if attr.find('xsi') >= 0: - continue - if text: - text = text + ', ' - text = text + attr - if text: - output.Warn('Did not expect any attributes on any tag, instead tag ' - '"%s" had attributes: %s' % 
(tag, text)) - - # Switch contexts - if (self._current < 0) or (self._contexts[self._current].AcceptTag(tag)): - self._current = self._current + 1 - assert self._current < len(self._contexts) - self._contexts[self._current].Open() - else: - output.Error('Can not accept tag "%s" where it appears.' % tag) - raise SchemaError - #end def startElement - - def endElement(self, tag): - """SAX processing, called per node in the config stream. - This becomes a call to Close on one context followed by a call - to Return on the previous. - """ - tag = tag # Avoid warning on unused argument - assert self._current >= 0 - retval = self._contexts[self._current].Close() - self._current = self._current - 1 - if self._current >= 0: - self._contexts[self._current].Return(retval) - elif retval and (self._contexts == self._contexts_idx): - self._MungeLocationListIntoFiles(retval) - #end def endElement - - def characters(self, text): - """SAX processing, called when text values are read. Important to - note that one single text value may be split across multiple calls - of this method. - """ - if (self._current < 0) or ( - not self._contexts[self._current].AcceptText(text)): - if text.strip(): - output.Error('Can not accept text "%s" where it appears.' % text) - raise SchemaError - #end def characters -#end class InputSitemap - - -class FilePathGenerator: - """ - This class generates filenames in a series, upon request. - You can request any iteration number at any time, you don't - have to go in order. - - Example of iterations for '/path/foo.xml.gz': - 0 --> /path/foo.xml.gz - 1 --> /path/foo1.xml.gz - 2 --> /path/foo2.xml.gz - _index.xml --> /path/foo_index.xml - """ - - def __init__(self): - self.is_gzip = False # Is this a GZIP file? - - self._path = None # '/path/' - self._prefix = None # 'foo' - self._suffix = None # '.xml.gz' - #end def __init__ - - def Preload(self, path): - """ Splits up a path into forms ready for recombination. 
""" - path = encoder.MaybeNarrowPath(path) - - # Get down to a base name - path = os.path.normpath(path) - base = os.path.basename(path).lower() - if not base: - output.Error('Couldn\'t parse the file path: %s' % path) - return False - lenbase = len(base) - - # Recognize extension - lensuffix = 0 - compare_suffix = ['.xml', '.xml.gz', '.gz'] - for suffix in compare_suffix: - if base.endswith(suffix): - lensuffix = len(suffix) - break - if not lensuffix: - output.Error('The path "%s" doesn\'t end in a supported file ' - 'extension.' % path) - return False - self.is_gzip = suffix.endswith('.gz') - - # Split the original path - lenpath = len(path) - self._path = path[:lenpath-lenbase] - self._prefix = path[lenpath-lenbase:lenpath-lensuffix] - self._suffix = path[lenpath-lensuffix:] - - return True - #end def Preload - - def GeneratePath(self, instance): - """ Generates the iterations, as described above. """ - prefix = self._path + self._prefix - if type(instance) == types.IntType: - if instance: - return '%s%d%s' % (prefix, instance, self._suffix) - return prefix + self._suffix - return prefix + instance - #end def GeneratePath - - def GenerateURL(self, instance, root_url): - """ Generates iterations, but as a URL instead of a path. """ - prefix = root_url + self._prefix - retval = None - if type(instance) == types.IntType: - if instance: - retval = '%s%d%s' % (prefix, instance, self._suffix) - else: - retval = prefix + self._suffix - else: - retval = prefix + instance - return URL.Canonicalize(retval) - #end def GenerateURL - - def GenerateWildURL(self, root_url): - """ Generates a wildcard that should match all our iterations """ - prefix = URL.Canonicalize(root_url + self._prefix) - temp = URL.Canonicalize(prefix + self._suffix) - suffix = temp[len(prefix):] - return prefix + '*' + suffix - #end def GenerateURL -#end class FilePathGenerator - - -class PerURLStatistics: - """ Keep track of some simple per-URL statistics, like file extension. 
""" - - def __init__(self): - self._extensions = {} # Count of extension instances - #end def __init__ - - def Consume(self, url): - """ Log some stats for the URL. At the moment, that means extension. """ - if url and url.loc: - (scheme, netloc, path, query, frag) = urlparse.urlsplit(url.loc) - if not path: - return - - # Recognize directories - if path.endswith('/'): - if self._extensions.has_key('/'): - self._extensions['/'] = self._extensions['/'] + 1 - else: - self._extensions['/'] = 1 - return - - # Strip to a filename - i = path.rfind('/') - if i >= 0: - assert i < len(path) - path = path[i:] - - # Find extension - i = path.rfind('.') - if i > 0: - assert i < len(path) - ext = path[i:].lower() - if self._extensions.has_key(ext): - self._extensions[ext] = self._extensions[ext] + 1 - else: - self._extensions[ext] = 1 - else: - if self._extensions.has_key('(no extension)'): - self._extensions['(no extension)'] = self._extensions[ - '(no extension)'] + 1 - else: - self._extensions['(no extension)'] = 1 - #end def Consume - - def Log(self): - """ Dump out stats to the output. """ - if len(self._extensions): - output.Log('Count of file extensions on URLs:', 1) - set = self._extensions.keys() - set.sort() - for ext in set: - output.Log(' %7d %s' % (self._extensions[ext], ext), 1) - #end def Log - -class Sitemap(xml.sax.handler.ContentHandler): - """ - This is the big workhorse class that processes your inputs and spits - out sitemap files. It is built as a SAX handler for set up purposes. - That is, it processes an XML stream to bring itself up. - """ - - def __init__(self, suppress_notify): - xml.sax.handler.ContentHandler.__init__(self) - self._filters = [] # Filter objects - self._inputs = [] ... [truncated message content] |