From: <jh...@us...> - 2011-04-12 01:23:59
|
Revision: 267 http://etch.svn.sourceforge.net/etch/?rev=267&view=rev Author: jheiss Date: 2011-04-12 01:23:53 +0000 (Tue, 12 Apr 2011) Log Message: ----------- Add support for nokogiri as a choice of XML libraries in addition to the existing support for libxml and rexml. Modified Paths: -------------- trunk/server/lib/etch.rb trunk/server/lib/etchserver.rb trunk/test/README Modified: trunk/server/lib/etch.rb =================================================================== --- trunk/server/lib/etch.rb 2011-04-12 01:21:17 UTC (rev 266) +++ trunk/server/lib/etch.rb 2011-04-12 01:23:53 UTC (rev 267) @@ -3,6 +3,19 @@ require 'digest/sha1' # hexdigest require 'base64' # decode64, encode64 require 'fileutils' # mkdir_p +require 'erb' +require 'versiontype' # Version +require 'logger' + +class Etch + def self.xmllib + @@xmllib + end + def self.xmllib=(lib) + @@xmllib=lib + end +end + # By default we try to use libxml, falling back to rexml if it is not # available. The xmllib environment variable can be used to force one library # or the other, mostly for testing purposes. @@ -10,24 +23,24 @@ if !ENV['xmllib'] || ENV['xmllib'] == 'libxml' require 'rubygems' # libxml is a gem require 'libxml' - @@xmllib = :libxml + Etch.xmllib = :libxml + elsif ENV['xmllib'] == 'nokogiri' + require 'rubygems' # nokogiri is a gem + require 'nokogiri' + Etch.xmllib = :nokogiri else raise LoadError end rescue LoadError if !ENV['xmllib'] || ENV['xmllib'] == 'rexml' require 'rexml/document' - @@xmllib = :rexml + Etch.xmllib = :rexml else raise end end -require 'erb' -require 'versiontype' # Version -require 'logger' class Etch - # FIXME: I'm not really proud of this, it seems like there ought to be a way # to just use one logger. The problem is that on the server we'd like to # use RAILS_DEFAULT_LOGGER for general logging (which is logging to @@ -270,8 +283,10 @@ end # Validate the filtered file against config.dtd - if !Etch.xmlvalidate(config_xml, @config_dtd) - raise "Filtered config.xml for #{file} fails validation" + begin + Etch.xmlvalidate(config_xml, @config_dtd) + rescue Exception => e + raise Etch.wrap_exception(e, "Filtered config.xml for #{file} fails validation:\n" + e.message) end generation_status = :unknown @@ -846,8 +861,10 @@ end # Validate the filtered file against commands.dtd - if !Etch.xmlvalidate(commands_xml, @commands_dtd) - raise "Filtered commands.xml for #{command} fails validation" + begin + Etch.xmlvalidate(commands_xml, @commands_dtd) + rescue Exception => e + raise Etch.wrap_exception(e, "Filtered commands.xml for #{command} fails validation:\n" + e.message) end generation_status = :unknown @@ -1076,102 +1093,154 @@ end end + # These methods provide an abstraction from the underlying XML library in + # use, allowing us to use whatever the user has available and switch between + # libraries easily. + def self.xmlnewdoc - case @@xmllib + case Etch.xmllib when :libxml LibXML::XML::Document.new + when :nokogiri + Nokogiri::XML::Document.new when :rexml REXML::Document.new else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlroot(doc) - case @@xmllib + case Etch.xmllib when :libxml doc.root + when :nokogiri + doc.root when :rexml doc.root else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlsetroot(doc, root) - case @@xmllib + case Etch.xmllib when :libxml doc.root = root + when :nokogiri + doc.root = root when :rexml doc << root else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlload(file) - case @@xmllib + case Etch.xmllib when :libxml LibXML::XML::Document.file(file) + when :nokogiri + Nokogiri::XML(File.open(file)) do |config| + # Nokogiri is tolerant of malformed documents by default. Good when + # parsing HTML, but there's no reason for us to tolerate errors. We + # want to ensure that the user's instructions to us are clear. + config.options = Nokogiri::XML::ParseOptions::STRICT + end when :rexml REXML::Document.new(File.open(file)) else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlloaddtd(dtdfile) - case @@xmllib + case Etch.xmllib when :libxml LibXML::XML::Dtd.new(IO.read(dtdfile)) + when :nokogiri + # For some reason there isn't a straightforward way to load a standalone + # DTD in Nokogiri + dtddoctext = '<!DOCTYPE dtd [' + File.read(dtdfile) + ']' + dtddoc = Nokogiri::XML(dtddoctext) + dtddoc.children.first when :rexml nil else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end + # Returns true if validation is successful, or if validation is not + # supported by the XML library in use. Raises an exception if validation + # fails. def self.xmlvalidate(xmldoc, dtd) - case @@xmllib + case Etch.xmllib when :libxml - xmldoc.validate(dtd) + result = xmldoc.validate(dtd) + # LibXML::XML::Document#validate is documented to return false if + # validation fails. However, as currently implemented it raises an + # exception instead. Just in case that behavior ever changes raise an + # exception if a false value is returned. + if result + true + else + raise "Validation failed" + end + when :nokogiri + errors = dtd.validate(xmldoc) + if errors.empty? + true + else + raise errors.join('|') + end when :rexml true else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end - def self.xmlnewelem(name) - case @@xmllib + def self.xmlnewelem(name, doc) + case Etch.xmllib when :libxml LibXML::XML::Node.new(name) + when :nokogiri + Nokogiri::XML::Element.new(name, doc) when :rexml REXML::Element.new(name) else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmleach(xmldoc, xpath, &block) - case @@xmllib + case Etch.xmllib when :libxml xmldoc.find(xpath).each(&block) + when :nokogiri + xmldoc.xpath(xpath).each(&block) when :rexml xmldoc.elements.each(xpath, &block) else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmleachall(xmldoc, &block) - case @@xmllib + case Etch.xmllib when :libxml if xmldoc.kind_of?(LibXML::XML::Document) xmldoc.root.each_element(&block) else xmldoc.each_element(&block) end + when :nokogiri + if xmldoc.kind_of?(Nokogiri::XML::Document) + xmldoc.root.element_children.each(&block) + else + xmldoc.element_children.each(&block) + end when :rexml if xmldoc.node_type == :document xmldoc.root.elements.each(&block) @@ -1179,23 +1248,25 @@ xmldoc.elements.each(&block) end else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmleachattrall(elem, &block) - case @@xmllib + case Etch.xmllib when :libxml elem.attributes.each(&block) + when :nokogiri + elem.attribute_nodes.each(&block) when :rexml elem.attributes.each_attribute(&block) else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlarray(xmldoc, xpath) - case @@xmllib + case Etch.xmllib when :libxml elements = xmldoc.find(xpath) if elements @@ -1203,28 +1274,34 @@ else [] end + when :nokogiri + xmldoc.xpath(xpath).to_a when :rexml xmldoc.elements.to_a(xpath) else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlfindfirst(xmldoc, xpath) - case @@xmllib + case Etch.xmllib when :libxml xmldoc.find_first(xpath) + when :nokogiri + xmldoc.at_xpath(xpath) when :rexml xmldoc.elements[xpath] else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmltext(elem) - case @@xmllib + case Etch.xmllib when :libxml elem.content + when :nokogiri + elem.content when :rexml text = elem.text # REXML returns nil rather than '' if there is no text @@ -1234,57 +1311,67 @@ '' end else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlsettext(elem, text) - case @@xmllib + case Etch.xmllib when :libxml elem.content = text + when :nokogiri + elem.content = text when :rexml elem.text = text else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmladd(xmldoc, xpath, name, contents=nil) - case @@xmllib + case Etch.xmllib when :libxml elem = LibXML::XML::Node.new(name) if contents elem.content = contents end xmldoc.find_first(xpath) << elem - elem + when :nokogiri + elem = Nokogiri::XML::Node.new(name, xmldoc) + if contents + elem.content = contents + end + xmldoc.at_xpath(xpath) << elem when :rexml elem = REXML::Element.new(name) if contents elem.text = contents end xmldoc.elements[xpath].add_element(elem) - elem else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlcopyelem(elem, destelem) - case @@xmllib + case Etch.xmllib when :libxml destelem << elem.copy(true) + when :nokogiri + destelem << elem.dup when :rexml - destelem.add_element(elem.dup) + destelem.add_element(elem.clone) else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlremove(xmldoc, element) - case @@xmllib + case Etch.xmllib when :libxml element.remove! + when :nokogiri + element.remove when :rexml if xmldoc.node_type == :document xmldoc.root.elements.delete(element) @@ -1292,40 +1379,51 @@ xmldoc.elements.delete(element) end else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlremovepath(xmldoc, xpath) - case @@xmllib + case Etch.xmllib when :libxml xmldoc.find(xpath).each { |elem| elem.remove! } + when :nokogiri + xmldoc.xpath(xpath).each { |elem| elem.remove } when :rexml - xmldoc.delete_element(xpath) + elem = nil + # delete_element only removes the first match, so call it in a loop + # until it returns nil to indicate no matching element remain + begin + elem = xmldoc.delete_element(xpath) + end while elem != nil else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlattradd(elem, attrname, attrvalue) - case @@xmllib + case Etch.xmllib when :libxml elem.attributes[attrname] = attrvalue + when :nokogiri + elem[attrname] = attrvalue when :rexml elem.add_attribute(attrname, attrvalue) else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end def self.xmlattrremove(elem, attribute) - case @@xmllib + case Etch.xmllib when :libxml attribute.remove! + when :nokogiri + attribute.remove when :rexml elem.attributes.delete(attribute) else - raise "Unknown @xmllib #{@xmllib}" + raise "Unknown XML library #{Etch.xmllib}" end end Modified: trunk/server/lib/etchserver.rb =================================================================== --- trunk/server/lib/etchserver.rb 2011-04-12 01:21:17 UTC (rev 266) +++ trunk/server/lib/etchserver.rb 2011-04-12 01:23:53 UTC (rev 267) @@ -3,25 +3,6 @@ require 'openssl' require 'time' # Time.parse require 'fileutils' # mkdir_p -# By default we try to use libxml, falling back to rexml if it is not -# available. The xmllib environment variable can be used to force one library -# or the other, mostly for testing purposes. -begin - if !ENV['xmllib'] || ENV['xmllib'] == 'libxml' - require 'rubygems' # libxml is a gem - require 'libxml' - @@xmllib = :libxml - else - raise LoadError - end -rescue LoadError - if !ENV['xmllib'] || ENV['xmllib'] == 'rexml' - require 'rexml/document' - @@xmllib = :rexml - else - raise - end -end require 'logger' require 'etch' @@ -372,11 +353,11 @@ # Generate the XML document to return to the client response_xml = Etch.xmlnewdoc - responseroot = Etch.xmlnewelem('files') + responseroot = Etch.xmlnewelem('files', response_xml) Etch.xmlsetroot(response_xml, responseroot) # Add configs for files we generated if response[:configs] - configs_xml = Etch.xmlnewelem('configs') + configs_xml = Etch.xmlnewelem('configs', response_xml) response[:configs].each do |file, config_xml| # Update the stored record of the config # Exclude configs which correspond to files for which we're requesting @@ -408,18 +389,18 @@ end end if !need_sum.empty? - need_sums_xml = Etch.xmlnewelem('need_sums') + need_sums_xml = Etch.xmlnewelem('need_sums', response_xml) need_sum.each do |need| - need_xml = Etch.xmlnewelem('need_sum') + need_xml = Etch.xmlnewelem('need_sum', response_xml) Etch.xmlsettext(need_xml, need) need_sums_xml << need_xml end responseroot << need_sums_xml end if !need_orig.empty? - need_origs_xml = Etch.xmlnewelem('need_origs') + need_origs_xml = Etch.xmlnewelem('need_origs', response_xml) need_orig.each do |need| - need_xml = Etch.xmlnewelem('need_orig') + need_xml = Etch.xmlnewelem('need_orig', response_xml) Etch.xmlsettext(need_xml, need) need_origs_xml << need_xml end @@ -431,7 +412,7 @@ # "commands", so we have to use something different here as the XML # element we insert all of those into as part of the response. if response[:allcommands] - commands_xml = Etch.xmlnewelem('allcommands') + commands_xml = Etch.xmlnewelem('allcommands', response_xml) response[:allcommands].each do |commandname, command_xml| # Update the stored record of the command config = EtchConfig.find_or_create_by_client_id_and_file(:client_id => @client.id, :file => commandname, :config => command_xml.to_s) @@ -444,19 +425,19 @@ responseroot << commands_xml end if response[:retrycommands] - retrycommands_xml = Etch.xmlnewelem('retrycommands') + retrycommands_xml = Etch.xmlnewelem('retrycommands', response_xml) response[:retrycommands].each_key do |commandname| - retry_xml = Etch.xmlnewelem('retrycommand') + retry_xml = Etch.xmlnewelem('retrycommand', response_xml) Etch.xmlsettext(retry_xml, commandname) retrycommands_xml << retry_xml end responseroot << retrycommands_xml end - # FIXME: clean up XML formatting + # Clean up XML formatting # But only if we're in debug mode, in regular mode nobody but the # machines will see the XML and they don't care if it is pretty. - # Tidy's formatting breaks things, it inserts leading/trailing whitespace into text nodes + # FIXME: Tidy's formatting breaks things, it inserts leading/trailing whitespace into text nodes if @debug && false require 'tidy' Tidy.path = '/sw/lib/libtidy.dylib' Modified: trunk/test/README =================================================================== --- trunk/test/README 2011-04-12 01:21:17 UTC (rev 266) +++ trunk/test/README 2011-04-12 01:23:53 UTC (rev 267) @@ -3,8 +3,8 @@ To execute a specific test method run "rake test TEST=test/file.rb TESTOPTS='--name=test_files'" -To force a particular XML library set xmllib=libxml or xmllib=rexml in -your environment before running the tests. +To force a particular XML library set xmllib=libxml or xmllib=nokogiri or +xmllib=rexml in your environment before running the tests. Some of the older files here have all of their tests in one method. Over time I'm breaking those up into multiple methods so that it is easier to This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |